'
13 |
14 | hr_faded: '
'
15 | hr_shaded: '
'
--------------------------------------------------------------------------------
/settings.ini:
--------------------------------------------------------------------------------
1 | [DEFAULT]
2 | host = github
3 | lib_name = dtype_diet
4 | user = noklam
5 | description = Attempt to shrink Pandas `dtypes` without losing data so you have more RAM (and maybe more speed)
6 | keywords = pandas, optimization
7 | author = noklam
8 | author_email = mediumnok@gmail.com
9 | copyright = Chan Nok Lam
10 | branch = master
11 | version = 0.0.3
12 | min_python = 3.7
13 | audience = Developers
14 | language = English
15 | custom_sidebar = False
16 | license = MIT
17 | status = 2
18 | requirements = pandas>=1.0.0 tabulate
19 | nbs_path = .
20 | doc_path = docs
21 | doc_host = https://noklam.github.io
22 | doc_baseurl = /dtype_diet/
23 | git_url = https://github.com/noklam/dtype_diet/tree/master/
24 | lib_path = dtype_diet
25 | title = dtype_diet
26 | tst_flags = slow
27 | recursive = True
28 | black_formatting = False
29 | readme_nb = index.ipynb
30 | allowed_metadata_keys =
31 | allowed_cell_metadata_keys =
32 | jupyter_hooks = True
33 | clean_ids = True
34 | clear_all = False
35 | put_version_in_init = True
36 |
37 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | services:
3 | fastai: &fastai
4 | restart: unless-stopped
5 | working_dir: /data
6 | image: fastai/codespaces
7 | logging:
8 | driver: json-file
9 | options:
10 | max-size: 50m
11 | stdin_open: true
12 | tty: true
13 | volumes:
14 | - .:/data/
15 |
16 | notebook:
17 | <<: *fastai
18 | command: bash -c "pip install -e . && jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port=8080 --NotebookApp.token='' --NotebookApp.password=''"
19 | ports:
20 | - "8080:8080"
21 |
22 | watcher:
23 | <<: *fastai
24 | command: watchmedo shell-command --command nbdev_build_docs --pattern *.ipynb --recursive --drop
25 | network_mode: host # for GitHub Codespaces https://github.com/features/codespaces/
26 |
27 | jekyll:
28 | <<: *fastai
29 | ports:
30 | - "4000:4000"
31 | command: >
32 | bash -c "cp -r docs_src docs
33 | && pip install .
34 | && nbdev_build_docs && cd docs
35 | && bundle i
36 | && chmod -R u+rwx . && bundle exec jekyll serve --host 0.0.0.0"
37 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Chan Nok Lam
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docs/licenses/LICENSE:
--------------------------------------------------------------------------------
1 | /* This license pertains to the docs template, except for the Navgoco jQuery component. */
2 |
3 | The MIT License (MIT)
4 |
5 | Original theme: Copyright (c) 2016 Tom Johnson
6 | Modifications: Copyright (c) 2017 onwards fast.ai, Inc
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a copy
9 | of this software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 |
15 | The above copyright notice and this permission notice shall be included in all
16 | copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 | SOFTWARE.
25 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 | on: [push, pull_request]
3 | jobs:
4 | build:
5 | runs-on: ubuntu-latest
6 | steps:
7 | - uses: actions/checkout@v1
8 | - uses: actions/setup-python@v1
9 | with:
10 | python-version: '3.7'
11 | architecture: 'x64'
12 | - name: Install the library
13 | run: |
14 | pip install nbdev jupyter
15 | pip install -e .
16 | - name: Read all notebooks
17 | run: |
18 | nbdev_read_nbs
19 | - name: Check if all notebooks are cleaned
20 | run: |
21 | echo "Check we are starting with clean git checkout"
22 | if [ -n "$(git status -uno -s)" ]; then echo "git status is not clean"; false; fi
23 | echo "Trying to strip out notebooks"
24 | nbdev_clean_nbs
25 | echo "Check that strip out was unnecessary"
26 | git status -s # display the status to see which nbs need cleaning up
27 | if [ -n "$(git status -uno -s)" ]; then echo -e "!!! Detected unstripped out notebooks\n!!!Remember to run nbdev_install_git_hooks"; false; fi
28 | - name: Check if there is no diff library/notebooks
29 | run: |
30 | if [ -n "$(nbdev_diff_nbs)" ]; then echo -e "!!! Detected difference between the notebooks and the library"; false; fi
31 | - name: Run tests
32 | run: |
33 | nbdev_test_nbs
34 |
--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
1 | repository: noklam/dtype_diet
2 | output: web
3 | topnav_title: dtype_diet
4 | site_title: dtype_diet
5 | company_name: Chan Nok Lam
6 | description: One step to take your dataset to go on a diet.
7 | # Set to false to disable KaTeX math
8 | use_math: true
9 | # Add Google analytics id if you have one and want to use it here
10 | google_analytics:
11 | # See http://nbdev.fast.ai/search for help with adding Search
12 | google_search:
13 |
14 | host: 127.0.0.1
15 | # the preview server used. Leave as is.
16 | port: 4000
17 | # the port where the preview is rendered.
18 |
19 | exclude:
20 | - .idea/
21 | - .gitignore
22 | - vendor
23 |
24 | # A second top-level `exclude: [vendor]` used to sit here. Duplicate mapping keys are invalid YAML, and parsers that tolerate them typically keep only the last one; `vendor` is already in the list above.
25 |
26 | highlighter: rouge
27 | markdown: kramdown
28 | kramdown:
29 | input: GFM
30 | auto_ids: true
31 | hard_wrap: false
32 | syntax_highlighter: rouge
33 |
34 | collections:
35 | tooltips:
36 | output: false
37 |
38 | defaults:
39 | -
40 | scope:
41 | path: ""
42 | type: "pages"
43 | values:
44 | layout: "page"
45 | comments: true
46 | search: true
47 | sidebar: home_sidebar
48 | topnav: topnav
49 | -
50 | scope:
51 | path: ""
52 | type: "tooltips"
53 | values:
54 | layout: "page"
55 | comments: true
56 | search: true
57 | tooltip: true
58 |
59 | sidebars:
60 | - home_sidebar
61 |
62 | theme: jekyll-theme-cayman
63 | baseurl: /dtype_diet/
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
9 | name: Upload Python Package to PyPI
10 |
11 | on:
12 | release:
13 | types: [published]
14 |
15 | # Allows you to run this workflow manually from the Actions tab
16 | workflow_dispatch:
17 |
18 |
19 | permissions:
20 | contents: read
21 |
22 | jobs:
23 | deploy:
24 |
25 | runs-on: ubuntu-22.04
26 |
27 | steps:
28 | - uses: actions/checkout@v3
29 |
30 | - name: Set up Python
31 | uses: actions/setup-python@v3
32 | with:
33 | python-version: '3.12.0'
34 |
35 | - name: Install dependencies
36 | run: |
37 | python -m pip install --upgrade build
38 |
39 | - name: Build package
40 | run: python -m build
41 |
42 | - name: Publish package
43 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
44 | with:
45 | user: __token__
46 | password: ${{ secrets.PYPI_API_TOKEN }}
47 |
--------------------------------------------------------------------------------
/docs/feed.xml:
--------------------------------------------------------------------------------
1 | ---
2 | search: exclude
3 | layout: none
4 | ---
5 |
6 |
7 |
8 |
9 | {{ site.title | xml_escape }}
10 | {{ site.description | xml_escape }}
11 | {{ site.url }}/
12 |
13 | {{ site.time | date_to_rfc822 }}
14 | {{ site.time | date_to_rfc822 }}
15 | Jekyll v{{ jekyll.version }}
16 | {% for post in site.posts limit:10 %}
17 | -
18 |
{{ post.title | xml_escape }}
19 | {{ post.content | xml_escape }}
20 | {{ post.date | date_to_rfc822 }}
21 | {{ post.url | prepend: site.url }}
22 | {{ post.url | prepend: site.url }}
23 | {% for tag in post.tags %}
24 | {{ tag | xml_escape }}
25 | {% endfor %}
26 | {% for cat in post.categories %}
27 | {{ cat | xml_escape }}
28 | {% endfor %}
29 |
30 | {% endfor %}
31 |
32 |
33 |
--------------------------------------------------------------------------------
/docs/_includes/links.html:
--------------------------------------------------------------------------------
1 | {% comment %}Get links from each sidebar, as listed in the _config.yml file under sidebars{% endcomment %}
2 |
3 | {% for sidebar in site.sidebars %}
4 | {% for entry in site.data.sidebars[sidebar].entries %}
5 | {% for folder in entry.folders %}
6 | {% for folderitem in folder.folderitems %}
7 | {% if folderitem.url contains "html#" %}
8 | [{{folderitem.url | remove: "/" }}]: {{folderitem.url | remove: "/"}}
9 | {% else %}
10 | [{{folderitem.url | remove: "/" | remove: ".html"}}]: {{folderitem.url | remove: "/"}}
11 | {% endif %}
12 | {% for subfolders in folderitem.subfolders %}
13 | {% for subfolderitem in subfolders.subfolderitems %}
14 | [{{subfolderitem.url | remove: "/" | remove: ".html"}}]: {{subfolderitem.url | remove: "/"}}
15 | {% endfor %}
16 | {% endfor %}
17 | {% endfor %}
18 | {% endfor %}
19 | {% endfor %}
20 | {% endfor %}
21 |
22 |
23 | {% comment %} Get links from topnav {% endcomment %}
24 |
25 | {% for entry in site.data.topnav.topnav %}
26 | {% for item in entry.items %}
27 | {% if item.external_url == null %}
28 | [{{item.url | remove: "/" | remove: ".html"}}]: {{item.url | remove: "/"}}
29 | {% endif %}
30 | {% endfor %}
31 | {% endfor %}
32 |
33 | {% comment %}Get links from topnav dropdowns {% endcomment %}
34 |
35 | {% for entry in site.data.topnav.topnav_dropdowns %}
36 | {% for folder in entry.folders %}
37 | {% for folderitem in folder.folderitems %}
38 | {% if folderitem.external_url == null %}
39 | [{{folderitem.url | remove: "/" | remove: ".html"}}]: {{folderitem.url | remove: "/"}}
40 | {% endif %}
41 | {% endfor %}
42 | {% endfor %}
43 | {% endfor %}
44 |
45 |
--------------------------------------------------------------------------------
/docs/_includes/head_print.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
{% if page.homepage == true %} {{site.homepage_title}} {% elsif page.title %}{{ page.title }}{% endif %} | {{ site.site_title }}
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
23 |
24 |
29 |
--------------------------------------------------------------------------------
/docs/css/modern-business.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Start Bootstrap - Modern Business HTML Template (http://startbootstrap.com)
3 | * Code licensed under the Apache License v2.0.
4 | * For details, see http://www.apache.org/licenses/LICENSE-2.0.
5 | */
6 |
7 | /* Global Styles */
8 |
9 | html,
10 | body {
11 | height: 100%;
12 | }
13 |
14 | .img-portfolio {
15 | margin-bottom: 30px;
16 | }
17 |
18 | .img-hover:hover {
19 | opacity: 0.8;
20 | }
21 |
22 | /* Home Page Carousel */
23 |
24 | header.carousel {
25 | height: 50%;
26 | }
27 |
28 | header.carousel .item,
29 | header.carousel .item.active,
30 | header.carousel .carousel-inner {
31 | height: 100%;
32 | }
33 |
34 | header.carousel .fill {
35 | width: 100%;
36 | height: 100%;
37 | background-position: center;
38 | background-size: cover;
39 | }
40 |
41 | /* 404 Page Styles */
42 |
43 | .error-404 {
44 | font-size: 100px;
45 | }
46 |
47 | /* Pricing Page Styles */
48 |
49 | .price {
50 | display: block;
51 | font-size: 50px;
52 | line-height: 50px;
53 | }
54 |
55 | .price sup {
56 | top: -20px;
57 | left: 2px;
58 | font-size: 20px;
59 | }
60 |
61 | .period {
62 | display: block;
63 | font-style: italic;
64 | }
65 |
66 | /* Footer Styles */
67 |
68 | footer {
69 | margin: 50px 0;
70 | }
71 |
72 | /* Responsive Styles */
73 |
74 | @media(max-width:991px) {
75 | .client-img,
76 | .img-related {
77 | margin-bottom: 30px;
78 | }
79 | }
80 |
81 | @media(max-width:767px) {
82 | .img-portfolio {
83 | margin-bottom: 15px;
84 | }
85 |
86 | header.carousel .carousel {
87 | height: 70%;
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/docs/licenses/LICENSE-BSD-NAVGOCO.txt:
--------------------------------------------------------------------------------
1 | /* This license pertains to the Navgoco jQuery component used for the sidebar. */
2 |
3 | Copyright (c) 2013, Christodoulos Tsoulloftas, http://www.komposta.net
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without modification,
7 | are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice,
10 | this list of conditions and the following disclaimer.
11 | * Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 | * Neither the name of the <organization> nor the names of its
15 | contributors may be used to endorse or promote products derived from this
16 | software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
22 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27 | OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pkg_resources import parse_version
2 | from configparser import ConfigParser
3 | import setuptools
4 | assert parse_version(setuptools.__version__)>=parse_version('36.2')
5 |
6 | # note: all settings are in settings.ini; edit there, not here
7 | config = ConfigParser(delimiters=['='])
8 | config.read('settings.ini')
9 | cfg = config['DEFAULT']
10 |
11 | cfg_keys = 'version description keywords author author_email'.split()
12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
14 | setup_cfg = {o:cfg[o] for o in cfg_keys}
15 |
16 | licenses = {
17 | 'MIT': ('MIT License', 'OSI Approved :: MIT License'),
18 | }
19 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
20 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
21 | py_versions = '3.6 3.7 3.8 3.9 3.10 3.11'.split()
22 |
23 | requirements = cfg.get('requirements','').split()
24 | lic = licenses[cfg['license']]
25 | min_python = cfg['min_python']
26 |
27 | setuptools.setup(
28 | name = cfg['lib_name'],
29 | license = lic[0],
30 | classifiers = [
31 | 'Development Status :: ' + statuses[int(cfg['status'])],
32 | 'Intended Audience :: ' + cfg['audience'].title(),
33 | 'License :: ' + lic[1],
34 | 'Natural Language :: ' + cfg['language'].title(),
35 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]],
36 | url = cfg['git_url'],
37 | packages = setuptools.find_packages(),
38 | include_package_data = True,
39 | install_requires = requirements,
40 | dependency_links = cfg.get('dep_links','').split(),
41 | python_requires = '>=' + cfg['min_python'],
42 | long_description = open('README.md').read(),
43 | long_description_content_type = 'text/markdown',
44 | zip_safe = False,
45 | entry_points = { 'console_scripts': cfg.get('console_scripts','').split() },
46 | **setup_cfg)
47 |
48 |
--------------------------------------------------------------------------------
/docs/_layouts/page.html:
--------------------------------------------------------------------------------
1 | ---
2 | layout: default
3 | ---
4 |
5 |
12 |
13 | {% if page.simple_map == true %}
14 |
15 |
20 |
21 | {% include custom/{{page.map_name}}.html %}
22 |
23 | {% elsif page.complex_map == true %}
24 |
25 |
30 |
31 | {% include custom/{{page.map_name}}.html %}
32 |
33 | {% endif %}
34 |
35 |
36 |
37 | {% if page.summary %}
38 |
{{page.summary}}
39 | {% endif %}
40 |
41 | {% unless page.toc == false %}
42 | {% include toc.html %}
43 | {% endunless %}
44 |
45 |
46 | {% if site.github_editme_path %}
47 |
48 |
Edit me
49 |
50 | {% endif %}
51 |
52 | {{content}}
53 |
54 |
65 |
66 |
67 |
68 | {{site.data.alerts.hr_shaded}}
69 |
70 | {% include footer.html %}
71 |
--------------------------------------------------------------------------------
/docs/js/customscripts.js:
--------------------------------------------------------------------------------
1 | $('#mysidebar').height($(".nav").height());
2 |
3 |
4 | $( document ).ready(function() {
5 |
6 | // If the height of the viewport is greater than 800px, set the sidebar's class to "nav affix", which makes the nav bar float in a fixed
7 | // position as you scroll. If you have a lot of nav items, this height may not work for you.
8 | var h = $(window).height();
9 | //console.log (h);
10 | if (h > 800) {
11 | $( "#mysidebar" ).attr("class", "nav affix");
12 | }
13 | // Activate tooltips. Although this is a Bootstrap JS function, it must be activated this way in your theme.
14 | $('[data-toggle="tooltip"]').tooltip({
15 | placement : 'top'
16 | });
17 |
18 | /**
19 | * AnchorJS: add anchor links to h2-h5 headings.
20 | */
21 | anchors.add('h2,h3,h4,h5');
22 |
23 | });
24 |
25 | // needed for nav tabs on pages. See Formatting > Nav tabs for more details.
26 | // script from http://stackoverflow.com/questions/10523433/how-do-i-keep-the-current-tab-active-with-twitter-bootstrap-after-a-page-reload
27 | $(function() {
28 | var json, tabsState;
29 | $('a[data-toggle="pill"], a[data-toggle="tab"]').on('shown.bs.tab', function(e) {
30 | var href, json, parentId, tabsState;
31 |
32 | tabsState = localStorage.getItem("tabs-state");
33 | json = JSON.parse(tabsState || "{}");
34 | parentId = $(e.target).parents("ul.nav.nav-pills, ul.nav.nav-tabs").attr("id");
35 | href = $(e.target).attr('href');
36 | json[parentId] = href;
37 |
38 | return localStorage.setItem("tabs-state", JSON.stringify(json));
39 | });
40 |
41 | tabsState = localStorage.getItem("tabs-state");
42 | json = JSON.parse(tabsState || "{}");
43 |
44 | $.each(json, function(containerId, href) {
45 | return $("#" + containerId + " a[href=" + href + "]").tab('show');
46 | });
47 |
48 | $("ul.nav.nav-pills, ul.nav.nav-tabs").each(function() {
49 | var $this = $(this);
50 | if (!json[$this.attr("id")]) {
51 | return $this.find("a[data-toggle=tab]:first, a[data-toggle=pill]:first").tab("show");
52 | }
53 | });
54 | });
55 |
--------------------------------------------------------------------------------
/docs/images/colab.svg:
--------------------------------------------------------------------------------
1 | Open in Colab Open in Colab
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bak
2 | .gitattributes
3 | .last_checked
4 | .gitconfig
5 | *.bak
6 | *.log
7 | *~
8 | ~*
9 | _tmp*
10 | tmp*
11 | tags
12 |
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | *.py[cod]
16 | *$py.class
17 |
18 | # C extensions
19 | *.so
20 |
21 | # Distribution / packaging
22 | .Python
23 | env/
24 | build/
25 | develop-eggs/
26 | dist/
27 | downloads/
28 | eggs/
29 | .eggs/
30 | lib/
31 | lib64/
32 | parts/
33 | sdist/
34 | var/
35 | wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 |
40 | # PyInstaller
41 | # Usually these files are written by a python script from a template
42 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
43 | *.manifest
44 | *.spec
45 |
46 | # Installer logs
47 | pip-log.txt
48 | pip-delete-this-directory.txt
49 |
50 | # Unit test / coverage reports
51 | htmlcov/
52 | .tox/
53 | .coverage
54 | .coverage.*
55 | .cache
56 | nosetests.xml
57 | coverage.xml
58 | *.cover
59 | .hypothesis/
60 |
61 | # Translations
62 | *.mo
63 | *.pot
64 |
65 | # Django stuff:
66 | *.log
67 | local_settings.py
68 |
69 | # Flask stuff:
70 | instance/
71 | .webassets-cache
72 |
73 | # Scrapy stuff:
74 | .scrapy
75 |
76 | # Sphinx documentation
77 | docs/_build/
78 |
79 | # PyBuilder
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # celery beat schedule file
89 | celerybeat-schedule
90 |
91 | # SageMath parsed files
92 | *.sage.py
93 |
94 | # dotenv
95 | .env
96 |
97 | # virtualenv
98 | .venv
99 | venv/
100 | ENV/
101 |
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 |
106 | # Rope project settings
107 | .ropeproject
108 |
109 | # mkdocs documentation
110 | /site
111 |
112 | # mypy
113 | .mypy_cache/
114 |
115 | .vscode
116 | *.swp
117 |
118 | # osx generated files
119 | .DS_Store
120 | .DS_Store?
121 | .Trashes
122 | ehthumbs.db
123 | Thumbs.db
124 | .idea
125 |
126 | # pytest
127 | .pytest_cache
128 |
129 | # tools/trust-doc-nbs
130 | docs_src/.last_checked
131 |
132 | # symlinks to fastai
133 | docs_src/fastai
134 | tools/fastai
135 |
136 | # link checker
137 | checklink/cookies.txt
138 |
139 | # .gitconfig is now autogenerated
140 | .gitconfig
141 |
142 | # data
143 | *.csv
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute
2 |
3 | ## How to get started
4 |
5 | Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it:
6 | ```
7 | nbdev_install_git_hooks
8 | ```
9 |
10 | ## Did you find a bug?
11 |
12 | * Ensure the bug was not already reported by searching on GitHub under Issues.
13 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
14 | * Be sure to add the complete error messages.
15 |
16 | #### Did you write a patch that fixes a bug?
17 |
18 | * Open a new GitHub pull request with the patch.
19 | * Ensure that your PR includes a test that fails without your patch and passes with it.
20 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
21 |
22 | ## PR submission guidelines
23 |
24 | * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needed to keep each PR focused.
25 | * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs, and they will most likely get rejected.
26 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can.
27 | * Do not turn an already submitted PR into your development playground. If, after you have submitted a PR, you discover that more work is needed, close the PR, do the required work, and then submit a new PR. Otherwise each of your commits requires attention from the maintainers of the project.
28 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exceptional case where you realize it will take many, many commits to complete the requests, it's probably best to close the PR, do the work, and then submit it again. Use common sense when choosing one way over the other.
29 |
30 | ## Do you want to contribute to the documentation?
31 |
32 | * Docs are automatically created from the notebooks in the root directory.
33 |
--------------------------------------------------------------------------------
/docs/_includes/sidebar.html:
--------------------------------------------------------------------------------
1 | {% assign sidebar = site.data.sidebars[page.sidebar].entries %}
2 | {% assign pageurl = page.url | remove: ".html" %}
3 |
4 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/docs/css/theme-green.css:
--------------------------------------------------------------------------------
1 | .summary {
2 | color: #808080;
3 | border-left: 5px solid #E50E51;
4 | font-size:16px;
5 | }
6 |
7 |
8 | h3 {color: #E50E51; }
9 | h4 {color: #808080; }
10 |
11 | .nav-tabs > li.active > a, .nav-tabs > li.active > a:hover, .nav-tabs > li.active > a:focus {
12 | background-color: #248ec2;
13 | color: white;
14 | }
15 |
16 | .nav > li.active > a {
17 | background-color: #72ac4a;
18 | }
19 |
20 | .nav > li > a:hover {
21 | background-color: #72ac4a;
22 | }
23 |
24 | div.navbar-collapse .dropdown-menu > li > a:hover {
25 | background-color: #72ac4a;
26 | }
27 |
28 | .navbar-inverse .navbar-nav>li>a, .navbar-inverse .navbar-brand {
29 | color: white;
30 | }
31 |
32 | .navbar-inverse .navbar-nav>li>a:hover, a.fa.fa-home.fa-lg.navbar-brand:hover {
33 | color: #f0f0f0;
34 | }
35 |
36 | .nav li.thirdlevel > a {
37 | background-color: #FAFAFA !important;
38 | color: #72ac4a;
39 | font-weight: bold;
40 | }
41 |
42 | a[data-toggle="tooltip"] {
43 | color: #649345;
44 | font-style: italic;
45 | cursor: default;
46 | }
47 |
48 | .navbar-inverse {
49 | background-color: #72ac4a;
50 | border-color: #5b893c;
51 | }
52 |
53 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus {
54 | color: #5b893c;
55 | }
56 |
57 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus {
58 | background-color: #5b893c;
59 | color: #ffffff;
60 | }
61 |
62 | /* not sure if using this ...*/
63 | .navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form {
64 | border-color: #72ac4a !important;
65 | }
66 |
67 | .btn-primary {
68 | color: #ffffff;
69 | background-color: #5b893c;
70 | border-color: #5b893c;
71 | }
72 |
73 | .btn-primary:hover,
74 | .btn-primary:focus,
75 | .btn-primary:active,
76 | .btn-primary.active,
77 | .open .dropdown-toggle.btn-primary {
78 | background-color: #72ac4a;
79 | border-color: #5b893c;
80 | }
81 |
82 | .printTitle {
83 | color: #5b893c !important;
84 | }
85 |
86 | body.print h1 {color: #5b893c !important; font-size:28px;}
87 | body.print h2 {color: #595959 !important; font-size:24px;}
88 | body.print h3 {color: #E50E51 !important; font-size:14px;}
89 | body.print h4 {color: #679DCE !important; font-size:14px; font-style: italic;}
90 |
91 | .anchorjs-link:hover {
92 | color: #4f7233;
93 | }
94 |
95 | div.sidebarTitle {
96 | color: #E50E51;
97 | }
98 |
99 | li.sidebarTitle {
100 | margin-top:20px;
101 | font-weight:normal;
102 | font-size:130%;
103 | color: #ED1951;
104 | margin-bottom:10px;
105 | margin-left: 5px;
106 | }
107 |
108 | .navbar-inverse .navbar-toggle:focus, .navbar-inverse .navbar-toggle:hover {
109 | background-color: #E50E51;
110 | }
111 |
--------------------------------------------------------------------------------
/docs/css/theme-blue.css:
--------------------------------------------------------------------------------
1 | .summary {
2 | color: #808080;
3 | border-left: 5px solid #ED1951;
4 | font-size:16px;
5 | }
6 |
7 |
8 | h3 {color: #000000; }
9 | h4 {color: #000000; }
10 |
11 | .nav-tabs > li.active > a, .nav-tabs > li.active > a:hover, .nav-tabs > li.active > a:focus {
12 | background-color: #248ec2;
13 | color: white;
14 | }
15 |
16 | .nav > li.active > a {
17 | background-color: #347DBE;
18 | }
19 |
20 | .nav > li > a:hover {
21 | background-color: #248ec2;
22 | }
23 |
24 | div.navbar-collapse .dropdown-menu > li > a:hover {
25 | background-color: #347DBE;
26 | }
27 |
28 | .nav li.thirdlevel > a {
29 | background-color: #FAFAFA !important;
30 | color: #248EC2;
31 | font-weight: bold;
32 | }
33 |
34 | a[data-toggle="tooltip"] {
35 | color: #649345;
36 | font-style: italic;
37 | cursor: default;
38 | }
39 |
40 | .navbar-inverse {
41 | background-color: #347DBE;
42 | border-color: #015CAE;
43 | }
44 | .navbar-inverse .navbar-nav>li>a, .navbar-inverse .navbar-brand {
45 | color: white;
46 | }
47 |
48 | .navbar-inverse .navbar-nav>li>a:hover, a.fa.fa-home.fa-lg.navbar-brand:hover {
49 | color: #f0f0f0;
50 | }
51 |
52 | a.navbar-brand:hover {
53 | color: #f0f0f0;
54 | }
55 |
56 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus {
57 | color: #015CAE;
58 | }
59 |
60 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus {
61 | background-color: #015CAE;
62 | color: #ffffff;
63 | }
64 |
65 | .navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form {
66 | border-color: #248ec2 !important;
67 | }
68 |
69 | .btn-primary {
70 | color: #ffffff;
71 | background-color: #347DBE;
72 | border-color: #347DBE;
73 | }
74 |
75 | .navbar-inverse .navbar-nav > .active > a, .navbar-inverse .navbar-nav > .active > a:hover, .navbar-inverse .navbar-nav > .active > a:focus {
76 | background-color: #347DBE;
77 | }
78 |
79 | .btn-primary:hover,
80 | .btn-primary:focus,
81 | .btn-primary:active,
82 | .btn-primary.active,
83 | .open .dropdown-toggle.btn-primary {
84 | background-color: #248ec2;
85 | border-color: #347DBE;
86 | }
87 |
88 | .printTitle {
89 | color: #015CAE !important;
90 | }
91 |
92 | body.print h1 {color: #015CAE !important; font-size:28px !important;}
93 | body.print h2 {color: #595959 !important; font-size:20px !important;}
94 | body.print h3 {color: #E50E51 !important; font-size:14px !important;}
95 | body.print h4 {color: #679DCE !important; font-size:14px; font-style: italic !important;}
96 |
97 | .anchorjs-link:hover {
98 | color: #216f9b;
99 | }
100 |
101 | div.sidebarTitle {
102 | color: #015CAE;
103 | }
104 |
105 | li.sidebarTitle {
106 | margin-top:20px;
107 | font-weight:normal;
108 | font-size:130%;
109 | color: #ED1951;
110 | margin-bottom:10px;
111 | margin-left: 5px;
112 |
113 | }
114 |
115 | .navbar-inverse .navbar-toggle:focus, .navbar-inverse .navbar-toggle:hover {
116 | background-color: #015CAE;
117 | }
118 |
119 | .navbar-inverse .navbar-toggle {
120 | border-color: #015CAE;
121 | }
122 |
--------------------------------------------------------------------------------
/docs/_includes/topnav.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 | Nav
17 |
18 |
19 | {% assign topnav = site.data[page.topnav] %}
20 | {% assign topnav_dropdowns = site.data[page.topnav].topnav_dropdowns %}
21 |
22 | {% for entry in topnav.topnav %}
23 | {% for item in entry.items %}
24 | {% if item.external_url %}
25 | {{item.title}}
26 | {% elsif page.url contains item.url %}
27 | {{item.title}}
28 | {% else %}
29 | {{item.title}}
30 | {% endif %}
31 | {% endfor %}
32 | {% endfor %}
33 |
34 |
35 | {% for entry in topnav_dropdowns %}
36 | {% for folder in entry.folders %}
37 |
38 | {{ folder.title }}
39 |
50 |
51 | {% endfor %}
52 | {% endfor %}
53 | {% if site.google_search %}
54 |
55 | {% include search_google_custom.html %}
56 |
57 | {% endif %}
58 |
59 |
60 |
61 |
62 |
63 |
--------------------------------------------------------------------------------
/docs/js/jquery.navgoco.min.js:
--------------------------------------------------------------------------------
1 | /*
2 | * jQuery Navgoco Menus Plugin v0.2.1 (2014-04-11)
3 | * https://github.com/tefra/navgoco
4 | *
5 | * Copyright (c) 2014 Chris T (@tefra)
6 | * BSD - https://github.com/tefra/navgoco/blob/master/LICENSE-BSD
7 | */
8 | !function(a){"use strict";var b=function(b,c,d){return this.el=b,this.$el=a(b),this.options=c,this.uuid=this.$el.attr("id")?this.$el.attr("id"):d,this.state={},this.init(),this};b.prototype={init:function(){var b=this;b._load(),b.$el.find("ul").each(function(c){var d=a(this);d.attr("data-index",c),b.options.save&&b.state.hasOwnProperty(c)?(d.parent().addClass(b.options.openClass),d.show()):d.parent().hasClass(b.options.openClass)?(d.show(),b.state[c]=1):d.hide()});var c=a(" ").prepend(b.options.caretHtml),d=b.$el.find("li > a");b._trigger(c,!1),b._trigger(d,!0),b.$el.find("li:has(ul) > a").prepend(c)},_trigger:function(b,c){var d=this;b.on("click",function(b){b.stopPropagation();var e=c?a(this).next():a(this).parent().next(),f=!1;if(c){var g=a(this).attr("href");f=void 0===g||""===g||"#"===g}if(e=e.length>0?e:!1,d.options.onClickBefore.call(this,b,e),!c||e&&f)b.preventDefault(),d._toggle(e,e.is(":hidden")),d._save();else if(d.options.accordion){var h=d.state=d._parents(a(this));d.$el.find("ul").filter(":visible").each(function(){var b=a(this),c=b.attr("data-index");h.hasOwnProperty(c)||d._toggle(b,!1)}),d._save()}d.options.onClickAfter.call(this,b,e)})},_toggle:function(b,c){var d=this,e=b.attr("data-index"),f=b.parent();if(d.options.onToggleBefore.call(this,b,c),c){if(f.addClass(d.options.openClass),b.slideDown(d.options.slide),d.state[e]=1,d.options.accordion){var g=d.state=d._parents(b);g[e]=d.state[e]=1,d.$el.find("ul").filter(":visible").each(function(){var b=a(this),c=b.attr("data-index");g.hasOwnProperty(c)||d._toggle(b,!1)})}}else f.removeClass(d.options.openClass),b.slideUp(d.options.slide),d.state[e]=0;d.options.onToggleAfter.call(this,b,c)},_parents:function(b,c){var d={},e=b.parent(),f=e.parents("ul");return f.each(function(){var b=a(this),e=b.attr("data-index");return e?void(d[e]=c?b:1):!1}),d},_save:function(){if(this.options.save){var b={};for(var d in 
this.state)1===this.state[d]&&(b[d]=1);c[this.uuid]=this.state=b,a.cookie(this.options.cookie.name,JSON.stringify(c),this.options.cookie)}},_load:function(){if(this.options.save){if(null===c){var b=a.cookie(this.options.cookie.name);c=b?JSON.parse(b):{}}this.state=c.hasOwnProperty(this.uuid)?c[this.uuid]:{}}},toggle:function(b){var c=this,d=arguments.length;if(1>=d)c.$el.find("ul").each(function(){var d=a(this);c._toggle(d,b)});else{var e,f={},g=Array.prototype.slice.call(arguments,1);d--;for(var h=0;d>h;h++){e=g[h];var i=c.$el.find('ul[data-index="'+e+'"]').first();if(i&&(f[e]=i,b)){var j=c._parents(i,!0);for(var k in j)f.hasOwnProperty(k)||(f[k]=j[k])}}for(e in f)c._toggle(f[e],b)}c._save()},destroy:function(){a.removeData(this.$el),this.$el.find("li:has(ul) > a").unbind("click"),this.$el.find("li:has(ul) > a > span").unbind("click")}},a.fn.navgoco=function(c){if("string"==typeof c&&"_"!==c.charAt(0)&&"init"!==c)var d=!0,e=Array.prototype.slice.call(arguments,1);else c=a.extend({},a.fn.navgoco.defaults,c||{}),a.cookie||(c.save=!1);return this.each(function(f){var g=a(this),h=g.data("navgoco");h||(h=new b(this,d?a.fn.navgoco.defaults:c,f),g.data("navgoco",h)),d&&h[c].apply(h,e)})};var c=null;a.fn.navgoco.defaults={caretHtml:"",accordion:!1,openClass:"open",save:!0,cookie:{name:"navgoco",expires:!1,path:"/"},slide:{duration:400,easing:"swing"},onClickBefore:a.noop,onClickAfter:a.noop,onToggleBefore:a.noop,onToggleAfter:a.noop}}(jQuery);
--------------------------------------------------------------------------------
/docs/_includes/initialize_shuffle.html:
--------------------------------------------------------------------------------
1 |
7 |
8 |
100 |
101 |
102 |
103 |
114 |
115 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/docs/css/printstyles.css:
--------------------------------------------------------------------------------
1 |
2 | /*body.print .container {max-width: 650px;}*/
3 |
4 | body {
5 | font-size:14px;
6 | }
7 | .nav ul li a {border-top:0px; background-color:transparent; color: #808080; }
8 | #navig a[href] {color: #595959 !important;}
9 | table .table {max-width:650px;}
10 |
11 | #navig li.sectionHead {font-weight: bold; font-size: 18px; color: #595959 !important; }
12 | #navig li {font-weight: normal; }
13 |
14 | #navig a[href]::after { content: leader(".") target-counter(attr(href), page); }
15 |
16 | a[href]::after {
17 | content: " (page " target-counter(attr(href), page) ")"
18 | }
19 |
20 | a[href^="http:"]::after, a[href^="https:"]::after {
21 | content: "";
22 | }
23 |
24 | a[href] {
25 | color: blue !important;
26 | }
27 | a[href*="mailto"]::after, a[data-toggle="tooltip"]::after, a[href].noCrossRef::after {
28 | content: "";
29 | }
30 |
31 |
32 | @page {
33 | margin: 60pt 90pt 60pt 90pt;
34 | font-family: sans-serif;
35 | font-style:none;
36 | color: gray;
37 |
38 | }
39 |
40 | .printTitle {
41 | line-height:30pt;
42 | font-size:27pt;
43 | font-weight: bold;
44 | letter-spacing: -.5px;
45 | margin-bottom:25px;
46 | }
47 |
48 | .printSubtitle {
49 | font-size: 19pt;
50 | color: #cccccc !important;
51 | font-family: "Grotesque MT Light";
52 | line-height: 22pt;
53 | letter-spacing: -.5px;
54 | margin-bottom:20px;
55 | }
56 | .printTitleArea hr {
57 | color: #999999 !important;
58 | height: 2px;
59 | width: 100%;
60 | }
61 |
62 | .printTitleImage {
63 | max-width:300px;
64 | margin-bottom:200px;
65 | }
66 |
67 |
68 | .printTitleImage {
69 | max-width: 250px;
70 | }
71 |
72 | #navig {
73 | /*page-break-before: always;*/
74 | }
75 |
76 | .copyrightBoilerplate {
77 | page-break-before:always;
78 | font-size:14px;
79 | }
80 |
81 | .lastGeneratedDate {
82 | font-style: italic;
83 | font-size:14px;
84 | color: gray;
85 | }
86 |
87 | .alert a {
88 | text-decoration: none !important;
89 | }
90 |
91 |
92 | body.title { page: title }
93 |
94 | @page title {
95 | @top-left {
96 | content: " ";
97 | }
98 | @top-right {
99 | content: " "
100 | }
101 | @bottom-right {
102 | content: " ";
103 | }
104 | @bottom-left {
105 | content: " ";
106 | }
107 | }
108 |
109 | body.frontmatter { page: frontmatter }
110 | body.frontmatter {counter-reset: page 1}
111 |
112 |
113 | @page frontmatter {
114 | @top-left {
115 | content: prince-script(guideName);
116 | }
117 | @top-right {
118 | content: prince-script(datestamp);
119 | }
120 | @bottom-right {
121 | content: counter(page, lower-roman);
122 | }
123 | @bottom-left {
124 | content: "youremail@domain.com"; }
125 | }
126 |
127 | body.first_page {counter-reset: page 1}
128 |
129 | h1 { string-set: doctitle content() }
130 |
131 | @page {
132 | @top-left {
133 | content: string(doctitle);
134 | font-size: 11px;
135 | font-style: italic;
136 | }
137 | @top-right {
138 | content: prince-script(datestamp);
139 | font-size: 11px;
140 | }
141 |
142 | @bottom-right {
143 | content: "Page " counter(page);
144 | font-size: 11px;
145 | }
146 | @bottom-left {
147 | content: prince-script(guideName);
148 | font-size: 11px;
149 | }
150 | }
151 | .alert {
152 | background-color: #fafafa !important;
153 | border-color: #dedede !important;
154 | color: black;
155 | }
156 |
157 | pre {
158 | background-color: #fafafa;
159 | }
160 |
--------------------------------------------------------------------------------
/docs/css/syntax.css:
--------------------------------------------------------------------------------
1 | .highlight { background: #ffffff; }
2 | .highlight .c { color: #999988; font-style: italic } /* Comment */
3 | .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
4 | .highlight .k { font-weight: bold } /* Keyword */
5 | .highlight .o { font-weight: bold } /* Operator */
6 | .highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */
7 | .highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */
8 | .highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */
9 | .highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */
10 | .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
11 | .highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */
12 | .highlight .ge { font-style: italic } /* Generic.Emph */
13 | .highlight .gr { color: #aa0000 } /* Generic.Error */
14 | .highlight .gh { color: #999999 } /* Generic.Heading */
15 | .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
16 | .highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */
17 | .highlight .go { color: #888888 } /* Generic.Output */
18 | .highlight .gp { color: #555555 } /* Generic.Prompt */
19 | .highlight .gs { font-weight: bold } /* Generic.Strong */
20 | .highlight .gu { color: #aaaaaa } /* Generic.Subheading */
21 | .highlight .gt { color: #aa0000 } /* Generic.Traceback */
22 | .highlight .kc { font-weight: bold } /* Keyword.Constant */
23 | .highlight .kd { font-weight: bold } /* Keyword.Declaration */
24 | .highlight .kp { font-weight: bold } /* Keyword.Pseudo */
25 | .highlight .kr { font-weight: bold } /* Keyword.Reserved */
26 | .highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */
27 | .highlight .m { color: #009999 } /* Literal.Number */
28 | .highlight .s { color: #d14 } /* Literal.String */
29 | .highlight .na { color: #008080 } /* Name.Attribute */
30 | .highlight .nb { color: #0086B3 } /* Name.Builtin */
31 | .highlight .nc { color: #445588; font-weight: bold } /* Name.Class */
32 | .highlight .no { color: #008080 } /* Name.Constant */
33 | .highlight .ni { color: #800080 } /* Name.Entity */
34 | .highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */
35 | .highlight .nf { color: #990000; font-weight: bold } /* Name.Function */
36 | .highlight .nn { color: #555555 } /* Name.Namespace */
37 | .highlight .nt { color: #000080 } /* Name.Tag */
38 | .highlight .nv { color: #008080 } /* Name.Variable */
39 | .highlight .ow { font-weight: bold } /* Operator.Word */
40 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
41 | .highlight .mf { color: #009999 } /* Literal.Number.Float */
42 | .highlight .mh { color: #009999 } /* Literal.Number.Hex */
43 | .highlight .mi { color: #009999 } /* Literal.Number.Integer */
44 | .highlight .mo { color: #009999 } /* Literal.Number.Oct */
45 | .highlight .sb { color: #d14 } /* Literal.String.Backtick */
46 | .highlight .sc { color: #d14 } /* Literal.String.Char */
47 | .highlight .sd { color: #d14 } /* Literal.String.Doc */
48 | .highlight .s2 { color: #d14 } /* Literal.String.Double */
49 | .highlight .se { color: #d14 } /* Literal.String.Escape */
50 | .highlight .sh { color: #d14 } /* Literal.String.Heredoc */
51 | .highlight .si { color: #d14 } /* Literal.String.Interpol */
52 | .highlight .sx { color: #d14 } /* Literal.String.Other */
53 | .highlight .sr { color: #009926 } /* Literal.String.Regex */
54 | .highlight .s1 { color: #d14 } /* Literal.String.Single */
55 | .highlight .ss { color: #990073 } /* Literal.String.Symbol */
56 | .highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */
57 | .highlight .vc { color: #008080 } /* Name.Variable.Class */
58 | .highlight .vg { color: #008080 } /* Name.Variable.Global */
59 | .highlight .vi { color: #008080 } /* Name.Variable.Instance */
60 | .highlight .il { color: #009999 } /* Literal.Number.Integer.Long */
--------------------------------------------------------------------------------
/docs/js/toc.js:
--------------------------------------------------------------------------------
1 | // https://github.com/ghiculescu/jekyll-table-of-contents
2 | // this library modified by fastai to:
3 | // - update the location.href with the correct anchor when a toc item is clicked on
4 | (function($){
5 | $.fn.toc = function(options) {
6 | var defaults = {
7 | noBackToTopLinks: false,
8 | title: '',
9 | minimumHeaders: 3,
10 | headers: 'h1, h2, h3, h4',
11 | listType: 'ol', // values: [ol|ul]
12 | showEffect: 'show', // values: [show|slideDown|fadeIn|none]
13 | showSpeed: 'slow' // set to 0 to deactivate effect
14 | },
15 | settings = $.extend(defaults, options);
16 |
17 | var headers = $(settings.headers).filter(function() {
18 | // get all headers with an ID
19 | var previousSiblingName = $(this).prev().attr( "name" );
20 | if (!this.id && previousSiblingName) {
21 | this.id = $(this).attr( "id", previousSiblingName.replace(/\./g, "-") );
22 | }
23 | return this.id;
24 | }), output = $(this);
25 | if (!headers.length || headers.length < settings.minimumHeaders || !output.length) {
26 | return;
27 | }
28 |
29 | if (0 === settings.showSpeed) {
30 | settings.showEffect = 'none';
31 | }
32 |
33 | var render = {
34 | show: function() { output.hide().html(html).show(settings.showSpeed); },
35 | slideDown: function() { output.hide().html(html).slideDown(settings.showSpeed); },
36 | fadeIn: function() { output.hide().html(html).fadeIn(settings.showSpeed); },
37 | none: function() { output.html(html); }
38 | };
39 |
40 | var get_level = function(ele) { return parseInt(ele.nodeName.replace("H", ""), 10); }
41 | var highest_level = headers.map(function(_, ele) { return get_level(ele); }).get().sort()[0];
42 | //var return_to_top = ' ';
43 | // other nice icons that can be used instead: glyphicon-upload glyphicon-hand-up glyphicon-chevron-up glyphicon-menu-up glyphicon-triangle-top
44 | var level = get_level(headers[0]),
45 | this_level,
46 | html = settings.title + " <"+settings.listType+">";
47 | headers.on('click', function() {
48 | if (!settings.noBackToTopLinks) {
49 | var pos = $(window).scrollTop();
50 | window.location.hash = this.id;
51 | $(window).scrollTop(pos);
52 | }
53 | })
54 | .addClass('clickable-header')
55 | .each(function(_, header) {
56 | base_url = window.location.href;
57 | base_url = base_url.replace(/#.*$/, "");
58 | this_level = get_level(header);
59 | //if (!settings.noBackToTopLinks && this_level > 1) {
60 | // $(header).addClass('top-level-header').before(return_to_top);
61 | //}
62 | txt = header.textContent.split('¶')[0].split(/\[(test|source)\]/)[0];
63 | if (!txt) {return;}
64 | if (this_level === level) // same level as before; same indenting
65 | html += "" + txt + " ";
66 | else if (this_level <= level){ // higher level than before; end parent ol
67 | for(i = this_level; i < level; i++) {
68 | html += " "+settings.listType+">"
69 | }
70 | html += "" + txt + " ";
71 | }
72 | else if (this_level > level) { // lower level than before; expand the previous to contain a ol
73 | for(i = this_level; i > level; i--) {
74 | html += "<"+settings.listType+">"+((i-level == 2) ? "" : " ")
75 | }
76 | html += "" + txt + " ";
77 | }
78 | level = this_level; // update for the next one
79 | });
80 | html += ""+settings.listType+">";
81 | if (!settings.noBackToTopLinks) {
82 | $(document).on('click', '.back-to-top', function() {
83 | $(window).scrollTop(0);
84 | window.location.hash = '';
85 | });
86 | }
87 |
88 | render[settings.showEffect]();
89 | };
90 | })(jQuery);
91 |
--------------------------------------------------------------------------------
/docs/_layouts/default.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | {% include head.html %}
5 |
41 |
46 |
57 | {% if page.datatable == true %}
58 |
59 |
60 |
61 |
66 |
76 | {% endif %}
77 |
78 |
79 |
80 | {% include topnav.html %}
81 |
82 |
83 |
84 |
85 |
86 | {% assign content_col_size = "col-md-12" %}
87 | {% unless page.hide_sidebar %}
88 |
89 |
92 | {% assign content_col_size = "col-md-9" %}
93 | {% endunless %}
94 |
95 |
96 |
97 | {{content}}
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 | {% if site.google_analytics %}
108 | {% include google_analytics.html %}
109 | {% endif %}
110 |
111 |
--------------------------------------------------------------------------------
/docs/_includes/head.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | {{ page.title }} | {{ site.site_title }}
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | {% if site.use_math %}
25 |
26 |
27 |
28 |
39 | {% endif %}
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
56 |
57 |
58 |
59 |
60 | {% if site.twitter_username %}
61 |
62 |
63 |
64 | {% endif %}
65 |
66 | {% if page.summary %}
67 |
68 | {% else %}
69 |
70 | {% endif %}
71 |
72 | {% if page.image %}
73 |
74 |
75 | {% else %}
76 |
77 |
78 | {% endif %}
79 |
80 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docs/js/jekyll-search.js:
--------------------------------------------------------------------------------
1 | !function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a="function"==typeof require&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);throw new Error("Cannot find module '"+o+"'")}var f=n[o]={exports:{}};t[o][0].call(f.exports,function(e){var n=t[o][1][e];return s(n?n:e)},f,f.exports,e,t,n,r)}return n[o].exports}for(var i="function"==typeof require&&require,o=0;o=0}var self=this;self.matches=function(string,crit){return"string"!=typeof string?!1:(string=string.trim(),doMatch(string,crit))}}module.exports=new LiteralSearchStrategy},{}],4:[function(require,module){module.exports=function(){function findMatches(store,crit,strategy){for(var data=store.get(),i=0;i{title} ',noResultsText:"No results found",limit:10,fuzzy:!1};self.init=function(_opt){validateOptions(_opt),assignOptions(_opt),isJSON(opt.dataSource)?initWithJSON(opt.dataSource):initWithURL(opt.dataSource)}}var Searcher=require("./Searcher"),Templater=require("./Templater"),Store=require("./Store"),JSONLoader=require("./JSONLoader"),searcher=new Searcher,templater=new Templater,store=new Store,jsonLoader=new JSONLoader;window.SimpleJekyllSearch=new SimpleJekyllSearch}(window,document)},{"./JSONLoader":1,"./Searcher":4,"./Store":5,"./Templater":6}]},{},[7]);
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dtype_diet
2 | > Attempt to shrink Pandas `dtypes` without losing data so you have more RAM (and maybe more speed)
3 |
4 |
5 | This file will become your README and also the index of your documentation.
6 |
7 | ## Install
8 |
9 | `pip install dtype_diet`
10 |
11 | # Documentation
12 | https://noklam.github.io/dtype_diet/
13 |
14 | ## How to use
15 |
16 | > This is a fork of https://github.com/ianozsvald/dtype_diet to continue supporting and developing the library, with approval from the original author @ianozsvald.
17 |
18 | This tool checks each column to see if larger dtypes (e.g. 8 byte `float64` and `int64`) could be shrunk to smaller `dtypes` without causing any data loss.
19 | Dropping an 8 byte type to a 4 (or 2 or 1 byte) type will keep halving the RAM requirement for that column. Categoricals are proposed for `object` columns which can bring significant speed and RAM benefits.
20 |
21 |
22 | Here's a minimal example with 3 lines of code running on a Kaggle dataset, showing a reduction of 957 -> 85MB. You can find the notebook in the [repository](https://github.com/noklam/dtype_diet/blob/master/01_example.ipynb):
23 |
24 | ```python
25 | #slow
26 | # sell_prices.csv.zip
27 | # Source data: https://www.kaggle.com/c/m5-forecasting-uncertainty/
28 | import pandas as pd
29 | from dtype_diet import report_on_dataframe, optimize_dtypes
30 | df = pd.read_csv('data/sell_prices.csv')
31 | proposed_df = report_on_dataframe(df, unit="MB")
32 | new_df = optimize_dtypes(df, proposed_df)
33 | print(f'Original df memory: {df.memory_usage(deep=True).sum()/1024/1024} MB')
34 | print(f'Proposed df memory: {new_df.memory_usage(deep=True).sum()/1024/1024} MB')
35 | ```
36 |
37 | Original df memory: 957.5197134017944 MB
38 | Proposed df memory: 85.09655094146729 MB
39 |
40 |
41 | ```python
42 | #slow
43 | proposed_df
44 | ```
45 |
46 |
47 |
48 |
49 |
50 |
63 |
64 |
65 |
66 |
67 | Current dtype
68 | Proposed dtype
69 | Current Memory (MB)
70 | Proposed Memory (MB)
71 | Ram Usage Improvement (MB)
72 | Ram Usage Improvement (%)
73 |
74 |
75 | Column
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 | store_id
87 | object
88 | category
89 | 203763.920410
90 | 3340.907715
91 | 200423.012695
92 | 98.360403
93 |
94 |
95 | item_id
96 | object
97 | category
98 | 233039.977539
99 | 6824.677734
100 | 226215.299805
101 | 97.071456
102 |
103 |
104 | wm_yr_wk
105 | int64
106 | int16
107 | 26723.191406
108 | 6680.844727
109 | 20042.346680
110 | 74.999825
111 |
112 |
113 | sell_price
114 | float64
115 | None
116 | 26723.191406
117 | NaN
118 | NaN
119 | NaN
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 | Recommendations:
128 |
129 | * Run `report_on_dataframe(your_df)` to get recommendations
130 | * Run `optimize_dtypes(df, proposed_df)` to convert to the recommended dtypes.
131 | * Consider if Categoricals will save you RAM (see Caveats below)
132 | * Consider if f32 or f16 will be useful (see Caveats - f32 is _probably_ a reasonable choice unless you have huge ranges of floats)
133 | * Consider if int32, int16, int8 will be useful (see Caveats - overflow may be an issue)
134 | * Look at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.convert_dtypes.html which recommends Pandas nullable dtype alternatives (e.g. to avoid promoting an int64 with NaN items to float64, instead you get Int64 with NaNs and no data loss)
135 | * Look at Extension arrays like https://github.com/JDASoftwareGroup/rle-array (thanks @crepererum [for the tweet](https://twitter.com/crepererum/status/1267441357339201536))
136 |
137 | Look at `report_on_dataframe(your_df)` to get a printed report - no changes are made to your dataframe.
138 |
139 | ## Caveats
140 |
141 | * reduced numeric ranges might lead to overflow (TODO document)
142 | * category dtype can have unexpected effects e.g. need for observed=True in groupby (TODO document)
143 | * f16 is likely to be simulated on modern hardware, so calculations will be 2-3x slower than on f32 or f64
144 | * we could do with a link that explains binary representation of float & int for those wanting to learn more
145 |
146 | ## Development
147 |
148 |
149 | ### Contributors
150 |
151 | * Antony Milbourne https://github.com/amilbourne
152 | * Mani https://github.com/neomatrix369
153 |
154 | ### Local Setup
155 |
156 | ```
157 | $ conda create -n dtype_diet python=3.8 pandas jupyter pyarrow pytest
158 | $ conda activate dtype_diet
159 | ```
160 |
161 | ## Release
162 | ```
163 | make release
164 | ```
165 | # Contributing
166 | The repository is developed with `nbdev`, a system for developing libraries with notebooks.
167 |
168 | Make sure you run this if you want to contribute to the library. For details, please refer to nbdev documentation (https://github.com/fastai/nbdev)
169 | ```
170 | nbdev_install_git_hooks
171 | ```
172 |
173 | Some other useful commands
174 | ```
175 | nbdev_build_docs
176 | nbdev_build_lib
177 | nbdev_test_nbs
178 | ```
179 |
--------------------------------------------------------------------------------
/dtype_diet/core.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: 00_core.ipynb (unless otherwise specified).
2 |
3 | __all__ = ['count_errors', 'map_dtypes_to_choices', 'AsType', 'Row', 'get_smallest_valid_conversion', 'get_improvement',
4 | 'report_on_dataframe', 'optimize_dtypes']
5 |
6 | # Cell
7 | """Propose RAM-saving changes in a DataFrame"""
8 |
9 | import pandas as pd
10 | import numpy as np
11 | from collections import namedtuple
12 |
13 |
14 | # TODO
15 | # more tests
16 | # test float64->float32->float16
17 | # consider uint64/32/16/8
18 | # does the "object" check work if col has non-str items?
19 | # enable approx-equal with np.close (note for big nbrs, a big delta is "acceptable" with this)
20 |
21 | # convert_dtypes converts e.g. int64 to Int64 (nullable) regardless of nulls, also obj->string
22 | # so it doesn't save RAM but it does suggest new safer datatypes
23 | # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.convert_dtypes.html
24 |
25 | # For a dtype count the nbr of conversions that aren't equal, the RAM cost
26 | # of the conversion and the column name
27 | AsType = namedtuple("AsType", ["dtype", "nbr_different", "nbytes", "col"])
28 | _fields = (
29 | "column",
30 | "current_dtype",
31 | "proposed_dtype",
32 | "current_memory",
33 | "proposed_memory",
34 | "ram_usage_improvement",
35 | )
36 | Row = namedtuple("Row", _fields, defaults=(None,) * len(_fields))
37 |
38 |
def count_errors(ser: pd.Series, new_dtype):
    """Trial-cast ``ser`` to ``new_dtype`` and measure what the cast costs.

    Args:
        ser: Column to convert.
        new_dtype: Dtype handed to ``Series.astype``.

    Returns:
        AsType: the candidate dtype, the number of entries that
        ``Series.compare`` reports as different after the cast, the deep
        memory usage of the converted series, and ``ser.name``.
    """
    converted = ser.astype(new_dtype)
    # Exact comparison rather than np.isclose: the length of the compare()
    # result is used as the count of entries changed by the cast.
    # Series.compare needs pandas >= 1.1.0.
    return AsType(
        dtype=new_dtype,
        nbr_different=len(ser.compare(converted)),
        nbytes=converted.memory_usage(deep=True),
        col=ser.name,
    )
50 |
51 |
def map_dtypes_to_choices(ser: pd.Series, optimize: str):
    """Return the candidate dtypes a column may be shrunk to.

    Args:
        ser: Column whose current dtype name selects the candidates.
        optimize: "memory" allows floats down to float16; "computation"
            stops at float32.

    Returns:
        list[str] | None: Candidate dtype names, widest first, or ``None``
        when the column's dtype is not int64, float64 or object.

    Raises:
        ValueError: If ``optimize`` is neither "memory" nor "computation".
    """
    # Only the float candidates depend on the optimisation target.
    float_choices = {
        "memory": ["float32", "float16"],
        "computation": ["float32"],
    }
    if optimize not in float_choices:
        raise ValueError(
            f"optimize must be 'memory' or 'computation', got {optimize!r}"
        )

    new_dtypes = {
        "int64": ["int32", "int16", "int8"],
        "float64": float_choices[optimize],
        "object": ["category"],
    }
    return new_dtypes.get(ser.dtype.name)
67 |
68 | # Cell
def get_smallest_valid_conversion(ser: pd.Series, optimize: str):
    """Find the first lossless candidate dtype, trying the list back to front.

    Args:
        ser: Column to analyse.
        optimize: Optimisation target forwarded to ``map_dtypes_to_choices``.

    Returns:
        AsType | None: Result of the first candidate whose conversion changes
        no value, or ``None`` when there are no candidates or none is lossless.
    """
    candidates = map_dtypes_to_choices(ser, optimize) or []
    # Walk the candidate list from its last entry to its first and stop at the
    # first conversion that leaves every value unchanged.
    for candidate in candidates[::-1]:
        outcome = count_errors(ser, candidate)
        if outcome.nbr_different == 0:
            return outcome
    return None
77 |
78 |
def get_improvement(as_type: AsType, current_nbytes: int) -> tuple:
    """Describe the saving a proposed conversion brings, if any.

    Args:
        as_type: Result of a trial conversion (see ``count_errors``).
        current_nbytes: Memory currently used by the column.

    Returns:
        tuple: ``(proposed_nbytes, proposed_dtype, saved_nbytes)`` when the
        conversion uses strictly less memory, otherwise
        ``(None, None, None)``.
    """
    saved_nbytes = current_nbytes - as_type.nbytes
    if saved_nbytes > 0:
        return (as_type.nbytes, as_type.dtype, saved_nbytes)
    # No gain: signal "nothing to propose".
    return (None, None, None)
89 |
90 | # Cell
def report_on_dataframe(
    df: pd.DataFrame, unit: str = "MB", optimize: str = "memory"
) -> pd.DataFrame:
    """Report, per column, a smaller dtype that keeps every value intact.

    Args:
        df: DataFrame to analyse.
        unit: Unit of the memory columns, one of "byte", "KB", "MB", "GB".
            Defaults to "MB".
        optimize: "memory" or "computation"; forwarded to
            ``get_smallest_valid_conversion``. Defaults to "memory".

    Returns:
        pd.DataFrame: Indexed by "Column", with the columns "Current dtype",
        "Proposed dtype", "Current Memory (unit)", "Proposed Memory (unit)",
        "Ram Usage Improvement (unit)" and "Ram Usage Improvement (%)".
        Columns without a memory-saving proposal have no proposed dtype and
        NaN in the proposed-memory and improvement columns.

    Raises:
        KeyError: If ``unit`` is not one of the supported units.
    """
    # Binary multiples: 1 KB = 1024 bytes, 1 MB = 1024**2, 1 GB = 1024**3.
    unit_map = {"KB": 1024 ** 1, "MB": 1024 ** 2, "GB": 1024 ** 3, "byte": 1}
    divide_by = unit_map[unit]
    rows = []

    for col in df.columns:
        as_type = get_smallest_valid_conversion(df[col], optimize)
        nbytes = df[col].memory_usage(deep=True)
        proposed_memory, proposed_dtype, ram_usage_improvement = None, None, None
        if as_type is not None:
            proposed_bytes, proposed_dtype, saved_bytes = get_improvement(
                as_type, nbytes
            )
            # get_improvement returns all-None when the conversion saves nothing.
            if proposed_bytes is not None:
                proposed_memory = proposed_bytes / divide_by
                ram_usage_improvement = saved_bytes / divide_by
        rows.append(
            Row(
                column=col,
                current_dtype=df[col].dtype,
                current_memory=nbytes / divide_by,
                proposed_memory=proposed_memory,
                proposed_dtype=proposed_dtype,
                ram_usage_improvement=ram_usage_improvement,
            )
        )

    current_col = f"Current Memory ({unit})"
    proposed_col = f"Proposed Memory ({unit})"
    improvement_col = f"Ram Usage Improvement ({unit})"
    columns = [
        "Column",
        "Current dtype",
        "Proposed dtype",
        current_col,
        proposed_col,
        improvement_col,
    ]
    report_df = pd.DataFrame(rows, columns=columns)
    # When no column has a proposal these columns hold only None and are built
    # with object dtype; cast them so the percentage below is NaN rather than
    # a failing None / float division.
    for name in (proposed_col, improvement_col):
        report_df[name] = report_df[name].astype(float)
    report_df["Ram Usage Improvement (%)"] = (
        report_df[improvement_col] / report_df[current_col] * 100
    )
    return report_df.set_index("Column")
149 |
150 | # Cell
def optimize_dtypes(df: pd.DataFrame, proposed_df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with each column cast to its proposed dtype.

    Args:
        df: Source DataFrame; it is left untouched.
        proposed_df: Report indexed by column name with a "Proposed dtype"
            column, as produced by ``report_on_dataframe``.

    Returns:
        pd.DataFrame: Copy of ``df`` where every column that has a proposal is
        converted with ``astype``; columns whose proposal is missing
        (``None``, NaN or otherwise empty) keep their dtype.

    Raises:
        KeyError: If a column of ``df`` is absent from ``proposed_df``.
    """
    new_df = df.copy()
    for col in df.columns:
        new_dtype = proposed_df.loc[col, "Proposed dtype"]
        # NaN is truthy, so a plain truthiness test would pass a missing
        # proposal stored as NaN on to astype(); check for it explicitly.
        if pd.isna(new_dtype) or not new_dtype:
            continue
        new_df[col] = new_df[col].astype(new_dtype)
    return new_df
--------------------------------------------------------------------------------
/01_example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Minimal Example\n",
8 | "\n",
9 | "> A minimal example to show how to use dtype_diet to optimize memory footprint."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "name": "stdout",
19 | "output_type": "stream",
20 | "text": [
21 | "The autoreload extension is already loaded. To reload it, use:\n",
22 | " %reload_ext autoreload\n"
23 | ]
24 | }
25 | ],
26 | "source": [
27 | "#hide\n",
28 | "%load_ext autoreload\n",
29 | "%autoreload 2\n",
30 | "\n",
31 | "from dtype_diet import report_on_dataframe, optimize_dtypes\n",
32 | "import pandas as pd"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": null,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "name": "stdout",
42 | "output_type": "stream",
43 | "text": [
44 | "\n",
45 | "RangeIndex: 6841121 entries, 0 to 6841120\n",
46 | "Data columns (total 4 columns):\n",
47 | " # Column Dtype \n",
48 | "--- ------ ----- \n",
49 | " 0 store_id object \n",
50 | " 1 item_id object \n",
51 | " 2 wm_yr_wk int64 \n",
52 | " 3 sell_price float64\n",
53 | "dtypes: float64(1), int64(1), object(2)\n",
54 | "memory usage: 957.5 MB\n"
55 | ]
56 | }
57 | ],
58 | "source": [
59 | "#slow\n",
60 | "# sell_prices.csv.zip \n",
61 | "# Source data: https://www.kaggle.com/c/m5-forecasting-uncertainty/\n",
62 | "df = pd.read_csv('data/sell_prices.csv')\n",
63 | "df.info(memory_usage='deep')"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "#slow\n",
73 | "proposed_df = report_on_dataframe(df, unit=\"MB\")"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": null,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "data": {
83 | "text/html": [
84 | "\n",
85 | "\n",
98 | "
\n",
99 | " \n",
100 | " \n",
101 | " \n",
102 | " Current dtype \n",
103 | " Proposed dtype \n",
104 | " Current Memory (MB) \n",
105 | " Proposed Memory (MB) \n",
106 | " Ram Usage Improvement (MB) \n",
107 | " Ram Usage Improvement (%) \n",
108 | " \n",
109 | " \n",
110 | " Column \n",
111 | " \n",
112 | " \n",
113 | " \n",
114 | " \n",
115 | " \n",
116 | " \n",
117 | " \n",
118 | " \n",
119 | " \n",
120 | " \n",
121 | " store_id \n",
122 | " object \n",
123 | " category \n",
124 | " 203763.920410 \n",
125 | " 3340.907715 \n",
126 | " 200423.012695 \n",
127 | " 98.360403 \n",
128 | " \n",
129 | " \n",
130 | " item_id \n",
131 | " object \n",
132 | " category \n",
133 | " 233039.977539 \n",
134 | " 6824.677734 \n",
135 | " 226215.299805 \n",
136 | " 97.071456 \n",
137 | " \n",
138 | " \n",
139 | " wm_yr_wk \n",
140 | " int64 \n",
141 | " int16 \n",
142 | " 26723.191406 \n",
143 | " 6680.844727 \n",
144 | " 20042.346680 \n",
145 | " 74.999825 \n",
146 | " \n",
147 | " \n",
148 | " sell_price \n",
149 | " float64 \n",
150 | " None \n",
151 | " 26723.191406 \n",
152 | " NaN \n",
153 | " NaN \n",
154 | " NaN \n",
155 | " \n",
156 | " \n",
157 | "
\n",
158 | "
"
159 | ],
160 | "text/plain": [
161 | " Current dtype Proposed dtype Current Memory (MB) \\\n",
162 | "Column \n",
163 | "store_id object category 203763.920410 \n",
164 | "item_id object category 233039.977539 \n",
165 | "wm_yr_wk int64 int16 26723.191406 \n",
166 | "sell_price float64 None 26723.191406 \n",
167 | "\n",
168 | " Proposed Memory (MB) Ram Usage Improvement (MB) \\\n",
169 | "Column \n",
170 | "store_id 3340.907715 200423.012695 \n",
171 | "item_id 6824.677734 226215.299805 \n",
172 | "wm_yr_wk 6680.844727 20042.346680 \n",
173 | "sell_price NaN NaN \n",
174 | "\n",
175 | " Ram Usage Improvement (%) \n",
176 | "Column \n",
177 | "store_id 98.360403 \n",
178 | "item_id 97.071456 \n",
179 | "wm_yr_wk 74.999825 \n",
180 | "sell_price NaN "
181 | ]
182 | },
183 | "execution_count": null,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "#slow\n",
190 | "proposed_df"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "It shows potential dtypes for conversion. You should review whether they could cause overflow issues in the future and modify them accordingly if needed."
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "execution_count": null,
203 | "metadata": {},
204 | "outputs": [],
205 | "source": [
206 | "#slow\n",
207 | "new_df = optimize_dtypes(df, proposed_df)"
208 | ]
209 | },
210 | {
211 | "cell_type": "markdown",
212 | "metadata": {},
213 | "source": [
214 | "`optimize_dtypes` takes your `df` and the `proposed_df` as arguments and converts the dataframe to the proposed dtypes."
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [
222 | {
223 | "name": "stdout",
224 | "output_type": "stream",
225 | "text": [
226 | "Original df memory: 957.5197134017944 MB\n",
227 | "Propsed df memory: 85.09655094146729 MB\n"
228 | ]
229 | }
230 | ],
231 | "source": [
232 | "#slow\n",
233 | "print(f'Original df memory: {df.memory_usage(deep=True).sum()/1024/1024} MB')\n",
234 | "print(f'Propsed df memory: {new_df.memory_usage(deep=True).sum()/1024/1024} MB')"
235 | ]
236 | }
237 | ],
238 | "metadata": {
239 | "kernelspec": {
240 | "display_name": "Python 3",
241 | "language": "python",
242 | "name": "python3"
243 | }
244 | },
245 | "nbformat": 4,
246 | "nbformat_minor": 2
247 | }
248 |
--------------------------------------------------------------------------------
/docs/Gemfile.lock:
--------------------------------------------------------------------------------
1 | GEM
2 | remote: https://rubygems.org/
3 | specs:
4 | activesupport (6.0.3.2)
5 | concurrent-ruby (~> 1.0, >= 1.0.2)
6 | i18n (>= 0.7, < 2)
7 | minitest (~> 5.1)
8 | tzinfo (~> 1.1)
9 | zeitwerk (~> 2.2, >= 2.2.2)
10 | addressable (2.7.0)
11 | public_suffix (>= 2.0.2, < 5.0)
12 | coffee-script (2.4.1)
13 | coffee-script-source
14 | execjs
15 | coffee-script-source (1.11.1)
16 | colorator (1.1.0)
17 | commonmarker (0.17.13)
18 | ruby-enum (~> 0.5)
19 | concurrent-ruby (1.1.7)
20 | dnsruby (1.61.4)
21 | simpleidn (~> 0.1)
22 | em-websocket (0.5.1)
23 | eventmachine (>= 0.12.9)
24 | http_parser.rb (~> 0.6.0)
25 | ethon (0.12.0)
26 | ffi (>= 1.3.0)
27 | eventmachine (1.2.7)
28 | execjs (2.7.0)
29 | faraday (1.0.1)
30 | multipart-post (>= 1.2, < 3)
31 | ffi (1.13.1)
32 | forwardable-extended (2.6.0)
33 | gemoji (3.0.1)
34 | github-pages (207)
35 | github-pages-health-check (= 1.16.1)
36 | jekyll (= 3.9.0)
37 | jekyll-avatar (= 0.7.0)
38 | jekyll-coffeescript (= 1.1.1)
39 | jekyll-commonmark-ghpages (= 0.1.6)
40 | jekyll-default-layout (= 0.1.4)
41 | jekyll-feed (= 0.13.0)
42 | jekyll-gist (= 1.5.0)
43 | jekyll-github-metadata (= 2.13.0)
44 | jekyll-mentions (= 1.5.1)
45 | jekyll-optional-front-matter (= 0.3.2)
46 | jekyll-paginate (= 1.1.0)
47 | jekyll-readme-index (= 0.3.0)
48 | jekyll-redirect-from (= 0.15.0)
49 | jekyll-relative-links (= 0.6.1)
50 | jekyll-remote-theme (= 0.4.1)
51 | jekyll-sass-converter (= 1.5.2)
52 | jekyll-seo-tag (= 2.6.1)
53 | jekyll-sitemap (= 1.4.0)
54 | jekyll-swiss (= 1.0.0)
55 | jekyll-theme-architect (= 0.1.1)
56 | jekyll-theme-cayman (= 0.1.1)
57 | jekyll-theme-dinky (= 0.1.1)
58 | jekyll-theme-hacker (= 0.1.1)
59 | jekyll-theme-leap-day (= 0.1.1)
60 | jekyll-theme-merlot (= 0.1.1)
61 | jekyll-theme-midnight (= 0.1.1)
62 | jekyll-theme-minimal (= 0.1.1)
63 | jekyll-theme-modernist (= 0.1.1)
64 | jekyll-theme-primer (= 0.5.4)
65 | jekyll-theme-slate (= 0.1.1)
66 | jekyll-theme-tactile (= 0.1.1)
67 | jekyll-theme-time-machine (= 0.1.1)
68 | jekyll-titles-from-headings (= 0.5.3)
69 | jemoji (= 0.11.1)
70 | kramdown (= 2.3.0)
71 | kramdown-parser-gfm (= 1.1.0)
72 | liquid (= 4.0.3)
73 | mercenary (~> 0.3)
74 | minima (= 2.5.1)
75 | nokogiri (>= 1.10.4, < 2.0)
76 | rouge (= 3.19.0)
77 | terminal-table (~> 1.4)
78 | github-pages-health-check (1.16.1)
79 | addressable (~> 2.3)
80 | dnsruby (~> 1.60)
81 | octokit (~> 4.0)
82 | public_suffix (~> 3.0)
83 | typhoeus (~> 1.3)
84 | html-pipeline (2.14.0)
85 | activesupport (>= 2)
86 | nokogiri (>= 1.4)
87 | http_parser.rb (0.6.0)
88 | i18n (0.9.5)
89 | concurrent-ruby (~> 1.0)
90 | jekyll (3.9.0)
91 | addressable (~> 2.4)
92 | colorator (~> 1.0)
93 | em-websocket (~> 0.5)
94 | i18n (~> 0.7)
95 | jekyll-sass-converter (~> 1.0)
96 | jekyll-watch (~> 2.0)
97 | kramdown (>= 1.17, < 3)
98 | liquid (~> 4.0)
99 | mercenary (~> 0.3.3)
100 | pathutil (~> 0.9)
101 | rouge (>= 1.7, < 4)
102 | safe_yaml (~> 1.0)
103 | jekyll-avatar (0.7.0)
104 | jekyll (>= 3.0, < 5.0)
105 | jekyll-coffeescript (1.1.1)
106 | coffee-script (~> 2.2)
107 | coffee-script-source (~> 1.11.1)
108 | jekyll-commonmark (1.3.1)
109 | commonmarker (~> 0.14)
110 | jekyll (>= 3.7, < 5.0)
111 | jekyll-commonmark-ghpages (0.1.6)
112 | commonmarker (~> 0.17.6)
113 | jekyll-commonmark (~> 1.2)
114 | rouge (>= 2.0, < 4.0)
115 | jekyll-default-layout (0.1.4)
116 | jekyll (~> 3.0)
117 | jekyll-feed (0.13.0)
118 | jekyll (>= 3.7, < 5.0)
119 | jekyll-gist (1.5.0)
120 | octokit (~> 4.2)
121 | jekyll-github-metadata (2.13.0)
122 | jekyll (>= 3.4, < 5.0)
123 | octokit (~> 4.0, != 4.4.0)
124 | jekyll-mentions (1.5.1)
125 | html-pipeline (~> 2.3)
126 | jekyll (>= 3.7, < 5.0)
127 | jekyll-optional-front-matter (0.3.2)
128 | jekyll (>= 3.0, < 5.0)
129 | jekyll-paginate (1.1.0)
130 | jekyll-readme-index (0.3.0)
131 | jekyll (>= 3.0, < 5.0)
132 | jekyll-redirect-from (0.15.0)
133 | jekyll (>= 3.3, < 5.0)
134 | jekyll-relative-links (0.6.1)
135 | jekyll (>= 3.3, < 5.0)
136 | jekyll-remote-theme (0.4.1)
137 | addressable (~> 2.0)
138 | jekyll (>= 3.5, < 5.0)
139 | rubyzip (>= 1.3.0)
140 | jekyll-sass-converter (1.5.2)
141 | sass (~> 3.4)
142 | jekyll-seo-tag (2.6.1)
143 | jekyll (>= 3.3, < 5.0)
144 | jekyll-sitemap (1.4.0)
145 | jekyll (>= 3.7, < 5.0)
146 | jekyll-swiss (1.0.0)
147 | jekyll-theme-architect (0.1.1)
148 | jekyll (~> 3.5)
149 | jekyll-seo-tag (~> 2.0)
150 | jekyll-theme-cayman (0.1.1)
151 | jekyll (~> 3.5)
152 | jekyll-seo-tag (~> 2.0)
153 | jekyll-theme-dinky (0.1.1)
154 | jekyll (~> 3.5)
155 | jekyll-seo-tag (~> 2.0)
156 | jekyll-theme-hacker (0.1.1)
157 | jekyll (~> 3.5)
158 | jekyll-seo-tag (~> 2.0)
159 | jekyll-theme-leap-day (0.1.1)
160 | jekyll (~> 3.5)
161 | jekyll-seo-tag (~> 2.0)
162 | jekyll-theme-merlot (0.1.1)
163 | jekyll (~> 3.5)
164 | jekyll-seo-tag (~> 2.0)
165 | jekyll-theme-midnight (0.1.1)
166 | jekyll (~> 3.5)
167 | jekyll-seo-tag (~> 2.0)
168 | jekyll-theme-minimal (0.1.1)
169 | jekyll (~> 3.5)
170 | jekyll-seo-tag (~> 2.0)
171 | jekyll-theme-modernist (0.1.1)
172 | jekyll (~> 3.5)
173 | jekyll-seo-tag (~> 2.0)
174 | jekyll-theme-primer (0.5.4)
175 | jekyll (> 3.5, < 5.0)
176 | jekyll-github-metadata (~> 2.9)
177 | jekyll-seo-tag (~> 2.0)
178 | jekyll-theme-slate (0.1.1)
179 | jekyll (~> 3.5)
180 | jekyll-seo-tag (~> 2.0)
181 | jekyll-theme-tactile (0.1.1)
182 | jekyll (~> 3.5)
183 | jekyll-seo-tag (~> 2.0)
184 | jekyll-theme-time-machine (0.1.1)
185 | jekyll (~> 3.5)
186 | jekyll-seo-tag (~> 2.0)
187 | jekyll-titles-from-headings (0.5.3)
188 | jekyll (>= 3.3, < 5.0)
189 | jekyll-watch (2.2.1)
190 | listen (~> 3.0)
191 | jemoji (0.11.1)
192 | gemoji (~> 3.0)
193 | html-pipeline (~> 2.2)
194 | jekyll (>= 3.0, < 5.0)
195 | kramdown (2.3.0)
196 | rexml
197 | kramdown-parser-gfm (1.1.0)
198 | kramdown (~> 2.0)
199 | liquid (4.0.3)
200 | listen (3.2.1)
201 | rb-fsevent (~> 0.10, >= 0.10.3)
202 | rb-inotify (~> 0.9, >= 0.9.10)
203 | mercenary (0.3.6)
204 | mini_portile2 (2.4.0)
205 | minima (2.5.1)
206 | jekyll (>= 3.5, < 5.0)
207 | jekyll-feed (~> 0.9)
208 | jekyll-seo-tag (~> 2.1)
209 | minitest (5.14.1)
210 | multipart-post (2.1.1)
211 | nokogiri (1.10.8)
212 | mini_portile2 (~> 2.4.0)
213 | octokit (4.18.0)
214 | faraday (>= 0.9)
215 | sawyer (~> 0.8.0, >= 0.5.3)
216 | pathutil (0.16.2)
217 | forwardable-extended (~> 2.6)
218 | public_suffix (3.1.1)
219 | rb-fsevent (0.10.4)
220 | rb-inotify (0.10.1)
221 | ffi (~> 1.0)
222 | rexml (3.2.4)
223 | rouge (3.19.0)
224 | ruby-enum (0.8.0)
225 | i18n
226 | rubyzip (2.3.0)
227 | safe_yaml (1.0.5)
228 | sass (3.7.4)
229 | sass-listen (~> 4.0.0)
230 | sass-listen (4.0.0)
231 | rb-fsevent (~> 0.9, >= 0.9.4)
232 | rb-inotify (~> 0.9, >= 0.9.7)
233 | sawyer (0.8.2)
234 | addressable (>= 2.3.5)
235 | faraday (> 0.8, < 2.0)
236 | simpleidn (0.1.1)
237 | unf (~> 0.1.4)
238 | terminal-table (1.8.0)
239 | unicode-display_width (~> 1.1, >= 1.1.1)
240 | thread_safe (0.3.6)
241 | typhoeus (1.4.0)
242 | ethon (>= 0.9.0)
243 | tzinfo (1.2.7)
244 | thread_safe (~> 0.1)
245 | unf (0.1.4)
246 | unf_ext
247 | unf_ext (0.0.7.7)
248 | unicode-display_width (1.7.0)
249 | zeitwerk (2.4.0)
250 |
251 | PLATFORMS
252 | ruby
253 | x86_64-linux
254 |
255 | DEPENDENCIES
256 | github-pages
257 | jekyll (>= 3.7)
258 | kramdown (>= 2.3.0)
259 | nokogiri (< 1.10.9)
260 |
261 | BUNDLED WITH
262 | 2.2.0
263 |
--------------------------------------------------------------------------------
/docs/example.html:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | title: Minimal Example
4 |
5 |
6 | keywords: fastai
7 | sidebar: home_sidebar
8 |
9 | summary: "A minimal example to show how to use dtype_diet to optimize memory footprint."
10 | description: "A minimal example to show how to use dtype_diet to optimize memory footprint."
11 | nb_path: "01_example.ipynb"
12 | ---
13 |
22 |
23 |
24 |
25 | {% raw %}
26 |
27 |
28 |
29 |
30 | {% endraw %}
31 |
32 | {% raw %}
33 |
34 |
35 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
<class 'pandas.core.frame.DataFrame'>
56 | RangeIndex: 6841121 entries, 0 to 6841120
57 | Data columns (total 4 columns):
58 | # Column Dtype
59 | --- ------ -----
60 | 0 store_id object
61 | 1 item_id object
62 | 2 wm_yr_wk int64
63 | 3 sell_price float64
64 | dtypes: float64(1), int64(1), object(2)
65 | memory usage: 957.5 MB
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 | {% endraw %}
75 |
76 | {% raw %}
77 |
78 |
91 | {% endraw %}
92 |
93 | {% raw %}
94 |
95 |
96 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
128 |
129 |
130 |
131 |
132 | Current dtype
133 | Proposed dtype
134 | Current Memory (MB)
135 | Proposed Memory (MB)
136 | Ram Usage Improvement (MB)
137 | Ram Usage Improvement (%)
138 |
139 |
140 | Column
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 | store_id
152 | object
153 | category
154 | 203763.920410
155 | 3340.907715
156 | 200423.012695
157 | 98.360403
158 |
159 |
160 | item_id
161 | object
162 | category
163 | 233039.977539
164 | 6824.677734
165 | 226215.299805
166 | 97.071456
167 |
168 |
169 | wm_yr_wk
170 | int64
171 | int16
172 | 26723.191406
173 | 6680.844727
174 | 20042.346680
175 | 74.999825
176 |
177 |
178 | sell_price
179 | float64
180 | None
181 | 26723.191406
182 | NaN
183 | NaN
184 | NaN
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 | {% endraw %}
198 |
199 |
200 |
201 |
It shows potential dtypes for conversion. You should review whether they could cause overflow issues in the future and modify them accordingly if needed.
202 |
203 |
204 |
205 |
206 | {% raw %}
207 |
208 |
221 | {% endraw %}
222 |
223 |
224 |
225 |
optimize_dtypes takes your df and the proposed_df as arguments and converts the dataframe to the proposed dtypes.
226 |
227 |
228 |
229 |
230 | {% raw %}
231 |
232 |
233 |
244 |
245 |
246 |
247 |
248 |
249 |
250 |
251 |
Original df memory: 957.5197134017944 MB
252 | Propsed df memory: 85.09655094146729 MB
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 | {% endraw %}
262 |
263 |
264 |
265 |
266 |
--------------------------------------------------------------------------------
/index.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#hide\n",
10 | "from dtype_diet.core import *"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {},
16 | "source": [
17 | "# dtype_diet\n",
18 | "\n",
19 | "> Attempt to shrink Pandas `dtypes` without losing data so you have more RAM (and maybe more speed)"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "This file will become your README and also the index of your documentation."
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "## Install"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {},
39 | "source": [
40 | "`pip install dtype_diet`"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "# Documentation\n",
48 | "https://noklam.github.io/dtype_diet/"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "## How to use"
56 | ]
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {},
61 | "source": [
62 | "> This is a fork of https://github.com/ianozsvald/dtype_diet to continue supporting and developing the library, with approval from the original author @ianozsvald.\n",
63 | "\n",
64 | "\n",
65 | "This tool checks each column to see if larger dtypes (e.g. 8 byte `float64` and `int64`) could be shrunk to smaller `dtypes` without causing any data loss. \n",
66 | "Dropping an 8 byte type to a 4 (or 2 or 1 byte) type will keep halving the RAM requirement for that column. Categoricals are proposed for `object` columns which can bring significant speed and RAM benefits.\n",
67 | "\n",
68 | "\n",
69 | "Here's a minimal example with 3 lines of code running on a Kaggle dataset showing a reduction of 957 -> 85MB; you can find the notebook in the [repository](https://github.com/noklam/dtype_diet/blob/master/01_example.ipynb):"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "metadata": {},
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "Original df memory: 957.5197134017944 MB\n",
82 | "Propsed df memory: 85.09655094146729 MB\n"
83 | ]
84 | }
85 | ],
86 | "source": [
87 | "#slow\n",
88 | "# sell_prices.csv.zip \n",
89 | "# Source data: https://www.kaggle.com/c/m5-forecasting-uncertainty/\n",
90 | "import pandas as pd\n",
91 | "from dtype_diet import report_on_dataframe, optimize_dtypes\n",
92 | "df = pd.read_csv('data/sell_prices.csv')\n",
93 | "proposed_df = report_on_dataframe(df, unit=\"MB\")\n",
94 | "new_df = optimize_dtypes(df, proposed_df)\n",
95 | "print(f'Original df memory: {df.memory_usage(deep=True).sum()/1024/1024} MB')\n",
96 | "print(f'Propsed df memory: {new_df.memory_usage(deep=True).sum()/1024/1024} MB')"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": null,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "data": {
106 | "text/html": [
107 | "\n",
108 | "\n",
121 | "
\n",
122 | " \n",
123 | " \n",
124 | " \n",
125 | " Current dtype \n",
126 | " Proposed dtype \n",
127 | " Current Memory (MB) \n",
128 | " Proposed Memory (MB) \n",
129 | " Ram Usage Improvement (MB) \n",
130 | " Ram Usage Improvement (%) \n",
131 | " \n",
132 | " \n",
133 | " Column \n",
134 | " \n",
135 | " \n",
136 | " \n",
137 | " \n",
138 | " \n",
139 | " \n",
140 | " \n",
141 | " \n",
142 | " \n",
143 | " \n",
144 | " store_id \n",
145 | " object \n",
146 | " category \n",
147 | " 203763.920410 \n",
148 | " 3340.907715 \n",
149 | " 200423.012695 \n",
150 | " 98.360403 \n",
151 | " \n",
152 | " \n",
153 | " item_id \n",
154 | " object \n",
155 | " category \n",
156 | " 233039.977539 \n",
157 | " 6824.677734 \n",
158 | " 226215.299805 \n",
159 | " 97.071456 \n",
160 | " \n",
161 | " \n",
162 | " wm_yr_wk \n",
163 | " int64 \n",
164 | " int16 \n",
165 | " 26723.191406 \n",
166 | " 6680.844727 \n",
167 | " 20042.346680 \n",
168 | " 74.999825 \n",
169 | " \n",
170 | " \n",
171 | " sell_price \n",
172 | " float64 \n",
173 | " None \n",
174 | " 26723.191406 \n",
175 | " NaN \n",
176 | " NaN \n",
177 | " NaN \n",
178 | " \n",
179 | " \n",
180 | "
\n",
181 | "
"
182 | ],
183 | "text/plain": [
184 | " Current dtype Proposed dtype Current Memory (MB) \\\n",
185 | "Column \n",
186 | "store_id object category 203763.920410 \n",
187 | "item_id object category 233039.977539 \n",
188 | "wm_yr_wk int64 int16 26723.191406 \n",
189 | "sell_price float64 None 26723.191406 \n",
190 | "\n",
191 | " Proposed Memory (MB) Ram Usage Improvement (MB) \\\n",
192 | "Column \n",
193 | "store_id 3340.907715 200423.012695 \n",
194 | "item_id 6824.677734 226215.299805 \n",
195 | "wm_yr_wk 6680.844727 20042.346680 \n",
196 | "sell_price NaN NaN \n",
197 | "\n",
198 | " Ram Usage Improvement (%) \n",
199 | "Column \n",
200 | "store_id 98.360403 \n",
201 | "item_id 97.071456 \n",
202 | "wm_yr_wk 74.999825 \n",
203 | "sell_price NaN "
204 | ]
205 | },
206 | "execution_count": null,
207 | "metadata": {},
208 | "output_type": "execute_result"
209 | }
210 | ],
211 | "source": [
212 | "#slow\n",
213 | "proposed_df"
214 | ]
215 | },
216 | {
217 | "cell_type": "markdown",
218 | "metadata": {},
219 | "source": [
220 | "Recommendations:\n",
221 | "\n",
222 | "* Run `report_on_dataframe(your_df)` to get recommendations\n",
223 | "* Run `optimize_dtypes(df, proposed_df)` to convert to recommended dtypes.\n",
224 | "* Consider if Categoricals will save you RAM (see Caveats below)\n",
225 | "* Consider if f32 or f16 will be useful (see Caveats - f32 is _probably_ a reasonable choice unless you have huge ranges of floats)\n",
226 | "* Consider if int32, int16, int8 will be useful (see Caveats - overflow may be an issue)\n",
227 | "* Look at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.convert_dtypes.html which recommends Pandas nullable dtype alternatives (e.g. to avoid promoting an int64 with NaN items to float64, instead you get Int64 with NaNs and no data loss)\n",
228 | "* Look at Extension arrays like https://github.com/JDASoftwareGroup/rle-array (thanks @crepererum [for the tweet](https://twitter.com/crepererum/status/1267441357339201536))\n",
229 | "\n",
230 | "Look at `report_on_dataframe(your_df)` to get a printed report - no changes are made to your dataframe."
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "## Caveats\n",
238 | "\n",
239 | "* reduced numeric ranges might lead to overflow (TODO document)\n",
240 | "* category dtype can have unexpected effects e.g. need for observed=True in groupby (TODO document)\n",
241 | "* f16 is likely to be simulated on modern hardware so calculations will be 2-3* slower than on f32 or f64\n",
242 | "* we could do with a link that explains binary representation of float & int for those wanting to learn more\n",
243 | "\n",
244 | "## Development \n",
245 | "\n",
246 | "\n",
247 | "### Contributors\n",
248 | "\n",
249 | "* Antony Milbourne https://github.com/amilbourne\n",
250 | "* Mani https://github.com/neomatrix369\n",
251 | "\n",
252 | "### Local Setup\n",
253 | "\n",
254 | "```\n",
255 | "$ conda create -n dtype_diet python=3.8 pandas jupyter pyarrow pytest\n",
256 | "$ conda activate dtype_diet\n",
257 | "```\n",
258 | "\n",
259 | "## Release\n",
260 | "```\n",
261 | "make release\n",
262 | "```\n",
263 | "# Contributing\n",
264 | "The repository is developed with `nbdev`, a system for developing libraries with notebooks.\n",
265 | "\n",
266 | "Make sure you run this if you want to contribute to the library. For details, please refer to nbdev documentation (https://github.com/fastai/nbdev)\n",
267 | "```\n",
268 | "nbdev_install_git_hooks\n",
269 | "```\n",
270 | "\n",
271 | "Some other useful commands\n",
272 | "```\n",
273 | "nbdev_build_docs\n",
274 | "nbdev_build_lib\n",
275 | "nbdev_test_nbs\n",
276 | "```"
277 | ]
278 | }
279 | ],
280 | "metadata": {
281 | "kernelspec": {
282 | "display_name": "Python 3",
283 | "language": "python",
284 | "name": "python3"
285 | }
286 | },
287 | "nbformat": 4,
288 | "nbformat_minor": 2
289 | }
290 |
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | title: dtype_diet
4 |
5 |
6 | keywords: fastai
7 | sidebar: home_sidebar
8 |
9 | summary: "Attempt to shrink Pandas `dtypes` without losing data so you have more RAM (and maybe more speed)"
10 | description: "Attempt to shrink Pandas `dtypes` without losing data so you have more RAM (and maybe more speed)"
11 | nb_path: "index.ipynb"
12 | ---
13 |
22 |
23 |
24 |
25 | {% raw %}
26 |
27 |
28 |
29 |
30 | {% endraw %}
31 |
32 |
33 |
34 |
This file will become your README and also the index of your documentation.
35 |
36 |
37 |
38 |
39 |
45 |
46 |
47 |
pip install dtype_diet
48 |
49 |
50 |
51 |
52 |
59 |
65 |
66 |
67 |
This is a fork of https://github.com/ianozsvald/dtype_diet to continue supporting and developing the library, with approval from the original author @ianozsvald.
68 |
69 |
This tool checks each column to see if larger dtypes (e.g. 8 byte float64 and int64) could be shrunk to smaller dtypes without causing any data loss.
70 | Dropping an 8 byte type to a 4 (or 2 or 1 byte) type will keep halving the RAM requirement for that column. Categoricals are proposed for object columns which can bring significant speed and RAM benefits.
71 |
Here's a minimal example with 3 lines of code running on a Kaggle dataset showing a reduction of 957 -> 85MB; you can find the notebook in the repository:
72 |
73 |
74 |
75 |
76 | {% raw %}
77 |
78 |
79 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
Original df memory: 957.5197134017944 MB
105 | Propsed df memory: 85.09655094146729 MB
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 | {% endraw %}
115 |
116 | {% raw %}
117 |
118 |
119 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
151 |
152 |
153 |
154 |
155 | Current dtype
156 | Proposed dtype
157 | Current Memory (MB)
158 | Proposed Memory (MB)
159 | Ram Usage Improvement (MB)
160 | Ram Usage Improvement (%)
161 |
162 |
163 | Column
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 | store_id
175 | object
176 | category
177 | 203763.920410
178 | 3340.907715
179 | 200423.012695
180 | 98.360403
181 |
182 |
183 | item_id
184 | object
185 | category
186 | 233039.977539
187 | 6824.677734
188 | 226215.299805
189 | 97.071456
190 |
191 |
192 | wm_yr_wk
193 | int64
194 | int16
195 | 26723.191406
196 | 6680.844727
197 | 20042.346680
198 | 74.999825
199 |
200 |
201 | sell_price
202 | float64
203 | None
204 | 26723.191406
205 | NaN
206 | NaN
207 | NaN
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 | {% endraw %}
221 |
222 |
223 |
224 |
Recommendations:
225 |
226 | Run report_on_dataframe(your_df) to get recommendations
227 | Run optimize_dtypes(df, proposed_df) to convert to recommended dtypes.
228 | Consider if Categoricals will save you RAM (see Caveats below)
229 | Consider if f32 or f16 will be useful (see Caveats - f32 is probably a reasonable choice unless you have huge ranges of floats)
230 | Consider if int32, int16, int8 will be useful (see Caveats - overflow may be an issue)
231 | Look at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.convert_dtypes.html which recommends Pandas nullable dtype alternatives (e.g. to avoid promoting an int64 with NaN items to float64, instead you get Int64 with NaNs and no data loss)
232 | Look at Extension arrays like https://github.com/JDASoftwareGroup/rle-array (thanks @crepererum for the tweet )
233 |
234 |
Look at report_on_dataframe(your_df) to get a printed report - no changes are made to your dataframe.
235 |
236 |
237 |
238 |
239 |
240 |
241 |
Caveats
242 | reduced numeric ranges might lead to overflow (TODO document)
243 | category dtype can have unexpected effects e.g. need for observed=True in groupby (TODO document)
244 | f16 is likely to be simulated on modern hardware so calculations will be 2-3* slower than on f32 or f64
245 | we could do with a link that explains binary representation of float & int for those wanting to learn more
246 |
247 |
Development Contributors
251 |
Local Setup
252 |
$ conda create -n dtype_diet python=3.8 pandas jupyter pyarrow pytest
253 | $ conda activate dtype_diet
254 |
Release
255 |
make release
256 |
Contributing The repository is developed with nbdev, a system for developing library with notebook.
257 |
Make sure you run this if you want to contribute to the library. For details, please refer to nbdev documentation (https://github.com/fastai/nbdev )
258 |
259 |
nbdev_install_git_hooks
260 |
Some other useful commands
261 |
262 |
nbdev_build_docs
263 | nbdev_build_lib
264 | nbdev_test_nbs
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
--------------------------------------------------------------------------------
/docs/core.html:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | title: dtype_diet
4 |
5 |
6 | keywords: fastai
7 | sidebar: home_sidebar
8 |
9 | summary: "Optimize your dataset memory footprint with minimal dtypes."
10 | description: "Optimize your dataset memory footprint with minimal dtypes."
11 | nb_path: "00_core.ipynb"
12 | ---
13 |
22 |
23 |
24 |
25 | {% raw %}
26 |
27 |
28 |
29 |
30 | {% endraw %}
31 |
32 | {% raw %}
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
count_errors(ser :Series, new_dtype )
44 |
45 |
After converting ser to new dtype, count whether items have isclose()
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 | {% endraw %}
56 |
57 | {% raw %}
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
map_dtypes_to_choices(ser :Series, optimize :str)
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 | {% endraw %}
80 |
81 | {% raw %}
82 |
83 |
84 |
85 |
86 | {% endraw %}
87 |
88 | {% raw %}
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
get_smallest_valid_conversion(ser :Series, optimize :str)
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 | {% endraw %}
111 |
112 | {% raw %}
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
get_improvement(as_type :AsType , current_nbytes :int)
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 | {% endraw %}
135 |
136 | {% raw %}
137 |
138 |
139 |
140 |
141 | {% endraw %}
142 |
143 | {% raw %}
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
report_on_dataframe(df :DataFrame, unit :str='MB' , optimize :str='memory' )
155 |
156 |
[Report on columns that might be converted]
157 | Args:
158 | df ([type]): [description]
159 | unit (str, optional): [byte, MB, GB]. Defaults to "MB".
160 | optimize (str, optional): [memory, computation]. Defaults to memory.
161 | [memory]: The lowest memory dtype for float is fp16.
162 | [computation]: The lowest memory dtype for float is fp32.
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 | {% endraw %}
173 |
174 | {% raw %}
175 |
176 |
177 |
178 |
179 | {% endraw %}
180 |
181 | {% raw %}
182 |
183 |
199 | {% endraw %}
200 |
201 | {% raw %}
202 |
203 |
204 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
236 |
237 |
238 |
239 |
240 | Current dtype
241 | Proposed dtype
242 | Current Memory (MB)
243 | Proposed Memory (MB)
244 | Ram Usage Improvement (MB)
245 | Ram Usage Improvement (%)
246 |
247 |
248 | Column
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 | store_id
260 | object
261 | category
262 | 203763.920410
263 | 3340.907715
264 | 200423.012695
265 | 98.360403
266 |
267 |
268 | item_id
269 | object
270 | category
271 | 233039.977539
272 | 6824.677734
273 | 226215.299805
274 | 97.071456
275 |
276 |
277 | wm_yr_wk
278 | int64
279 | int16
280 | 26723.191406
281 | 6680.844727
282 | 20042.346680
283 | 74.999825
284 |
285 |
286 | sell_price
287 | float64
288 | None
289 | 26723.191406
290 | NaN
291 | NaN
292 | NaN
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 | {% endraw %}
306 |
307 |
308 |
309 |
report_on_dataframe shows you the possible dtype conversions and the resulting improvement. Note that the library tries to optimize memory based on the current values of the data; you should still be careful about overflow in further transformations.
310 |
311 |
312 |
313 |
314 | {% raw %}
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
optimize_dtypes(df :DataFrame, proposed_df :DataFrame)
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 | {% endraw %}
337 |
338 | {% raw %}
339 |
340 |
341 |
342 |
343 | {% endraw %}
344 |
345 | {% raw %}
346 |
347 |
348 |
373 |
374 |
375 |
376 |
377 |
378 |
379 |
380 |
Given a dataframe, check for lowest possible conversions:
381 | convert_dtypes does a slightly different job:
382 | a b c d e str_a str_b
383 | 0 0 256 65536 1100 100101 hello 0
384 | 1 0 256 65536 1100 100101 hello 1
385 | 2 0 256 65536 1100 100101 hello 2
386 | 3 0 256 65536 1100 100101 hello 3
387 | 4 0 256 65536 1100 100101 hello 4
388 | .. .. ... ... ... ... ... ...
389 | 95 0 256 65536 1100 100101 hello 95
390 | 96 0 256 65536 1100 100101 hello 96
391 | 97 0 256 65536 1100 100101 hello 97
392 | 98 0 256 65536 1100 100101 hello 98
393 | 99 0 256 65536 1100 100101 hello 99
394 |
395 | [100 rows x 7 columns]
396 |
397 |
398 |
399 |
400 |
401 |
402 |
403 |
404 | {% endraw %}
405 |
406 |
407 |
408 |
409 |
--------------------------------------------------------------------------------
/00_core.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# default_exp core"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# dtype_diet\n",
17 | "\n",
18 | "> Optimize your dataset memory footprint with minimal dtypes."
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "#hide\n",
28 | "from nbdev.showdoc import *"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "#export\n",
38 | "\"\"\"Propose RAM-saving changes in a DataFrame\"\"\"\n",
39 | "\n",
40 | "import pandas as pd\n",
41 | "import numpy as np\n",
42 | "from collections import namedtuple\n",
43 | "\n",
44 | "\n",
45 | "# TODO\n",
46 | "# more tests\n",
47 | "# test float64->float32->float16\n",
48 | "# consider uint64/32/16/8\n",
49 | "# does the \"object\" check work if col has non-str items?\n",
50 | "# enable approx-equal with np.close (note for big nbrs, a big delta is \"acceptable\" with this)\n",
51 | "\n",
52 | "# convert_dtypes converts e.g. int64 to Int64 (nullable) regardless of nulls, also obj->string\n",
53 | "# so it doesn't save RAM but it does suggest new safer datatypes\n",
54 | "# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.convert_dtypes.html\n",
55 | "\n",
56 | "# For a dtype count the nbr of conversions that aren't equal, the RAM cost\n",
57 | "# of the conversion and the column name\n",
58 | "AsType = namedtuple(\"AsType\", [\"dtype\", \"nbr_different\", \"nbytes\", \"col\"])\n",
59 | "_fields = (\n",
60 | " \"column\",\n",
61 | " \"current_dtype\",\n",
62 | " \"proposed_dtype\",\n",
63 | " \"current_memory\",\n",
64 | " \"proposed_memory\",\n",
65 | " \"ram_usage_improvement\",\n",
66 | ")\n",
67 | "Row = namedtuple(\"Row\", _fields, defaults=(None,) * len(_fields))\n",
68 | "\n",
69 | "\n",
70 | "def count_errors(ser: pd.Series, new_dtype):\n",
71 | " \"\"\"After converting ser to new_dtype, count the items whose value changed\"\"\"\n",
72 | " tmp_ser = ser.astype(new_dtype)\n",
73 | " # metric will be a list of Trues if the change has equivalent value, False otherwise\n",
74 | " # checks for approx equal which may not be what we want\n",
75 | " # metric = np.isclose(ser, tmp_ser)\n",
76 | "\n",
77 | " ndiff = len(ser.compare(tmp_ser)) # pandas >= 1.1.0\n",
78 | " nbytes = tmp_ser.memory_usage(deep=True)\n",
79 | " as_type = AsType(new_dtype, ndiff, nbytes, ser.name)\n",
80 | " return as_type\n",
81 | "\n",
82 | "\n",
83 | "def map_dtypes_to_choices(ser: pd.Series, optimize: str):\n",
84 | " if optimize == \"memory\":\n",
85 | " new_dtypes = {\n",
86 | " \"int64\": [\"int32\", \"int16\", \"int8\"],\n",
87 | " \"float64\": [\"float32\", \"float16\"],\n",
88 | " \"object\": [\"category\"],\n",
89 | " }\n",
90 | " elif optimize == \"computation\":\n",
91 | " new_dtypes = {\n",
92 | " \"int64\": [\"int32\", \"int16\", \"int8\"],\n",
93 | " \"float64\": [\"float32\"],\n",
94 | " \"object\": [\"category\"],\n",
95 | " }\n",
96 | "\n",
97 | " return new_dtypes.get(ser.dtype.name)"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "#export\n",
107 | "def get_smallest_valid_conversion(ser: pd.Series, optimize: str):\n",
108 | " new_dtypes = map_dtypes_to_choices(ser, optimize)\n",
109 | " if new_dtypes:\n",
110 | " for new_dtype in reversed(new_dtypes):\n",
111 | " as_type = count_errors(ser, new_dtype)\n",
112 | " if as_type.nbr_different == 0:\n",
113 | " return as_type\n",
114 | " return None\n",
115 | "\n",
116 | "\n",
117 | "def get_improvement(as_type: AsType, current_nbytes: int) -> pd.DataFrame:\n",
118 | " report = (None, None, None)\n",
119 | " ram_usage_improvement = current_nbytes - as_type.nbytes\n",
120 | " if ram_usage_improvement > 0:\n",
121 | " report = (\n",
122 | " as_type.nbytes,\n",
123 | " as_type.dtype,\n",
124 | " ram_usage_improvement,\n",
125 | " )\n",
126 | " return report"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": null,
132 | "metadata": {},
133 | "outputs": [],
134 | "source": [
135 | "#export\n",
136 | "def report_on_dataframe(\n",
137 | " df: pd.DataFrame, unit: str = \"MB\", optimize: str = \"memory\"\n",
138 | ") -> pd.DataFrame:\n",
139 | "\n",
140 | " \"\"\"[Report on columns that might be converted]\n",
141 | " Args:\n",
142 | " df (pd.DataFrame): The dataframe to report on.\n",
143 | " unit (str, optional): [byte, MB, GB]. Defaults to \"MB\".\n",
144 | " optimize (str, optional): [memory, computation]. Defaults to memory.\n",
145 | " [memory]: The lowest memory dtype for float is fp16.\n",
146 | " [computation]: The lowest memory dtype for float is fp32.\n",
147 | " \"\"\"\n",
148 | "\n",
149 | " unit_map = {\"KB\": 1024 ** 1, \"MB\": 1024 ** 2, \"GB\": 1024 ** 3, \"byte\": 1}\n",
150 | " divide_by = unit_map[unit]\n",
151 | " optimize_dtypes = []\n",
152 | "\n",
153 | " for col in df.columns:\n",
154 | " as_type = get_smallest_valid_conversion(df[col], optimize)\n",
155 | " nbytes = df[col].memory_usage(deep=True)\n",
156 | " proposed_memory, proposed_dtype, ram_usage_improvement = None, None, None\n",
157 | " if as_type:\n",
158 | " (\n",
159 | " proposed_bytes,\n",
160 | " proposed_dtype,\n",
161 | " ram_usage_improvement,\n",
162 | " ) = get_improvement(as_type, nbytes)\n",
163 | " # If improvement is found, replace the attributes\n",
164 | " proposed_memory = proposed_bytes / divide_by if proposed_bytes else None\n",
165 | " ram_usage_improvement = (\n",
166 | " ram_usage_improvement / divide_by if ram_usage_improvement else None\n",
167 | " )\n",
168 | " proposed_dtype = proposed_dtype\n",
169 | " row = Row(\n",
170 | " column=col,\n",
171 | " current_dtype=df[col].dtype,\n",
172 | " current_memory=nbytes / divide_by,\n",
173 | " proposed_memory=proposed_memory,\n",
174 | " proposed_dtype=proposed_dtype,\n",
175 | " ram_usage_improvement=ram_usage_improvement,\n",
176 | " )\n",
177 | " optimize_dtypes.append(row)\n",
178 | " columns = [\n",
179 | " \"Column\",\n",
180 | " \"Current dtype\",\n",
181 | " \"Proposed dtype\",\n",
182 | " f\"Current Memory ({unit})\",\n",
183 | " f\"Proposed Memory ({unit})\",\n",
184 | " f\"Ram Usage Improvement ({unit})\",\n",
185 | " ]\n",
186 | " report_df = pd.DataFrame(optimize_dtypes, columns=columns)\n",
187 | " report_df[\"Ram Usage Improvement (%)\"] = (\n",
188 | " report_df[f\"Ram Usage Improvement ({unit})\"]\n",
189 | " / report_df[f\"Current Memory ({unit})\"]\n",
190 | " * 100\n",
191 | " )\n",
192 | " report_df = report_df.set_index(\"Column\")\n",
193 | " return report_df"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": null,
199 | "metadata": {},
200 | "outputs": [],
201 | "source": [
202 | "#slow\n",
203 | "# sell_prices.csv.zip \n",
204 | "# Source data: https://www.kaggle.com/c/m5-forecasting-uncertainty/\n",
205 | "\n",
206 | "df = pd.read_csv('data/sell_prices.csv')"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "execution_count": null,
212 | "metadata": {},
213 | "outputs": [
214 | {
215 | "data": {
216 | "text/html": [
217 | "\n",
218 | "\n",
231 | "
\n",
232 | " \n",
233 | " \n",
234 | " \n",
235 | " Current dtype \n",
236 | " Proposed dtype \n",
237 | " Current Memory (MB) \n",
238 | " Proposed Memory (MB) \n",
239 | " Ram Usage Improvement (MB) \n",
240 | " Ram Usage Improvement (%) \n",
241 | " \n",
242 | " \n",
243 | " Column \n",
244 | " \n",
245 | " \n",
246 | " \n",
247 | " \n",
248 | " \n",
249 | " \n",
250 | " \n",
251 | " \n",
252 | " \n",
253 | " \n",
254 | " store_id \n",
255 | " object \n",
256 | " category \n",
257 | " 203763.920410 \n",
258 | " 3340.907715 \n",
259 | " 200423.012695 \n",
260 | " 98.360403 \n",
261 | " \n",
262 | " \n",
263 | " item_id \n",
264 | " object \n",
265 | " category \n",
266 | " 233039.977539 \n",
267 | " 6824.677734 \n",
268 | " 226215.299805 \n",
269 | " 97.071456 \n",
270 | " \n",
271 | " \n",
272 | " wm_yr_wk \n",
273 | " int64 \n",
274 | " int16 \n",
275 | " 26723.191406 \n",
276 | " 6680.844727 \n",
277 | " 20042.346680 \n",
278 | " 74.999825 \n",
279 | " \n",
280 | " \n",
281 | " sell_price \n",
282 | " float64 \n",
283 | " None \n",
284 | " 26723.191406 \n",
285 | " NaN \n",
286 | " NaN \n",
287 | " NaN \n",
288 | " \n",
289 | " \n",
290 | "
\n",
291 | "
"
292 | ],
293 | "text/plain": [
294 | " Current dtype Proposed dtype Current Memory (MB) \\\n",
295 | "Column \n",
296 | "store_id object category 203763.920410 \n",
297 | "item_id object category 233039.977539 \n",
298 | "wm_yr_wk int64 int16 26723.191406 \n",
299 | "sell_price float64 None 26723.191406 \n",
300 | "\n",
301 | " Proposed Memory (MB) Ram Usage Improvement (MB) \\\n",
302 | "Column \n",
303 | "store_id 3340.907715 200423.012695 \n",
304 | "item_id 6824.677734 226215.299805 \n",
305 | "wm_yr_wk 6680.844727 20042.346680 \n",
306 | "sell_price NaN NaN \n",
307 | "\n",
308 | " Ram Usage Improvement (%) \n",
309 | "Column \n",
310 | "store_id 98.360403 \n",
311 | "item_id 97.071456 \n",
312 | "wm_yr_wk 74.999825 \n",
313 | "sell_price NaN "
314 | ]
315 | },
316 | "execution_count": null,
317 | "metadata": {},
318 | "output_type": "execute_result"
319 | }
320 | ],
321 | "source": [
322 | "#slow\n",
323 | "report_on_dataframe(df)"
324 | ]
325 | },
326 | {
327 | "cell_type": "markdown",
328 | "metadata": {},
329 | "source": [
330 | "`report_on_dataframe` shows you the possible dtype conversions and the resulting memory improvement. Note that the library optimizes memory based on the __current__ values of the data, so you should still be careful about overflow in further transformations. "
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": null,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "#export\n",
340 | "def optimize_dtypes(df: pd.DataFrame, proposed_df: pd.DataFrame) -> pd.DataFrame:\n",
341 | " new_df = df.copy()\n",
342 | " for col in df.columns:\n",
343 | " new_dtype = proposed_df.loc[col, \"Proposed dtype\"]\n",
344 | " if new_dtype:\n",
345 | " new_df[col] = new_df[col].astype(new_dtype)\n",
346 | " return new_df"
347 | ]
348 | },
349 | {
350 | "cell_type": "code",
351 | "execution_count": null,
352 | "metadata": {},
353 | "outputs": [],
354 | "source": [
355 | "#hide\n",
356 | "def test_ser_ints():\n",
357 | " # check for low simple int\n",
358 | " ser = pd.Series([1] * 3)\n",
359 | " as_type = count_errors(ser, \"int32\")\n",
360 | " assert as_type.nbr_different == 0\n",
361 | " as_type = count_errors(ser, \"int16\")\n",
362 | " assert as_type.nbr_different == 0\n",
363 | " as_type = count_errors(ser, \"int8\")\n",
364 | " assert as_type.nbr_different == 0\n",
365 | "\n",
366 | " # check for int needing bigger than int16\n",
367 | " ser = pd.Series([65536] * 3)\n",
368 | " as_type = count_errors(ser, \"int32\")\n",
369 | " assert as_type.nbr_different == 0\n",
370 | " as_type = count_errors(ser, \"int16\")\n",
371 | " assert as_type.nbr_different == 3\n",
372 | " as_type = count_errors(ser, \"int8\")\n",
373 | " assert as_type.nbr_different == 3"
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "execution_count": null,
379 | "metadata": {},
380 | "outputs": [
381 | {
382 | "name": "stdout",
383 | "output_type": "stream",
384 | "text": [
385 | "Given a dataframe, check for lowest possible conversions:\n",
386 | "convert_dtypes does a slightly different job:\n",
387 | " a b c d e str_a str_b\n",
388 | "0 0 256 65536 1100 100101 hello 0\n",
389 | "1 0 256 65536 1100 100101 hello 1\n",
390 | "2 0 256 65536 1100 100101 hello 2\n",
391 | "3 0 256 65536 1100 100101 hello 3\n",
392 | "4 0 256 65536 1100 100101 hello 4\n",
393 | ".. .. ... ... ... ... ... ...\n",
394 | "95 0 256 65536 1100 100101 hello 95\n",
395 | "96 0 256 65536 1100 100101 hello 96\n",
396 | "97 0 256 65536 1100 100101 hello 97\n",
397 | "98 0 256 65536 1100 100101 hello 98\n",
398 | "99 0 256 65536 1100 100101 hello 99\n",
399 | "\n",
400 | "[100 rows x 7 columns]\n"
401 | ]
402 | }
403 | ],
404 | "source": [
405 | "if __name__ == \"__main__\":\n",
406 | " print(\"Given a dataframe, check for lowest possible conversions:\")\n",
407 | "\n",
408 | " nbr_rows = 100\n",
409 | " df = pd.DataFrame()\n",
410 | " df[\"a\"] = [0] * nbr_rows\n",
411 | " df[\"b\"] = [256] * nbr_rows\n",
412 | " df[\"c\"] = [65_536] * nbr_rows\n",
413 | " df[\"d\"] = [1_100.0] * nbr_rows\n",
414 | " df[\"e\"] = [100_101.0] * nbr_rows\n",
415 | " df[\"str_a\"] = [\"hello\"] * nbr_rows\n",
416 | " df[\"str_b\"] = [str(n) for n in range(nbr_rows)]\n",
417 | " report_on_dataframe(df)\n",
418 | "\n",
419 | " print(\"convert_dtypes does a slightly different job:\")\n",
420 | " print(df.convert_dtypes())"
421 | ]
422 | }
423 | ],
424 | "metadata": {
425 | "kernelspec": {
426 | "display_name": "Python 3",
427 | "language": "python",
428 | "name": "python3"
429 | }
430 | },
431 | "nbformat": 4,
432 | "nbformat_minor": 2
433 | }
434 |
--------------------------------------------------------------------------------
/docs/css/font-awesome.min.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome
3 | * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License)
4 | */@font-face{font-family:'FontAwesome';src:url('fonts/fontawesome-webfont.eot?v=4.7.0');src:url('fonts/fontawesome-webfont.eot?#iefix&v=4.7.0') format('embedded-opentype'),url('fonts/fontawesome-webfont.woff2?v=4.7.0') format('woff2'),url('fonts/fontawesome-webfont.woff?v=4.7.0') format('woff'),url('fonts/fontawesome-webfont.ttf?v=4.7.0') format('truetype'),url('fonts/fontawesome-webfont.svg?v=4.7.0#fontawesomeregular') format('svg');font-weight:normal;font-style:normal}.fa{display:inline-block;font:normal normal normal 14px/1 FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571429em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14285714em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14285714em;width:2.14285714em;top:.14285714em;text-align:center}.fa-li.fa-lg{left:-1.85714286em}.fa-border{padding:.2em .25em .15em;border:solid .08em #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa.fa-pull-left{margin-right:.3em}.fa.fa-pull-right{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left{margin-right:.3em}.fa.pull-right{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s infinite linear;animation:fa-spin 2s infinite linear}.fa-pulse{-webkit-animation:fa-spin 1s infinite steps(8);animation:fa-spin 1s infinite steps(8)}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes 
fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}100%{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scale(-1, 1);-ms-transform:scale(-1, 1);transform:scale(-1, 1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scale(1, -1);-ms-transform:scale(1, -1);transform:scale(1, -1)}:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270,:root .fa-flip-horizontal,:root 
.fa-flip-vertical{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:"\f000"}.fa-music:before{content:"\f001"}.fa-search:before{content:"\f002"}.fa-envelope-o:before{content:"\f003"}.fa-heart:before{content:"\f004"}.fa-star:before{content:"\f005"}.fa-star-o:before{content:"\f006"}.fa-user:before{content:"\f007"}.fa-film:before{content:"\f008"}.fa-th-large:before{content:"\f009"}.fa-th:before{content:"\f00a"}.fa-th-list:before{content:"\f00b"}.fa-check:before{content:"\f00c"}.fa-remove:before,.fa-close:before,.fa-times:before{content:"\f00d"}.fa-search-plus:before{content:"\f00e"}.fa-search-minus:before{content:"\f010"}.fa-power-off:before{content:"\f011"}.fa-signal:before{content:"\f012"}.fa-gear:before,.fa-cog:before{content:"\f013"}.fa-trash-o:before{content:"\f014"}.fa-home:before{content:"\f015"}.fa-file-o:before{content:"\f016"}.fa-clock-o:before{content:"\f017"}.fa-road:before{content:"\f018"}.fa-download:before{content:"\f019"}.fa-arrow-circle-o-down:before{content:"\f01a"}.fa-arrow-circle-o-up:before{content:"\f01b"}.fa-inbox:before{content:"\f01c"}.fa-play-circle-o:before{content:"\f01d"}.fa-rotate-right:before,.fa-repeat:before{content:"\f01e"}.fa-refresh:before{content:"\f021"}.fa-list-alt:before{content:"\f022"}.fa-lock:before{content:"\f023"}.fa-flag:before{content:"\f024"}.fa-headphones:before{content:"\f025"}.fa-volume-off:before{content:"\f026"}.fa-volume-down:before{content:"\f027"}.fa-volume-up:before{content:"\f028"}.fa-qrcode:before{content:"\f029"}.fa-barcode:before{content:"\f02a"}.fa-tag:before{content:"\f02b"}.fa-tags:before{content:"\f02c"}.fa-book:before{content:"\f02d"}.fa-bookmark:before{content:"\f02e"}.fa-print:before{content:"\f02f"}.fa-camera:before{content:"\f030"}.fa-font:before{c
ontent:"\f031"}.fa-bold:before{content:"\f032"}.fa-italic:before{content:"\f033"}.fa-text-height:before{content:"\f034"}.fa-text-width:before{content:"\f035"}.fa-align-left:before{content:"\f036"}.fa-align-center:before{content:"\f037"}.fa-align-right:before{content:"\f038"}.fa-align-justify:before{content:"\f039"}.fa-list:before{content:"\f03a"}.fa-dedent:before,.fa-outdent:before{content:"\f03b"}.fa-indent:before{content:"\f03c"}.fa-video-camera:before{content:"\f03d"}.fa-photo:before,.fa-image:before,.fa-picture-o:before{content:"\f03e"}.fa-pencil:before{content:"\f040"}.fa-map-marker:before{content:"\f041"}.fa-adjust:before{content:"\f042"}.fa-tint:before{content:"\f043"}.fa-edit:before,.fa-pencil-square-o:before{content:"\f044"}.fa-share-square-o:before{content:"\f045"}.fa-check-square-o:before{content:"\f046"}.fa-arrows:before{content:"\f047"}.fa-step-backward:before{content:"\f048"}.fa-fast-backward:before{content:"\f049"}.fa-backward:before{content:"\f04a"}.fa-play:before{content:"\f04b"}.fa-pause:before{content:"\f04c"}.fa-stop:before{content:"\f04d"}.fa-forward:before{content:"\f04e"}.fa-fast-forward:before{content:"\f050"}.fa-step-forward:before{content:"\f051"}.fa-eject:before{content:"\f052"}.fa-chevron-left:before{content:"\f053"}.fa-chevron-right:before{content:"\f054"}.fa-plus-circle:before{content:"\f055"}.fa-minus-circle:before{content:"\f056"}.fa-times-circle:before{content:"\f057"}.fa-check-circle:before{content:"\f058"}.fa-question-circle:before{content:"\f059"}.fa-info-circle:before{content:"\f05a"}.fa-crosshairs:before{content:"\f05b"}.fa-times-circle-o:before{content:"\f05c"}.fa-check-circle-o:before{content:"\f05d"}.fa-ban:before{content:"\f05e"}.fa-arrow-left:before{content:"\f060"}.fa-arrow-right:before{content:"\f061"}.fa-arrow-up:before{content:"\f062"}.fa-arrow-down:before{content:"\f063"}.fa-mail-forward:before,.fa-share:before{content:"\f064"}.fa-expand:before{content:"\f065"}.fa-compress:before{content:"\f066"}.fa-plus:before{content
:"\f067"}.fa-minus:before{content:"\f068"}.fa-asterisk:before{content:"\f069"}.fa-exclamation-circle:before{content:"\f06a"}.fa-gift:before{content:"\f06b"}.fa-leaf:before{content:"\f06c"}.fa-fire:before{content:"\f06d"}.fa-eye:before{content:"\f06e"}.fa-eye-slash:before{content:"\f070"}.fa-warning:before,.fa-exclamation-triangle:before{content:"\f071"}.fa-plane:before{content:"\f072"}.fa-calendar:before{content:"\f073"}.fa-random:before{content:"\f074"}.fa-comment:before{content:"\f075"}.fa-magnet:before{content:"\f076"}.fa-chevron-up:before{content:"\f077"}.fa-chevron-down:before{content:"\f078"}.fa-retweet:before{content:"\f079"}.fa-shopping-cart:before{content:"\f07a"}.fa-folder:before{content:"\f07b"}.fa-folder-open:before{content:"\f07c"}.fa-arrows-v:before{content:"\f07d"}.fa-arrows-h:before{content:"\f07e"}.fa-bar-chart-o:before,.fa-bar-chart:before{content:"\f080"}.fa-twitter-square:before{content:"\f081"}.fa-facebook-square:before{content:"\f082"}.fa-camera-retro:before{content:"\f083"}.fa-key:before{content:"\f084"}.fa-gears:before,.fa-cogs:before{content:"\f085"}.fa-comments:before{content:"\f086"}.fa-thumbs-o-up:before{content:"\f087"}.fa-thumbs-o-down:before{content:"\f088"}.fa-star-half:before{content:"\f089"}.fa-heart-o:before{content:"\f08a"}.fa-sign-out:before{content:"\f08b"}.fa-linkedin-square:before{content:"\f08c"}.fa-thumb-tack:before{content:"\f08d"}.fa-external-link:before{content:"\f08e"}.fa-sign-in:before{content:"\f090"}.fa-trophy:before{content:"\f091"}.fa-github-square:before{content:"\f092"}.fa-upload:before{content:"\f093"}.fa-lemon-o:before{content:"\f094"}.fa-phone:before{content:"\f095"}.fa-square-o:before{content:"\f096"}.fa-bookmark-o:before{content:"\f097"}.fa-phone-square:before{content:"\f098"}.fa-twitter:before{content:"\f099"}.fa-facebook-f:before,.fa-facebook:before{content:"\f09a"}.fa-github:before{content:"\f09b"}.fa-unlock:before{content:"\f09c"}.fa-credit-card:before{content:"\f09d"}.fa-feed:before,.fa-rss:before{conten
t:"\f09e"}.fa-hdd-o:before{content:"\f0a0"}.fa-bullhorn:before{content:"\f0a1"}.fa-bell:before{content:"\f0f3"}.fa-certificate:before{content:"\f0a3"}.fa-hand-o-right:before{content:"\f0a4"}.fa-hand-o-left:before{content:"\f0a5"}.fa-hand-o-up:before{content:"\f0a6"}.fa-hand-o-down:before{content:"\f0a7"}.fa-arrow-circle-left:before{content:"\f0a8"}.fa-arrow-circle-right:before{content:"\f0a9"}.fa-arrow-circle-up:before{content:"\f0aa"}.fa-arrow-circle-down:before{content:"\f0ab"}.fa-globe:before{content:"\f0ac"}.fa-wrench:before{content:"\f0ad"}.fa-tasks:before{content:"\f0ae"}.fa-filter:before{content:"\f0b0"}.fa-briefcase:before{content:"\f0b1"}.fa-arrows-alt:before{content:"\f0b2"}.fa-group:before,.fa-users:before{content:"\f0c0"}.fa-chain:before,.fa-link:before{content:"\f0c1"}.fa-cloud:before{content:"\f0c2"}.fa-flask:before{content:"\f0c3"}.fa-cut:before,.fa-scissors:before{content:"\f0c4"}.fa-copy:before,.fa-files-o:before{content:"\f0c5"}.fa-paperclip:before{content:"\f0c6"}.fa-save:before,.fa-floppy-o:before{content:"\f0c7"}.fa-square:before{content:"\f0c8"}.fa-navicon:before,.fa-reorder:before,.fa-bars:before{content:"\f0c9"}.fa-list-ul:before{content:"\f0ca"}.fa-list-ol:before{content:"\f0cb"}.fa-strikethrough:before{content:"\f0cc"}.fa-underline:before{content:"\f0cd"}.fa-table:before{content:"\f0ce"}.fa-magic:before{content:"\f0d0"}.fa-truck:before{content:"\f0d1"}.fa-pinterest:before{content:"\f0d2"}.fa-pinterest-square:before{content:"\f0d3"}.fa-google-plus-square:before{content:"\f0d4"}.fa-google-plus:before{content:"\f0d5"}.fa-money:before{content:"\f0d6"}.fa-caret-down:before{content:"\f0d7"}.fa-caret-up:before{content:"\f0d8"}.fa-caret-left:before{content:"\f0d9"}.fa-caret-right:before{content:"\f0da"}.fa-columns:before{content:"\f0db"}.fa-unsorted:before,.fa-sort:before{content:"\f0dc"}.fa-sort-down:before,.fa-sort-desc:before{content:"\f0dd"}.fa-sort-up:before,.fa-sort-asc:before{content:"\f0de"}.fa-envelope:before{content:"\f0e0"}.fa-linkedin:b
efore{content:"\f0e1"}.fa-rotate-left:before,.fa-undo:before{content:"\f0e2"}.fa-legal:before,.fa-gavel:before{content:"\f0e3"}.fa-dashboard:before,.fa-tachometer:before{content:"\f0e4"}.fa-comment-o:before{content:"\f0e5"}.fa-comments-o:before{content:"\f0e6"}.fa-flash:before,.fa-bolt:before{content:"\f0e7"}.fa-sitemap:before{content:"\f0e8"}.fa-umbrella:before{content:"\f0e9"}.fa-paste:before,.fa-clipboard:before{content:"\f0ea"}.fa-lightbulb-o:before{content:"\f0eb"}.fa-exchange:before{content:"\f0ec"}.fa-cloud-download:before{content:"\f0ed"}.fa-cloud-upload:before{content:"\f0ee"}.fa-user-md:before{content:"\f0f0"}.fa-stethoscope:before{content:"\f0f1"}.fa-suitcase:before{content:"\f0f2"}.fa-bell-o:before{content:"\f0a2"}.fa-coffee:before{content:"\f0f4"}.fa-cutlery:before{content:"\f0f5"}.fa-file-text-o:before{content:"\f0f6"}.fa-building-o:before{content:"\f0f7"}.fa-hospital-o:before{content:"\f0f8"}.fa-ambulance:before{content:"\f0f9"}.fa-medkit:before{content:"\f0fa"}.fa-fighter-jet:before{content:"\f0fb"}.fa-beer:before{content:"\f0fc"}.fa-h-square:before{content:"\f0fd"}.fa-plus-square:before{content:"\f0fe"}.fa-angle-double-left:before{content:"\f100"}.fa-angle-double-right:before{content:"\f101"}.fa-angle-double-up:before{content:"\f102"}.fa-angle-double-down:before{content:"\f103"}.fa-angle-left:before{content:"\f104"}.fa-angle-right:before{content:"\f105"}.fa-angle-up:before{content:"\f106"}.fa-angle-down:before{content:"\f107"}.fa-desktop:before{content:"\f108"}.fa-laptop:before{content:"\f109"}.fa-tablet:before{content:"\f10a"}.fa-mobile-phone:before,.fa-mobile:before{content:"\f10b"}.fa-circle-o:before{content:"\f10c"}.fa-quote-left:before{content:"\f10d"}.fa-quote-right:before{content:"\f10e"}.fa-spinner:before{content:"\f110"}.fa-circle:before{content:"\f111"}.fa-mail-reply:before,.fa-reply:before{content:"\f112"}.fa-github-alt:before{content:"\f113"}.fa-folder-o:before{content:"\f114"}.fa-folder-open-o:before{content:"\f115"}.fa-smile-o:before{c
ontent:"\f118"}.fa-frown-o:before{content:"\f119"}.fa-meh-o:before{content:"\f11a"}.fa-gamepad:before{content:"\f11b"}.fa-keyboard-o:before{content:"\f11c"}.fa-flag-o:before{content:"\f11d"}.fa-flag-checkered:before{content:"\f11e"}.fa-terminal:before{content:"\f120"}.fa-code:before{content:"\f121"}.fa-mail-reply-all:before,.fa-reply-all:before{content:"\f122"}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:"\f123"}.fa-location-arrow:before{content:"\f124"}.fa-crop:before{content:"\f125"}.fa-code-fork:before{content:"\f126"}.fa-unlink:before,.fa-chain-broken:before{content:"\f127"}.fa-question:before{content:"\f128"}.fa-info:before{content:"\f129"}.fa-exclamation:before{content:"\f12a"}.fa-superscript:before{content:"\f12b"}.fa-subscript:before{content:"\f12c"}.fa-eraser:before{content:"\f12d"}.fa-puzzle-piece:before{content:"\f12e"}.fa-microphone:before{content:"\f130"}.fa-microphone-slash:before{content:"\f131"}.fa-shield:before{content:"\f132"}.fa-calendar-o:before{content:"\f133"}.fa-fire-extinguisher:before{content:"\f134"}.fa-rocket:before{content:"\f135"}.fa-maxcdn:before{content:"\f136"}.fa-chevron-circle-left:before{content:"\f137"}.fa-chevron-circle-right:before{content:"\f138"}.fa-chevron-circle-up:before{content:"\f139"}.fa-chevron-circle-down:before{content:"\f13a"}.fa-html5:before{content:"\f13b"}.fa-css3:before{content:"\f13c"}.fa-anchor:before{content:"\f13d"}.fa-unlock-alt:before{content:"\f13e"}.fa-bullseye:before{content:"\f140"}.fa-ellipsis-h:before{content:"\f141"}.fa-ellipsis-v:before{content:"\f142"}.fa-rss-square:before{content:"\f143"}.fa-play-circle:before{content:"\f144"}.fa-ticket:before{content:"\f145"}.fa-minus-square:before{content:"\f146"}.fa-minus-square-o:before{content:"\f147"}.fa-level-up:before{content:"\f148"}.fa-level-down:before{content:"\f149"}.fa-check-square:before{content:"\f14a"}.fa-pencil-square:before{content:"\f14b"}.fa-external-link-square:before{content:"\f14c"}.fa-share-square:bef
ore{content:"\f14d"}.fa-compass:before{content:"\f14e"}.fa-toggle-down:before,.fa-caret-square-o-down:before{content:"\f150"}.fa-toggle-up:before,.fa-caret-square-o-up:before{content:"\f151"}.fa-toggle-right:before,.fa-caret-square-o-right:before{content:"\f152"}.fa-euro:before,.fa-eur:before{content:"\f153"}.fa-gbp:before{content:"\f154"}.fa-dollar:before,.fa-usd:before{content:"\f155"}.fa-rupee:before,.fa-inr:before{content:"\f156"}.fa-cny:before,.fa-rmb:before,.fa-yen:before,.fa-jpy:before{content:"\f157"}.fa-ruble:before,.fa-rouble:before,.fa-rub:before{content:"\f158"}.fa-won:before,.fa-krw:before{content:"\f159"}.fa-bitcoin:before,.fa-btc:before{content:"\f15a"}.fa-file:before{content:"\f15b"}.fa-file-text:before{content:"\f15c"}.fa-sort-alpha-asc:before{content:"\f15d"}.fa-sort-alpha-desc:before{content:"\f15e"}.fa-sort-amount-asc:before{content:"\f160"}.fa-sort-amount-desc:before{content:"\f161"}.fa-sort-numeric-asc:before{content:"\f162"}.fa-sort-numeric-desc:before{content:"\f163"}.fa-thumbs-up:before{content:"\f164"}.fa-thumbs-down:before{content:"\f165"}.fa-youtube-square:before{content:"\f166"}.fa-youtube:before{content:"\f167"}.fa-xing:before{content:"\f168"}.fa-xing-square:before{content:"\f169"}.fa-youtube-play:before{content:"\f16a"}.fa-dropbox:before{content:"\f16b"}.fa-stack-overflow:before{content:"\f16c"}.fa-instagram:before{content:"\f16d"}.fa-flickr:before{content:"\f16e"}.fa-adn:before{content:"\f170"}.fa-bitbucket:before{content:"\f171"}.fa-bitbucket-square:before{content:"\f172"}.fa-tumblr:before{content:"\f173"}.fa-tumblr-square:before{content:"\f174"}.fa-long-arrow-down:before{content:"\f175"}.fa-long-arrow-up:before{content:"\f176"}.fa-long-arrow-left:before{content:"\f177"}.fa-long-arrow-right:before{content:"\f178"}.fa-apple:before{content:"\f179"}.fa-windows:before{content:"\f17a"}.fa-android:before{content:"\f17b"}.fa-linux:before{content:"\f17c"}.fa-dribbble:before{content:"\f17d"}.fa-skype:before{content:"\f17e"}.fa-foursquare:befo
re{content:"\f180"}.fa-trello:before{content:"\f181"}.fa-female:before{content:"\f182"}.fa-male:before{content:"\f183"}.fa-gittip:before,.fa-gratipay:before{content:"\f184"}.fa-sun-o:before{content:"\f185"}.fa-moon-o:before{content:"\f186"}.fa-archive:before{content:"\f187"}.fa-bug:before{content:"\f188"}.fa-vk:before{content:"\f189"}.fa-weibo:before{content:"\f18a"}.fa-renren:before{content:"\f18b"}.fa-pagelines:before{content:"\f18c"}.fa-stack-exchange:before{content:"\f18d"}.fa-arrow-circle-o-right:before{content:"\f18e"}.fa-arrow-circle-o-left:before{content:"\f190"}.fa-toggle-left:before,.fa-caret-square-o-left:before{content:"\f191"}.fa-dot-circle-o:before{content:"\f192"}.fa-wheelchair:before{content:"\f193"}.fa-vimeo-square:before{content:"\f194"}.fa-turkish-lira:before,.fa-try:before{content:"\f195"}.fa-plus-square-o:before{content:"\f196"}.fa-space-shuttle:before{content:"\f197"}.fa-slack:before{content:"\f198"}.fa-envelope-square:before{content:"\f199"}.fa-wordpress:before{content:"\f19a"}.fa-openid:before{content:"\f19b"}.fa-institution:before,.fa-bank:before,.fa-university:before{content:"\f19c"}.fa-mortar-board:before,.fa-graduation-cap:before{content:"\f19d"}.fa-yahoo:before{content:"\f19e"}.fa-google:before{content:"\f1a0"}.fa-reddit:before{content:"\f1a1"}.fa-reddit-square:before{content:"\f1a2"}.fa-stumbleupon-circle:before{content:"\f1a3"}.fa-stumbleupon:before{content:"\f1a4"}.fa-delicious:before{content:"\f1a5"}.fa-digg:before{content:"\f1a6"}.fa-pied-piper-pp:before{content:"\f1a7"}.fa-pied-piper-alt:before{content:"\f1a8"}.fa-drupal:before{content:"\f1a9"}.fa-joomla:before{content:"\f1aa"}.fa-language:before{content:"\f1ab"}.fa-fax:before{content:"\f1ac"}.fa-building:before{content:"\f1ad"}.fa-child:before{content:"\f1ae"}.fa-paw:before{content:"\f1b0"}.fa-spoon:before{content:"\f1b1"}.fa-cube:before{content:"\f1b2"}.fa-cubes:before{content:"\f1b3"}.fa-behance:before{content:"\f1b4"}.fa-behance-square:before{content:"\f1b5"}.fa-steam:before{co
ntent:"\f1b6"}.fa-steam-square:before{content:"\f1b7"}.fa-recycle:before{content:"\f1b8"}.fa-automobile:before,.fa-car:before{content:"\f1b9"}.fa-cab:before,.fa-taxi:before{content:"\f1ba"}.fa-tree:before{content:"\f1bb"}.fa-spotify:before{content:"\f1bc"}.fa-deviantart:before{content:"\f1bd"}.fa-soundcloud:before{content:"\f1be"}.fa-database:before{content:"\f1c0"}.fa-file-pdf-o:before{content:"\f1c1"}.fa-file-word-o:before{content:"\f1c2"}.fa-file-excel-o:before{content:"\f1c3"}.fa-file-powerpoint-o:before{content:"\f1c4"}.fa-file-photo-o:before,.fa-file-picture-o:before,.fa-file-image-o:before{content:"\f1c5"}.fa-file-zip-o:before,.fa-file-archive-o:before{content:"\f1c6"}.fa-file-sound-o:before,.fa-file-audio-o:before{content:"\f1c7"}.fa-file-movie-o:before,.fa-file-video-o:before{content:"\f1c8"}.fa-file-code-o:before{content:"\f1c9"}.fa-vine:before{content:"\f1ca"}.fa-codepen:before{content:"\f1cb"}.fa-jsfiddle:before{content:"\f1cc"}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-saver:before,.fa-support:before,.fa-life-ring:before{content:"\f1cd"}.fa-circle-o-notch:before{content:"\f1ce"}.fa-ra:before,.fa-resistance:before,.fa-rebel:before{content:"\f1d0"}.fa-ge:before,.fa-empire:before{content:"\f1d1"}.fa-git-square:before{content:"\f1d2"}.fa-git:before{content:"\f1d3"}.fa-y-combinator-square:before,.fa-yc-square:before,.fa-hacker-news:before{content:"\f1d4"}.fa-tencent-weibo:before{content:"\f1d5"}.fa-qq:before{content:"\f1d6"}.fa-wechat:before,.fa-weixin:before{content:"\f1d7"}.fa-send:before,.fa-paper-plane:before{content:"\f1d8"}.fa-send-o:before,.fa-paper-plane-o:before{content:"\f1d9"}.fa-history:before{content:"\f1da"}.fa-circle-thin:before{content:"\f1db"}.fa-header:before{content:"\f1dc"}.fa-paragraph:before{content:"\f1dd"}.fa-sliders:before{content:"\f1de"}.fa-share-alt:before{content:"\f1e0"}.fa-share-alt-square:before{content:"\f1e1"}.fa-bomb:before{content:"\f1e2"}.fa-soccer-ball-o:before,.fa-futbol-o:before{content:"\f1e3"}.fa-tty:before{c
ontent:"\f1e4"}.fa-binoculars:before{content:"\f1e5"}.fa-plug:before{content:"\f1e6"}.fa-slideshare:before{content:"\f1e7"}.fa-twitch:before{content:"\f1e8"}.fa-yelp:before{content:"\f1e9"}.fa-newspaper-o:before{content:"\f1ea"}.fa-wifi:before{content:"\f1eb"}.fa-calculator:before{content:"\f1ec"}.fa-paypal:before{content:"\f1ed"}.fa-google-wallet:before{content:"\f1ee"}.fa-cc-visa:before{content:"\f1f0"}.fa-cc-mastercard:before{content:"\f1f1"}.fa-cc-discover:before{content:"\f1f2"}.fa-cc-amex:before{content:"\f1f3"}.fa-cc-paypal:before{content:"\f1f4"}.fa-cc-stripe:before{content:"\f1f5"}.fa-bell-slash:before{content:"\f1f6"}.fa-bell-slash-o:before{content:"\f1f7"}.fa-trash:before{content:"\f1f8"}.fa-copyright:before{content:"\f1f9"}.fa-at:before{content:"\f1fa"}.fa-eyedropper:before{content:"\f1fb"}.fa-paint-brush:before{content:"\f1fc"}.fa-birthday-cake:before{content:"\f1fd"}.fa-area-chart:before{content:"\f1fe"}.fa-pie-chart:before{content:"\f200"}.fa-line-chart:before{content:"\f201"}.fa-lastfm:before{content:"\f202"}.fa-lastfm-square:before{content:"\f203"}.fa-toggle-off:before{content:"\f204"}.fa-toggle-on:before{content:"\f205"}.fa-bicycle:before{content:"\f206"}.fa-bus:before{content:"\f207"}.fa-ioxhost:before{content:"\f208"}.fa-angellist:before{content:"\f209"}.fa-cc:before{content:"\f20a"}.fa-shekel:before,.fa-sheqel:before,.fa-ils:before{content:"\f20b"}.fa-meanpath:before{content:"\f20c"}.fa-buysellads:before{content:"\f20d"}.fa-connectdevelop:before{content:"\f20e"}.fa-dashcube:before{content:"\f210"}.fa-forumbee:before{content:"\f211"}.fa-leanpub:before{content:"\f212"}.fa-sellsy:before{content:"\f213"}.fa-shirtsinbulk:before{content:"\f214"}.fa-simplybuilt:before{content:"\f215"}.fa-skyatlas:before{content:"\f216"}.fa-cart-plus:before{content:"\f217"}.fa-cart-arrow-down:before{content:"\f218"}.fa-diamond:before{content:"\f219"}.fa-ship:before{content:"\f21a"}.fa-user-secret:before{content:"\f21b"}.fa-motorcycle:before{content:"\f21c"}.fa-street-vi
ew:before{content:"\f21d"}.fa-heartbeat:before{content:"\f21e"}.fa-venus:before{content:"\f221"}.fa-mars:before{content:"\f222"}.fa-mercury:before{content:"\f223"}.fa-intersex:before,.fa-transgender:before{content:"\f224"}.fa-transgender-alt:before{content:"\f225"}.fa-venus-double:before{content:"\f226"}.fa-mars-double:before{content:"\f227"}.fa-venus-mars:before{content:"\f228"}.fa-mars-stroke:before{content:"\f229"}.fa-mars-stroke-v:before{content:"\f22a"}.fa-mars-stroke-h:before{content:"\f22b"}.fa-neuter:before{content:"\f22c"}.fa-genderless:before{content:"\f22d"}.fa-facebook-official:before{content:"\f230"}.fa-pinterest-p:before{content:"\f231"}.fa-whatsapp:before{content:"\f232"}.fa-server:before{content:"\f233"}.fa-user-plus:before{content:"\f234"}.fa-user-times:before{content:"\f235"}.fa-hotel:before,.fa-bed:before{content:"\f236"}.fa-viacoin:before{content:"\f237"}.fa-train:before{content:"\f238"}.fa-subway:before{content:"\f239"}.fa-medium:before{content:"\f23a"}.fa-yc:before,.fa-y-combinator:before{content:"\f23b"}.fa-optin-monster:before{content:"\f23c"}.fa-opencart:before{content:"\f23d"}.fa-expeditedssl:before{content:"\f23e"}.fa-battery-4:before,.fa-battery:before,.fa-battery-full:before{content:"\f240"}.fa-battery-3:before,.fa-battery-three-quarters:before{content:"\f241"}.fa-battery-2:before,.fa-battery-half:before{content:"\f242"}.fa-battery-1:before,.fa-battery-quarter:before{content:"\f243"}.fa-battery-0:before,.fa-battery-empty:before{content:"\f244"}.fa-mouse-pointer:before{content:"\f245"}.fa-i-cursor:before{content:"\f246"}.fa-object-group:before{content:"\f247"}.fa-object-ungroup:before{content:"\f248"}.fa-sticky-note:before{content:"\f249"}.fa-sticky-note-o:before{content:"\f24a"}.fa-cc-jcb:before{content:"\f24b"}.fa-cc-diners-club:before{content:"\f24c"}.fa-clone:before{content:"\f24d"}.fa-balance-scale:before{content:"\f24e"}.fa-hourglass-o:before{content:"\f250"}.fa-hourglass-1:before,.fa-hourglass-start:before{content:"\f251"}.fa-hourg
lass-2:before,.fa-hourglass-half:before{content:"\f252"}.fa-hourglass-3:before,.fa-hourglass-end:before{content:"\f253"}.fa-hourglass:before{content:"\f254"}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:"\f255"}.fa-hand-stop-o:before,.fa-hand-paper-o:before{content:"\f256"}.fa-hand-scissors-o:before{content:"\f257"}.fa-hand-lizard-o:before{content:"\f258"}.fa-hand-spock-o:before{content:"\f259"}.fa-hand-pointer-o:before{content:"\f25a"}.fa-hand-peace-o:before{content:"\f25b"}.fa-trademark:before{content:"\f25c"}.fa-registered:before{content:"\f25d"}.fa-creative-commons:before{content:"\f25e"}.fa-gg:before{content:"\f260"}.fa-gg-circle:before{content:"\f261"}.fa-tripadvisor:before{content:"\f262"}.fa-odnoklassniki:before{content:"\f263"}.fa-odnoklassniki-square:before{content:"\f264"}.fa-get-pocket:before{content:"\f265"}.fa-wikipedia-w:before{content:"\f266"}.fa-safari:before{content:"\f267"}.fa-chrome:before{content:"\f268"}.fa-firefox:before{content:"\f269"}.fa-opera:before{content:"\f26a"}.fa-internet-explorer:before{content:"\f26b"}.fa-tv:before,.fa-television:before{content:"\f26c"}.fa-contao:before{content:"\f26d"}.fa-500px:before{content:"\f26e"}.fa-amazon:before{content:"\f270"}.fa-calendar-plus-o:before{content:"\f271"}.fa-calendar-minus-o:before{content:"\f272"}.fa-calendar-times-o:before{content:"\f273"}.fa-calendar-check-o:before{content:"\f274"}.fa-industry:before{content:"\f275"}.fa-map-pin:before{content:"\f276"}.fa-map-signs:before{content:"\f277"}.fa-map-o:before{content:"\f278"}.fa-map:before{content:"\f279"}.fa-commenting:before{content:"\f27a"}.fa-commenting-o:before{content:"\f27b"}.fa-houzz:before{content:"\f27c"}.fa-vimeo:before{content:"\f27d"}.fa-black-tie:before{content:"\f27e"}.fa-fonticons:before{content:"\f280"}.fa-reddit-alien:before{content:"\f281"}.fa-edge:before{content:"\f282"}.fa-credit-card-alt:before{content:"\f283"}.fa-codiepie:before{content:"\f284"}.fa-modx:before{content:"\f285"}.fa-fort-awesome:before{content:"\f286"
}.fa-usb:before{content:"\f287"}.fa-product-hunt:before{content:"\f288"}.fa-mixcloud:before{content:"\f289"}.fa-scribd:before{content:"\f28a"}.fa-pause-circle:before{content:"\f28b"}.fa-pause-circle-o:before{content:"\f28c"}.fa-stop-circle:before{content:"\f28d"}.fa-stop-circle-o:before{content:"\f28e"}.fa-shopping-bag:before{content:"\f290"}.fa-shopping-basket:before{content:"\f291"}.fa-hashtag:before{content:"\f292"}.fa-bluetooth:before{content:"\f293"}.fa-bluetooth-b:before{content:"\f294"}.fa-percent:before{content:"\f295"}.fa-gitlab:before{content:"\f296"}.fa-wpbeginner:before{content:"\f297"}.fa-wpforms:before{content:"\f298"}.fa-envira:before{content:"\f299"}.fa-universal-access:before{content:"\f29a"}.fa-wheelchair-alt:before{content:"\f29b"}.fa-question-circle-o:before{content:"\f29c"}.fa-blind:before{content:"\f29d"}.fa-audio-description:before{content:"\f29e"}.fa-volume-control-phone:before{content:"\f2a0"}.fa-braille:before{content:"\f2a1"}.fa-assistive-listening-systems:before{content:"\f2a2"}.fa-asl-interpreting:before,.fa-american-sign-language-interpreting:before{content:"\f2a3"}.fa-deafness:before,.fa-hard-of-hearing:before,.fa-deaf:before{content:"\f2a4"}.fa-glide:before{content:"\f2a5"}.fa-glide-g:before{content:"\f2a6"}.fa-signing:before,.fa-sign-language:before{content:"\f2a7"}.fa-low-vision:before{content:"\f2a8"}.fa-viadeo:before{content:"\f2a9"}.fa-viadeo-square:before{content:"\f2aa"}.fa-snapchat:before{content:"\f2ab"}.fa-snapchat-ghost:before{content:"\f2ac"}.fa-snapchat-square:before{content:"\f2ad"}.fa-pied-piper:before{content:"\f2ae"}.fa-first-order:before{content:"\f2b0"}.fa-yoast:before{content:"\f2b1"}.fa-themeisle:before{content:"\f2b2"}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:"\f2b3"}.fa-fa:before,.fa-font-awesome:before{content:"\f2b4"}.fa-handshake-o:before{content:"\f2b5"}.fa-envelope-open:before{content:"\f2b6"}.fa-envelope-open-o:before{content:"\f2b7"}.fa-linode:before{content:"\f2b8"}.fa-address
-book:before{content:"\f2b9"}.fa-address-book-o:before{content:"\f2ba"}.fa-vcard:before,.fa-address-card:before{content:"\f2bb"}.fa-vcard-o:before,.fa-address-card-o:before{content:"\f2bc"}.fa-user-circle:before{content:"\f2bd"}.fa-user-circle-o:before{content:"\f2be"}.fa-user-o:before{content:"\f2c0"}.fa-id-badge:before{content:"\f2c1"}.fa-drivers-license:before,.fa-id-card:before{content:"\f2c2"}.fa-drivers-license-o:before,.fa-id-card-o:before{content:"\f2c3"}.fa-quora:before{content:"\f2c4"}.fa-free-code-camp:before{content:"\f2c5"}.fa-telegram:before{content:"\f2c6"}.fa-thermometer-4:before,.fa-thermometer:before,.fa-thermometer-full:before{content:"\f2c7"}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:"\f2c8"}.fa-thermometer-2:before,.fa-thermometer-half:before{content:"\f2c9"}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:"\f2ca"}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:"\f2cb"}.fa-shower:before{content:"\f2cc"}.fa-bathtub:before,.fa-s15:before,.fa-bath:before{content:"\f2cd"}.fa-podcast:before{content:"\f2ce"}.fa-window-maximize:before{content:"\f2d0"}.fa-window-minimize:before{content:"\f2d1"}.fa-window-restore:before{content:"\f2d2"}.fa-times-rectangle:before,.fa-window-close:before{content:"\f2d3"}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:"\f2d4"}.fa-bandcamp:before{content:"\f2d5"}.fa-grav:before{content:"\f2d6"}.fa-etsy:before{content:"\f2d7"}.fa-imdb:before{content:"\f2d8"}.fa-ravelry:before{content:"\f2d9"}.fa-eercast:before{content:"\f2da"}.fa-microchip:before{content:"\f2db"}.fa-snowflake-o:before{content:"\f2dc"}.fa-superpowers:before{content:"\f2dd"}.fa-wpexplorer:before{content:"\f2de"}.fa-meetup:before{content:"\f2e0"}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0, 0, 0, 0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}
5 |
--------------------------------------------------------------------------------