├── .nojeklyll
├── test_files
│   └── requirements.txt
├── fastkaggle
│   ├── __init__.py
│   ├── _nbdev.py
│   ├── _modidx.py
│   └── core.py
├── images
│   ├── library-fastkaggle.png
│   └── libraries-pawpularity.png
├── MANIFEST.in
├── styles.css
├── install_quarto.sh
├── CHANGELOG.md
├── _quarto.yml
├── settings.ini
├── .github
│   └── workflows
│       └── deploy.yaml
├── Makefile
├── .gitignore
├── setup.py
├── README.md
├── index.ipynb
├── LICENSE
└── 00_core.ipynb
/.nojeklyll:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/test_files/requirements.txt:
--------------------------------------------------------------------------------
1 | fastcore
2 | timm
--------------------------------------------------------------------------------
/fastkaggle/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.0.8"
2 | from .core import *
3 |
4 |
--------------------------------------------------------------------------------
/images/library-fastkaggle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastai/fastkaggle/master/images/library-fastkaggle.png
--------------------------------------------------------------------------------
/images/libraries-pawpularity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastai/fastkaggle/master/images/libraries-pawpularity.png
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include settings.ini
2 | include LICENSE
3 | include CONTRIBUTING.md
4 | include README.md
5 | recursive-exclude * __pycache__
6 |
--------------------------------------------------------------------------------
/styles.css:
--------------------------------------------------------------------------------
1 | .cell-output pre {
2 | margin-left: 0.8rem;
3 | margin-top: 0;
4 | background: none;
5 | border-left: 2px solid lightsalmon;
6 | border-top-left-radius: 0;
7 | border-top-right-radius: 0;
8 | }
9 |
10 | .cell-output .sourceCode {
11 | background: none;
12 | margin-top: 0;
13 | }
14 |
15 | .cell > .sourceCode {
16 | margin-bottom: 0;
17 | }
18 |
--------------------------------------------------------------------------------
/fastkaggle/_nbdev.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT!
2 |
3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"]
4 |
5 | index = {"iskaggle": "00_core.ipynb",
6 | "setup_comp": "00_core.ipynb",
7 | "nb_meta": "00_core.ipynb",
8 | "push_notebook": "00_core.ipynb"}
9 |
10 | modules = ["core.py"]
11 |
12 | doc_url = "https://fastai.github.io/fastkaggle/"
13 |
14 | git_url = "https://github.com/fastai/fastkaggle/tree/master/"
15 |
16 | def custom_doc_links(name): return None
17 |
--------------------------------------------------------------------------------
/install_quarto.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | install_linux() {
4 | echo "...installing Quarto"
5 | wget -nv https://www.quarto.org/download/latest/quarto-linux-amd64.deb
6 | sudo dpkg -i *64.deb
7 | rm *64.deb
8 | }
9 |
10 | install_mac() {
11 | echo "...downloading Quarto installer"
12 | wget -nv https://www.quarto.org/download/latest/quarto-macos.pkg
13 | echo "...opening installer for Quarto"
14 | open quarto-macos.pkg
15 | }
16 |
17 | case "$OSTYPE" in
18 | linux*) install_linux ;;
19 | darwin*) install_mac ;;
20 | *) echo "make sure you install the latest version of quarto: https://quarto.org/docs/get-started/" ;;
21 | esac
22 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Release notes
2 |
3 |
4 |
5 | ## 0.0.7
6 |
7 | ### New Features
8 |
9 | - Datasets functionality + Docs ([#9](https://github.com/fastai/fastkaggle/pull/9)), thanks to [@Isaac-Flath](https://github.com/Isaac-Flath)
10 |   - Two high-level functions let users pass either a list of libraries or a requirements.txt file to maintain and update their own libraries as Kaggle datasets.
11 |
12 |
13 | ## 0.0.6
14 |
15 | ### Bugs Squashed
16 |
17 | - fix: comp should be competition in setup_comp ([#3](https://github.com/fastai/fastkaggle/pull/3)), thanks to [@n-e-w](https://github.com/n-e-w)
18 |
19 |
20 | ## 0.0.4
21 |
22 | ### Bugs Squashed
23 |
24 | - api not exported ([#1](https://github.com/fastai/fastkaggle/issues/1))
25 |
26 |
27 | ## 0.0.1
28 |
29 | - init release
30 |
31 |
--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
1 | ipynb-filters: [nbdev_filter]
2 |
3 | project:
4 | type: website
5 | output-dir: docs
6 | preview:
7 | port: 3000
8 | browser: false
9 |
10 | format:
11 | html:
12 | theme: cosmo
13 | css: styles.css
14 | toc: true
15 | toc-depth: 4
16 |
17 | website:
18 | title: "fastkaggle"
19 | site-url: "https://fastai.github.io/fastkaggle/"
20 | description: "Kaggling for fast kagglers!"
21 | execute:
22 | enabled: false
23 | twitter-card: true
24 | open-graph: true
25 | reader-mode: true
26 | repo-branch: master
27 | repo-url: "https://github.com/fastai/fastkaggle/tree/master/"
28 | repo-actions: [issue]
29 | navbar:
30 | background: primary
31 | search: true
32 | right:
33 | - icon: github
34 | href: "https://github.com/fastai/fastkaggle/tree/master/"
35 | sidebar:
36 | style: "floating"
37 |
38 | metadata-files:
39 | - sidebar.yml
40 | - custom.yml
41 |
--------------------------------------------------------------------------------
/settings.ini:
--------------------------------------------------------------------------------
1 | [DEFAULT]
2 | host = github
3 | lib_name = fastkaggle
4 | description = Kaggling for fast kagglers!
5 | copyright = Jeremy Howard, 2022 onwards
6 | keywords = machine-learning kaggle fastai nbdev
7 | user = fastai
8 | author = Jeremy Howard
9 | author_email = info@fast.ai
10 | branch = master
11 | version = 0.0.8
12 | min_python = 3.7
13 | audience = Developers
14 | language = English
15 | requirements = fastcore>=1.4.5 kaggle
16 | custom_sidebar = False
17 | license = apache2
18 | status = 2
19 | nbs_path = .
20 | doc_path = docs
21 | recursive = False
22 | tst_flags = notest
23 | doc_host = https://fastai.github.io
24 | doc_baseurl = /fastkaggle/
25 | git_url = https://github.com/fastai/fastkaggle/tree/master/
26 | lib_path = fastkaggle
27 | title = fastkaggle
28 | black_formatting = False
29 | readme_nb = index.ipynb
30 | allowed_metadata_keys =
31 | allowed_cell_metadata_keys =
32 | jupyter_hooks = True
33 | clean_ids = True
34 |
35 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yaml:
--------------------------------------------------------------------------------
1 | name: Deploy to GitHub Pages
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | - main
8 | workflow_dispatch:
9 |
10 | jobs:
11 | deploy:
12 | name: Deploy to GitHub Pages
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v3
16 | - uses: actions/setup-python@v3
17 | - name: Install Dependencies
18 | run: |
19 | python -m pip install --upgrade pip
20 | pip install nbdev
21 | make install
22 | - name: Build website
23 | env:
24 | KAGGLE_USERNAME: test
25 | KAGGLE_KEY: test
26 | run: make docs
27 | - name: Deploy to GitHub Pages
28 | uses: peaceiris/actions-gh-pages@v3
29 | with:
30 | github_token: ${{ secrets.GITHUB_TOKEN }}
31 | force_orphan: true
32 | publish_dir: ./docs
33 | # The following lines assign commit authorship to the official
34 | # GH-Actions bot for deploys to `gh-pages` branch:
35 | # https://github.com/actions/checkout/issues/13#issuecomment-724415212
36 |         # The GH Actions bot is used by default if you don't specify the two fields.
37 | # You can swap them out with your own user credentials.
38 | user_name: github-actions[bot]
39 | user_email: 41898282+github-actions[bot]@users.noreply.github.com
40 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .ONESHELL:
2 | SHELL := /bin/bash
3 |
4 | exp:
5 | nbdev_clean
6 | nbdev_export
7 |
8 | help: ## Show this help
9 | @egrep -h '\s##\s' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
10 |
11 | sync: ## Propagates any change in the modules (.py files) to the notebooks that created them
12 | nbdev_update
13 |
14 | deploy: docs ## Push local docs to gh-pages branch
15 | nbdev_ghp_deploy
16 |
17 | preview: ## Live preview quarto docs with hot reloading.
18 | nbdev_sidebar
19 | nbdev_export
20 | 	IN_TEST=1 nbdev_quarto --preview
21 |
22 | docs: .FORCE ## Build quarto docs and put them into folder specified in `doc_path` in settings.ini
23 | nbdev_export
24 | nbdev_quarto
25 |
26 | prepare: ## Export notebooks to python modules, test code and clean notebooks.
27 | nbdev_export
28 | nbdev_test
29 | nbdev_clean
30 |
31 | test: ## Test notebooks
32 | nbdev_test
33 |
34 | release_all: pypi release_conda ## Release python package on pypi and conda. Also bumps version number automatically.
35 | nbdev_bump_version
36 | nbdev_export
37 |
38 | release_pypi: pypi ## Release python package on pypi. Also bumps version number automatically.
39 | nbdev_export
40 | nbdev_bump_version
41 |
42 | release_conda:
43 | fastrelease_conda_package
44 |
45 | pypi: dist
46 | twine upload --repository pypi dist/*
47 |
48 | dist: clean
49 | python setup.py sdist bdist_wheel
50 |
51 | clean:
52 | rm -rf dist
53 |
54 |
55 | install: install_quarto ## Install quarto and the latest version of the local Python package as an editable install
56 | pip install -e ".[dev]"
57 |
58 | install_py: .FORCE
59 | nbdev_export
60 | pip install -e ".[dev]"
61 |
62 | install_quarto: .FORCE ## Install the latest version of quarto for Mac and Linux. Go to https://quarto.org/docs/get-started/ for Windows.
63 | ./install_quarto.sh
64 |
65 | .FORCE:
66 |
67 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sidebar.yml
2 | conda/
3 | titanic*
4 | docs/
5 |
6 | *.bak
7 | .gitattributes
8 | .last_checked
9 | .gitconfig
10 | *.bak
11 | *.log
12 | *~
13 | ~*
14 | _tmp*
15 | tmp*
16 | tags
17 | *.pkg
18 |
19 | # Byte-compiled / optimized / DLL files
20 | __pycache__/
21 | *.py[cod]
22 | *$py.class
23 |
24 | # C extensions
25 | *.so
26 |
27 | # Distribution / packaging
28 | .Python
29 | env/
30 | build/
31 | develop-eggs/
32 | dist/
33 | downloads/
34 | eggs/
35 | .eggs/
36 | lib/
37 | lib64/
38 | parts/
39 | sdist/
40 | var/
41 | wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 |
46 | # PyInstaller
47 | # Usually these files are written by a python script from a template
48 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
49 | *.manifest
50 | *.spec
51 |
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 |
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | .hypothesis/
66 |
67 | # Translations
68 | *.mo
69 | *.pot
70 |
71 | # Django stuff:
72 | *.log
73 | local_settings.py
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # pyenv
92 | .python-version
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # dotenv
101 | .env
102 |
103 | # virtualenv
104 | .venv
105 | venv/
106 | ENV/
107 |
108 | # Spyder project settings
109 | .spyderproject
110 | .spyproject
111 |
112 | # Rope project settings
113 | .ropeproject
114 |
115 | # mkdocs documentation
116 | /site
117 |
118 | # mypy
119 | .mypy_cache/
120 |
121 | .vscode
122 | *.swp
123 |
124 | # osx generated files
125 | .DS_Store
126 | .DS_Store?
127 | .Trashes
128 | ehthumbs.db
129 | Thumbs.db
130 | .idea
131 |
132 | # pytest
133 | .pytest_cache
134 |
135 | # tools/trust-doc-nbs
136 | docs_src/.last_checked
137 |
138 | # symlinks to fastai
139 | docs_src/fastai
140 | tools/fastai
141 |
142 | # link checker
143 | checklink/cookies.txt
144 |
145 | # .gitconfig is now autogenerated
146 | .gitconfig
147 |
148 |
149 | /.quarto/
150 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pkg_resources import parse_version
2 | from configparser import ConfigParser
3 | import setuptools
4 | assert parse_version(setuptools.__version__)>=parse_version('36.2')
5 |
6 | # note: all settings are in settings.ini; edit there, not here
7 | config = ConfigParser(delimiters=['='])
8 | config.read('settings.ini')
9 | cfg = config['DEFAULT']
10 |
11 | cfg_keys = 'version description keywords author author_email'.split()
12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
14 | setup_cfg = {o:cfg[o] for o in cfg_keys}
15 |
16 | licenses = {
17 | 'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
18 | 'mit': ('MIT License', 'OSI Approved :: MIT License'),
19 | 'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
20 | 'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
21 | 'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
22 | }
23 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
24 | '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
25 | py_versions = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8'.split()
26 |
27 | requirements = cfg.get('requirements','').split()
28 | min_python = cfg['min_python']
29 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
30 | dev_requirements = (cfg.get('dev_requirements') or '').split()
31 |
32 | setuptools.setup(
33 | name = cfg['lib_name'],
34 | license = lic[0],
35 | classifiers = [
36 | 'Development Status :: ' + statuses[int(cfg['status'])],
37 | 'Intended Audience :: ' + cfg['audience'].title(),
38 | 'Natural Language :: ' + cfg['language'].title(),
39 | ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
40 | url = cfg['git_url'],
41 | packages = setuptools.find_packages(),
42 | include_package_data = True,
43 | install_requires = requirements,
44 | extras_require={ 'dev': dev_requirements },
45 | dependency_links = cfg.get('dep_links','').split(),
46 | python_requires = '>=' + cfg['min_python'],
47 | long_description = open('README.md').read(),
48 | long_description_content_type = 'text/markdown',
49 | zip_safe = False,
50 | entry_points = {
51 | 'console_scripts': cfg.get('console_scripts','').split(),
52 | 'mkdocs.plugins': [ 'rm_num_prefix = nbdev.mkdocs:RmNumPrefix' ],
53 | 'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
54 | },
55 | **setup_cfg)
56 |
57 |
58 |
--------------------------------------------------------------------------------
/fastkaggle/_modidx.py:
--------------------------------------------------------------------------------
1 | # Autogenerated by nbdev
2 |
3 | d = { 'settings': { 'allowed_cell_metadata_keys': '',
4 | 'allowed_metadata_keys': '',
5 | 'audience': 'Developers',
6 | 'author': 'Jeremy Howard',
7 | 'author_email': 'info@fast.ai',
8 | 'black_formatting': 'False',
9 | 'branch': 'master',
10 | 'clean_ids': 'True',
11 | 'copyright': 'Jeremy Howard, 2022 onwards',
12 | 'custom_sidebar': 'False',
13 | 'description': 'Kaggling for fast kagglers!',
14 | 'doc_baseurl': '/fastkaggle/',
15 | 'doc_host': 'https://fastai.github.io',
16 | 'doc_path': 'docs',
17 | 'git_url': 'https://github.com/fastai/fastkaggle/tree/master/',
18 | 'host': 'github',
19 | 'jupyter_hooks': 'True',
20 | 'keywords': 'machine-learning kaggle fastai nbdev',
21 | 'language': 'English',
22 | 'lib_name': 'fastkaggle',
23 | 'lib_path': 'fastkaggle',
24 | 'license': 'apache2',
25 | 'min_python': '3.7',
26 | 'nbs_path': '.',
27 | 'readme_nb': 'index.ipynb',
28 | 'recursive': 'False',
29 | 'requirements': 'fastcore>=1.4.5 kaggle',
30 | 'status': '2',
31 | 'title': 'fastkaggle',
32 | 'tst_flags': 'notest',
33 | 'user': 'fastai',
34 | 'version': '0.0.8'},
35 | 'syms': { 'fastkaggle.core': { 'fastkaggle.core.check_ds_exists': 'https://fastai.github.io/fastkaggle/core.html#check_ds_exists',
36 | 'fastkaggle.core.create_libs_datasets': 'https://fastai.github.io/fastkaggle/core.html#create_libs_datasets',
37 | 'fastkaggle.core.create_requirements_dataset': 'https://fastai.github.io/fastkaggle/core.html#create_requirements_dataset',
38 | 'fastkaggle.core.get_dataset': 'https://fastai.github.io/fastkaggle/core.html#get_dataset',
39 | 'fastkaggle.core.get_local_ds_ver': 'https://fastai.github.io/fastkaggle/core.html#get_local_ds_ver',
40 | 'fastkaggle.core.get_pip_libraries': 'https://fastai.github.io/fastkaggle/core.html#get_pip_libraries',
41 | 'fastkaggle.core.get_pip_library': 'https://fastai.github.io/fastkaggle/core.html#get_pip_library',
42 | 'fastkaggle.core.import_kaggle': 'https://fastai.github.io/fastkaggle/core.html#import_kaggle',
43 | 'fastkaggle.core.iskaggle': 'https://fastai.github.io/fastkaggle/core.html#iskaggle',
44 | 'fastkaggle.core.mk_dataset': 'https://fastai.github.io/fastkaggle/core.html#mk_dataset',
45 | 'fastkaggle.core.nb_meta': 'https://fastai.github.io/fastkaggle/core.html#nb_meta',
46 | 'fastkaggle.core.push_dataset': 'https://fastai.github.io/fastkaggle/core.html#push_dataset',
47 | 'fastkaggle.core.push_notebook': 'https://fastai.github.io/fastkaggle/core.html#push_notebook',
48 | 'fastkaggle.core.setup_comp': 'https://fastai.github.io/fastkaggle/core.html#setup_comp'}}}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | fastkaggle
2 | ================
3 |
4 |
5 |
6 | ## Install
7 |
8 | Either:
9 |
10 | pip install fastkaggle
11 |
12 | or:
13 |
14 | mamba install -c fastai fastkaggle
15 |
16 | (or replace `mamba` with `conda` if you don’t mind it taking much longer
17 | to run…)
18 |
19 | ## How to use
20 |
21 | ### Competition
22 |
23 | This little library is where I’ll be putting snippets of stuff which are
24 | useful on Kaggle. Functionality includes the following:
25 |
26 | It defines
27 | [`iskaggle`](https://fastai.github.io/fastkaggle/core.html#iskaggle)
28 | which is `True` if you’re running on Kaggle:
29 |
30 | ``` python
31 | 'Kaggle' if iskaggle else 'Not Kaggle'
32 | ```
33 |
34 | 'Not Kaggle'
35 |
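For instance, `iskaggle` can gate environment-specific settings. A minimal
sketch (the batch-size choice below is purely illustrative):

``` python
from fastkaggle import *

# `iskaggle` is truthy only when the KAGGLE_KERNEL_RUN_TYPE environment
# variable is set, i.e. when this code is running inside a Kaggle kernel.
bs = 32 if iskaggle else 128  # e.g. smaller batches on Kaggle GPUs
```
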
36 | It provides a
37 | [`setup_comp`](https://fastai.github.io/fastkaggle/core.html#setup_comp)
38 | function which gets a path to the data for a competition, downloading it
39 | if needed, and also installs any modules that might be missing or out of
40 | date if running on Kaggle:
41 |
42 | ``` python
43 | setup_comp('titanic')
44 | ```
45 |
46 | Path('titanic')
47 |
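If you pass space-separated module names via `install`, they'll be
pip-installed first when running on Kaggle (the modules below are just
examples):

``` python
setup_comp('titanic', install='fastai timm')
```
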
48 | There’s also
49 | [`push_notebook`](https://fastai.github.io/fastkaggle/core.html#push_notebook)
50 | to push a notebook to Kaggle Notebooks, and
51 | [`import_kaggle`](https://fastai.github.io/fastkaggle/core.html#import_kaggle)
52 | to use the Kaggle API (even when you’re on Kaggle!). See the
53 | `fastkaggle.core` docs for details.
54 |
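For example, here’s how the `fastkaggle.core` docs push one of Jeremy’s
competition notebooks (Kaggle recommends the `id` match the slugified
title):

``` python
push_notebook('jhoward', 'first-steps-road-to-the-top-part-1',
              title='First Steps: Road to the Top, Part 1',
              file='first-steps-road-to-the-top-part-1.ipynb',
              competition='paddy-disease-classification',
              private=False, gpu=True)
```
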
55 | ### Datasets
56 |
57 | This section is designed to make uploading pip libraries to kaggle
58 | datasets easy. There are two primary high-level functions. First we
59 | define our Kaggle username and the local path we want to use to store
60 | datasets when we create them.
61 |
62 |
63 |
64 | > **Usage tip**
65 | >
66 | > The purpose of this is to create datasets that can be used in no
67 | > internet inference competitions to install libraries using
68 | > `pip install -Uqq library --no-index --find-links=file:///kaggle/input/your_dataset/`
69 |
70 |
71 |
72 | ``` python
73 | lib_path = Path('/root/kaggle_datasets')
74 | username = 'isaacflath'
75 | ```
76 |
77 | #### List of Libraries
78 |
79 | We can take a list of libraries and upload them as separate datasets.
80 | For example, the below will create a `library-fastcore` and
81 | `library-timm` dataset. If they already exist, it will push a new
82 | version if there is a more recent version available.
83 |
84 | ``` python
85 | libs = ['fastcore','timm']
86 | create_libs_datasets(libs,lib_path,username)
87 | ```
88 |
89 | Processing fastcore as library-fastcore at /root/kaggle_datasets/library-fastcore
90 | -----Downloading or Creating Dataset
91 | -----Checking dataset version against pip
92 | -----Kaggle dataset already up to date 1.5.16 to 1.5.16
93 | Processing timm as library-timm at /root/kaggle_datasets/library-timm
94 | -----Downloading or Creating Dataset
95 | -----Checking dataset version against pip
96 | -----Kaggle dataset already up to date 0.6.7 to 0.6.7
97 | Complete
98 |
99 | This creates datasets in kaggle with the needed files.
100 |
101 | 
102 |
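In a no-internet inference notebook you can then attach the dataset and
install from it. A sketch, assuming the dataset is attached as
`library-fastcore`:

``` python
import subprocess

# Offline install from the attached dataset (the same pip command as in
# the usage tip above, run from Python).
subprocess.run(['pip', 'install', '-Uqq', 'fastcore', '--no-index',
                '--find-links', 'file:///kaggle/input/library-fastcore/'],
               check=True)
```
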
103 | #### requirements.txt
104 |
105 | We can also create a single dataset containing multiple libraries based
106 | on a `requirements.txt` file for the project. If any files differ, it
107 | will push a new version.
108 |
109 | ``` python
110 | create_requirements_dataset('test_files/requirements.txt',lib_path,'libraries-pawpularity', username)
111 | ```
112 |
113 | Processing libraries-pawpularity at /root/kaggle_datasets/libraries-pawpularity
114 | -----Downloading or Creating Dataset
115 | Data package template written to: /root/kaggle_datasets/libraries-pawpularity/dataset-metadata.json
116 | -----Checking dataset version against pip
117 | -----Updating libraries-pawpularity in Kaggle
118 | Complete
119 |
120 | This creates a dataset in kaggle with the needed files.
121 |
122 | 
123 |
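Both helpers compose the lower-level functions in `fastkaggle.core`.
Roughly, the requirements.txt flow looks like this (a sketch of the steps,
not the exact implementation; the version comment is illustrative):

``` python
from pathlib import Path
from fastkaggle.core import mk_dataset, get_pip_libraries, push_dataset

ds_path = Path('/root/kaggle_datasets/libraries-pawpularity')
mk_dataset(ds_path, 'libraries-pawpularity', force=True)   # metadata + create on kaggle
get_pip_libraries(ds_path, 'test_files/requirements.txt')  # download the wheel files
push_dataset(ds_path, 'Update libraries')                  # push a new dataset version
```
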
--------------------------------------------------------------------------------
/index.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#|hide\n",
10 | "from fastkaggle.core import *\n",
11 | "from pathlib import Path"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "# fastkaggle\n",
19 | "\n",
20 | "> Kaggling for fast kagglers!"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "## Install"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {},
33 | "source": [
34 | "Either:\n",
35 | "\n",
36 | " pip install fastkaggle\n",
37 | "\n",
38 | "or:\n",
39 | "\n",
40 | " mamba install -c fastai fastkaggle\n",
41 | "\n",
42 | "(or replace `mamba` with `conda` if you don't mind it taking much longer to run...)"
43 | ]
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {},
48 | "source": [
49 | "## How to use"
50 | ]
51 | },
52 | {
53 | "cell_type": "markdown",
54 | "metadata": {},
55 | "source": [
56 | "### Competition"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {},
62 | "source": [
63 | "This little library is where I'll be putting snippets of stuff which are useful on Kaggle. Functionality includes the following:\n",
64 | "\n",
65 | "It defines `iskaggle` which is `True` if you're running on Kaggle:"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [
73 | {
74 | "data": {
75 | "text/plain": [
76 | "'Not Kaggle'"
77 | ]
78 | },
79 | "execution_count": null,
80 | "metadata": {},
81 | "output_type": "execute_result"
82 | }
83 | ],
84 | "source": [
85 | "'Kaggle' if iskaggle else 'Not Kaggle'"
86 | ]
87 | },
88 | {
89 | "cell_type": "markdown",
90 | "metadata": {},
91 | "source": [
92 |     "It provides a `setup_comp` function which gets a path to the data for a competition, downloading it if needed, and also installs any modules that might be missing or out of date if running on Kaggle:"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": null,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": [
103 | "Path('titanic')"
104 | ]
105 | },
106 | "execution_count": null,
107 | "metadata": {},
108 | "output_type": "execute_result"
109 | }
110 | ],
111 | "source": [
112 | "setup_comp('titanic')"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 |     "There's also `push_notebook` to push a notebook to Kaggle Notebooks, and `import_kaggle` to use the Kaggle API (even when you're on Kaggle!). See the `fastkaggle.core` docs for details."
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "### Datasets"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "metadata": {},
132 | "source": [
133 |     "This section is designed to make uploading pip libraries to kaggle datasets easy. There are two primary high-level functions. First we define our Kaggle username and the local path we want to use to store datasets when we create them. \n",
134 | "\n",
135 | ":::{.callout-tip}\n",
136 | "## Usage tip\n",
137 | "The purpose of this is to create datasets that can be used in no internet inference competitions to install libraries using `pip install -Uqq library --no-index --find-links=file:///kaggle/input/your_dataset/`\n",
138 | ":::"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "lib_path = Path.home()/'kaggle_datasets'\n",
148 | "username = 'isaacflath'"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "#### List of Libraries\n",
156 | "\n",
157 |     "We can take a list of libraries and upload them as separate datasets. For example, the below will create `library-fastcore`, `library-flask`, and `library-fastkaggle` datasets. If they already exist, it will push a new version if there is a more recent version available."
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": null,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "Processing fastcore as library-fastcore at /Users/isaacflath/kaggle_datasets/library-fastcore\n",
170 | "-----Downloading or Creating Dataset\n",
171 | "-----Checking dataset version against pip\n",
172 | "-----Kaggle dataset already up to date 1.5.16 to 1.5.16\n",
173 | "Processing flask as library-flask at /Users/isaacflath/kaggle_datasets/library-flask\n",
174 | "-----Downloading or Creating Dataset\n",
175 | "-----Checking dataset version against pip\n",
176 | "-----Kaggle dataset already up to date 2.2.2 to 2.2.2\n",
177 | "Processing fastkaggle as library-fastkaggle at /Users/isaacflath/kaggle_datasets/library-fastkaggle\n",
178 | "-----Downloading or Creating Dataset\n",
179 | "-----Checking dataset version against pip\n",
180 | "-----Kaggle dataset already up to date 0.0.6 to 0.0.6\n",
181 | "Complete\n"
182 | ]
183 | }
184 | ],
185 | "source": [
186 | "libs = ['fastcore','flask','fastkaggle']\n",
187 | "create_libs_datasets(libs,lib_path,username)"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {},
193 | "source": [
194 |     "This creates datasets in kaggle with the needed files. For example, the library `fastkaggle` looks like this in kaggle.\n",
195 | "\n",
196 | ""
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "#### requirements.txt \n",
204 | "\n",
205 |     "We can also create a single dataset containing multiple libraries based on a `requirements.txt` file for the project. If any files differ, it will push a new version."
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "name": "stdout",
215 | "output_type": "stream",
216 | "text": [
217 | "Processing libraries-pawpularity at /root/kaggle_datasets/libraries-pawpularity\n",
218 | "-----Downloading or Creating Dataset\n",
219 | "Data package template written to: /root/kaggle_datasets/libraries-pawpularity/dataset-metadata.json\n",
220 | "-----Checking dataset version against pip\n",
221 | "-----Updating libraries-pawpularity in Kaggle\n",
222 | "Complete\n"
223 | ]
224 | }
225 | ],
226 | "source": [
227 | "create_requirements_dataset('test_files/requirements.txt',lib_path,'libraries-pawpularity', username)"
228 | ]
229 | },
230 | {
231 | "cell_type": "markdown",
232 | "metadata": {},
233 | "source": [
234 | "This creates a dataset in kaggle with the needed files.\n",
235 | "\n",
236 | ""
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": null,
242 | "metadata": {},
243 | "outputs": [],
244 | "source": []
245 | }
246 | ],
247 | "metadata": {
248 | "kernelspec": {
249 | "display_name": "Python 3 (ipykernel)",
250 | "language": "python",
251 | "name": "python3"
252 | }
253 | },
254 | "nbformat": 4,
255 | "nbformat_minor": 4
256 | }
257 |
--------------------------------------------------------------------------------
/fastkaggle/core.py:
--------------------------------------------------------------------------------
1 | # AUTOGENERATED! DO NOT EDIT! File to edit: ../00_core.ipynb.
2 |
3 | # %% auto 0
4 | __all__ = ['iskaggle', 'import_kaggle', 'setup_comp', 'nb_meta', 'push_notebook', 'check_ds_exists', 'mk_dataset', 'get_dataset',
5 | 'get_pip_library', 'get_pip_libraries', 'push_dataset', 'get_local_ds_ver', 'create_libs_datasets',
6 | 'create_requirements_dataset']
7 |
8 | # %% ../00_core.ipynb 3
9 | import os,json,subprocess, shutil
10 | import re
11 | from fastcore.utils import *
12 | # from fastcore.all import *
13 |
14 | # %% ../00_core.ipynb 4
15 | iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
16 |
17 | # %% ../00_core.ipynb 5
18 | def import_kaggle():
19 | "Import kaggle API, using Kaggle secrets `kaggle_username` and `kaggle_key` if needed"
20 | if iskaggle:
21 | from kaggle_secrets import UserSecretsClient
22 | sec = UserSecretsClient()
23 | os.environ['KAGGLE_USERNAME'] = sec.get_secret("kaggle_username")
24 | if not os.environ['KAGGLE_USERNAME']: raise Exception("Please insert your Kaggle username and key into Kaggle secrets")
25 | os.environ['KAGGLE_KEY'] = sec.get_secret("kaggle_key")
26 | from kaggle import api
27 | return api
28 |
29 | # %% ../00_core.ipynb 7
30 | def setup_comp(competition, install=''):
31 | "Get a path to data for `competition`, downloading it if needed"
32 | if iskaggle:
33 | if install:
34 | os.system(f'pip install -Uqq {install}')
35 | return Path('../input')/competition
36 | else:
37 | path = Path(competition)
38 | api = import_kaggle()
39 | if not path.exists():
40 | import zipfile
41 | api.competition_download_cli(str(competition))
42 | zipfile.ZipFile(f'{competition}.zip').extractall(str(competition))
43 | return path
44 |
45 | # %% ../00_core.ipynb 10
46 | def nb_meta(user, id, title, file, competition=None, private=True, gpu=False, internet=True, linked_datasets=None):
47 | "Get the `dict` required for a kernel-metadata.json file"
48 | d = {
49 | "id": f"{user}/{id}",
50 | "title": title,
51 | "code_file": file,
52 | "language": "python",
53 | "kernel_type": "notebook",
54 | "is_private": private,
55 | "enable_gpu": gpu,
56 | "enable_internet": internet,
57 | "keywords": [],
58 | "dataset_sources": linked_datasets if linked_datasets else [],
59 | "kernel_sources": []
60 | }
61 | if competition: d["competition_sources"] = [f"competitions/{competition}"]
62 | return d
63 |
64 | # %% ../00_core.ipynb 12
65 | def push_notebook(user, id, title, file, path='.', competition=None, private=True, gpu=False, internet=True, linked_datasets=None):
66 | "Push notebook `file` to Kaggle Notebooks"
67 | meta = nb_meta(user, id, title, file=file, competition=competition, private=private, gpu=gpu, internet=internet, linked_datasets=linked_datasets)
68 | path = Path(path)
69 | nm = 'kernel-metadata.json'
70 | path.mkdir(exist_ok=True, parents=True)
71 | with open(path/nm, 'w') as f: json.dump(meta, f, indent=2)
72 | api = import_kaggle()
73 | api.kernels_push_cli(str(path))
74 |
75 | # %% ../00_core.ipynb 16
76 | def check_ds_exists(dataset_slug # Dataset slug (ie "zillow/zecon")
77 | ):
78 | '''Checks if a dataset exists in kaggle and returns boolean'''
79 | api = import_kaggle()
80 | ds_search = L(api.dataset_list(mine=True)).filter(lambda x: str(x)==dataset_slug)
81 | if len(ds_search)==1: return True
82 | elif len(ds_search)==0: return False
83 |     else: raise Exception("Multiple datasets found - Check Manually")
84 |
85 | # %% ../00_core.ipynb 17
86 | def mk_dataset(dataset_path, # Local path to create dataset in
87 | title, # Name of the dataset
88 | force=False, # Should it overwrite or error if exists?
89 | upload=True # Should it upload and create on kaggle
90 | ):
91 | '''Creates minimal dataset metadata needed to push new dataset to kaggle'''
92 | dataset_path = Path(dataset_path)
93 | dataset_path.mkdir(exist_ok=force,parents=True)
94 | api = import_kaggle()
95 | api.dataset_initialize(dataset_path)
96 | md = json.load(open(dataset_path/'dataset-metadata.json'))
97 | md['title'] = title
98 | md['id'] = md['id'].replace('INSERT_SLUG_HERE',title)
99 | json.dump(md,open(dataset_path/'dataset-metadata.json','w'))
100 |     if upload: (dataset_path/'empty.txt').touch()
101 |     if upload: api.dataset_create_new(str(dataset_path),public=True,dir_mode='zip',quiet=True)
102 |
103 | # %% ../00_core.ipynb 19
104 | def get_dataset(dataset_path, # Local path to download dataset to
105 | dataset_slug, # Dataset slug (ie "zillow/zecon")
106 | unzip=True, # Should it unzip after downloading?
107 | force=False # Should it overwrite or error if dataset_path exists?
108 | ):
109 | '''Downloads an existing dataset and metadata from kaggle'''
110 | if not force: assert not Path(dataset_path).exists()
111 | api = import_kaggle()
112 | api.dataset_metadata(dataset_slug,str(dataset_path))
113 | api.dataset_download_files(dataset_slug,str(dataset_path))
114 | if unzip:
115 | zipped_file = Path(dataset_path)/f"{dataset_slug.split('/')[-1]}.zip"
116 | import zipfile
117 | with zipfile.ZipFile(zipped_file, 'r') as zip_ref:
118 | zip_ref.extractall(Path(dataset_path))
119 | zipped_file.unlink()
120 |
121 |
122 | # %% ../00_core.ipynb 20
123 | def get_pip_library(dataset_path, # Local path to download pip library to
124 | pip_library, # name of library for pip to install
125 | pip_cmd="pip" # pip base to use (ie "pip3" or "pip")
126 | ):
127 | '''Download the whl files for pip_library and store in dataset_path'''
128 | bashCommand = f"{pip_cmd} download {pip_library} -d {dataset_path}"
129 | process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
130 | output, error = process.communicate()
131 |
132 | # %% ../00_core.ipynb 21
133 | def get_pip_libraries(dataset_path, # Local path to download pip library to
134 | requirements_path, # path to requirements file
135 | pip_cmd="pip" # pip base to use (ie "pip3" or "pip")
136 | ):
137 | '''Download whl files for a requirements.txt file and store in dataset_path'''
138 | bashCommand = f"{pip_cmd} download -r {requirements_path} -d {dataset_path}"
139 | process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
140 | output, error = process.communicate()
141 |
142 | # %% ../00_core.ipynb 23
143 | def push_dataset(dataset_path, # Local path where dataset is stored
144 | version_comment # Comment associated with this dataset update
145 | ):
146 | '''Push dataset update to kaggle. Dataset path must contain dataset metadata file'''
147 | api = import_kaggle()
148 | api.dataset_create_version(str(dataset_path),version_comment,dir_mode='zip',quiet=True)
149 |
150 | # %% ../00_core.ipynb 24
151 | def get_local_ds_ver(lib_path, # Local path dataset is stored in
152 | lib # Name of library (ie "fastcore")
153 | ):
154 | '''checks a local copy of kaggle dataset for library version number'''
155 | wheel_lib_name = lib.replace('-','_')
156 | local_path = (lib_path/f"library-{lib}")
157 | lib_whl = local_path.ls().filter(lambda x: wheel_lib_name in x.name.lower())
158 | if 1==len(lib_whl):
159 | return re.search(f"(?<={wheel_lib_name}-)[\d+.]+\d",lib_whl[0].name.lower())[0]
160 |     elif 0<len(lib_whl): raise Exception(f"Multiple wheel files found for {lib} - Check Manually")
161 |     else: return None
--------------------------------------------------------------------------------
/00_core.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "#|default_exp core"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "# core\n",
17 |     "\n",
18 |     "> API details for fastkaggle."
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": null,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "#|hide\n",
28 | "from nbdev.showdoc import *"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": null,
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "#|export\n",
38 | "import os,json,subprocess, shutil\n",
39 | "import re\n",
40 | "from fastcore.utils import *\n",
41 | "# from fastcore.all import *"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "#|export\n",
51 | "iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "#|export\n",
61 | "def import_kaggle():\n",
62 | " \"Import kaggle API, using Kaggle secrets `kaggle_username` and `kaggle_key` if needed\"\n",
63 | " if iskaggle:\n",
64 | " from kaggle_secrets import UserSecretsClient\n",
65 | " sec = UserSecretsClient()\n",
66 | " os.environ['KAGGLE_USERNAME'] = sec.get_secret(\"kaggle_username\")\n",
67 | " if not os.environ['KAGGLE_USERNAME']: raise Exception(\"Please insert your Kaggle username and key into Kaggle secrets\")\n",
68 | " os.environ['KAGGLE_KEY'] = sec.get_secret(\"kaggle_key\")\n",
69 | " from kaggle import api\n",
70 | " return api"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "data": {
80 | "text/plain": [
81 | "(#20) [contradictory-my-dear-watson,gan-getting-started,store-sales-time-series-forecasting,tpu-getting-started,digit-recognizer,titanic,house-prices-advanced-regression-techniques,connectx,nlp-getting-started,spaceship-titanic...]"
82 | ]
83 | },
84 | "execution_count": null,
85 | "metadata": {},
86 | "output_type": "execute_result"
87 | }
88 | ],
89 | "source": [
90 | "api = import_kaggle()\n",
91 | "L(api.competitions_list())"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": null,
97 | "metadata": {},
98 | "outputs": [],
99 | "source": [
100 | "#|export\n",
101 | "def setup_comp(competition, install=''):\n",
102 | " \"Get a path to data for `competition`, downloading it if needed\"\n",
103 | " if iskaggle:\n",
104 | " if install:\n",
105 | " os.system(f'pip install -Uqq {install}')\n",
106 | " return Path('../input')/competition\n",
107 | " else:\n",
108 | " path = Path(competition)\n",
109 | " api = import_kaggle()\n",
110 | " if not path.exists():\n",
111 | " import zipfile\n",
112 | " api.competition_download_cli(str(competition))\n",
113 | " zipfile.ZipFile(f'{competition}.zip').extractall(str(competition))\n",
114 | " return path"
115 | ]
116 | },
117 | {
118 | "cell_type": "code",
119 | "execution_count": null,
120 | "metadata": {},
121 | "outputs": [
122 | {
123 | "data": {
124 | "text/plain": [
125 | "Path('titanic')"
126 | ]
127 | },
128 | "execution_count": null,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "setup_comp('titanic')"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "metadata": {},
140 | "source": [
141 |     "If you pass a list of space-separated modules to `install`, they'll be installed if running on Kaggle."
142 | ]
143 | },
144 | {
145 | "cell_type": "code",
146 | "execution_count": null,
147 | "metadata": {},
148 | "outputs": [],
149 | "source": [
150 | "#|export\n",
151 | "def nb_meta(user, id, title, file, competition=None, private=True, gpu=False, internet=True, linked_datasets=None):\n",
152 | " \"Get the `dict` required for a kernel-metadata.json file\"\n",
153 | " d = {\n",
154 | " \"id\": f\"{user}/{id}\",\n",
155 | " \"title\": title,\n",
156 | " \"code_file\": file,\n",
157 | " \"language\": \"python\",\n",
158 | " \"kernel_type\": \"notebook\",\n",
159 | " \"is_private\": private,\n",
160 | " \"enable_gpu\": gpu,\n",
161 | " \"enable_internet\": internet,\n",
162 | " \"keywords\": [],\n",
163 | " \"dataset_sources\": linked_datasets if linked_datasets else [],\n",
164 | " \"kernel_sources\": []\n",
165 | " }\n",
166 | " if competition: d[\"competition_sources\"] = [f\"competitions/{competition}\"]\n",
167 | " return d"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": null,
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "data": {
177 | "text/plain": [
178 | "{'id': 'jhoward/my-notebook',\n",
179 | " 'title': 'My notebook',\n",
180 | " 'code_file': 'my-notebook.ipynb',\n",
181 | " 'language': 'python',\n",
182 | " 'kernel_type': 'notebook',\n",
183 | " 'is_private': True,\n",
184 | " 'enable_gpu': False,\n",
185 | " 'enable_internet': True,\n",
186 | " 'keywords': [],\n",
187 | " 'dataset_sources': [],\n",
188 | " 'kernel_sources': [],\n",
189 | " 'competition_sources': ['competitions/paddy-disease-classification']}"
190 | ]
191 | },
192 | "execution_count": null,
193 | "metadata": {},
194 | "output_type": "execute_result"
195 | }
196 | ],
197 | "source": [
198 | "nb_meta('jhoward', 'my-notebook', 'My notebook', 'my-notebook.ipynb', competition='paddy-disease-classification')"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": null,
204 | "metadata": {},
205 | "outputs": [],
206 | "source": [
207 | "#|export\n",
208 | "def push_notebook(user, id, title, file, path='.', competition=None, private=True, gpu=False, internet=True, linked_datasets=None):\n",
209 | " \"Push notebook `file` to Kaggle Notebooks\"\n",
210 | " meta = nb_meta(user, id, title, file=file, competition=competition, private=private, gpu=gpu, internet=internet, linked_datasets=linked_datasets)\n",
211 | " path = Path(path)\n",
212 | " nm = 'kernel-metadata.json'\n",
213 | " path.mkdir(exist_ok=True, parents=True)\n",
214 | " with open(path/nm, 'w') as f: json.dump(meta, f, indent=2)\n",
215 | " api = import_kaggle()\n",
216 | " api.kernels_push_cli(str(path))"
217 | ]
218 | },
219 | {
220 | "cell_type": "markdown",
221 | "metadata": {},
222 | "source": [
223 |     "Note that Kaggle recommends that the `id` match the *slug* for the title -- i.e. it should be the same as the title, but lowercase, no punctuation, and spaces replaced with dashes. E.g.:\n",
224 | "\n",
225 | "```python\n",
226 | "push_notebook('jhoward', 'first-steps-road-to-the-top-part-1',\n",
227 | " title='First Steps: Road to the Top, Part 1',\n",
228 | " file='first-steps-road-to-the-top-part-1.ipynb',\n",
229 | " competition='paddy-disease-classification',\n",
230 | " private=False, gpu=True)\n",
231 | "```"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "## Datasets"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "### Core"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": null,
251 | "metadata": {},
252 | "outputs": [],
253 | "source": [
254 | "#| export\n",
255 | "def check_ds_exists(dataset_slug # Dataset slug (ie \"zillow/zecon\")\n",
256 | " ):\n",
257 | " '''Checks if a dataset exists in kaggle and returns boolean'''\n",
258 | " api = import_kaggle()\n",
259 | " ds_search = L(api.dataset_list(mine=True)).filter(lambda x: str(x)==dataset_slug)\n",
260 | " if len(ds_search)==1: return True\n",
261 | " elif len(ds_search)==0: return False\n",
262 |     "    else: raise Exception(\"Multiple datasets found - Check Manually\")"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "#| export\n",
272 | "def mk_dataset(dataset_path, # Local path to create dataset in\n",
273 | " title, # Name of the dataset\n",
274 | " force=False, # Should it overwrite or error if exists?\n",
275 | " upload=True # Should it upload and create on kaggle\n",
276 | " ):\n",
277 | " '''Creates minimal dataset metadata needed to push new dataset to kaggle'''\n",
278 | " dataset_path = Path(dataset_path)\n",
279 | " dataset_path.mkdir(exist_ok=force,parents=True)\n",
280 | " api = import_kaggle()\n",
281 | " api.dataset_initialize(dataset_path)\n",
282 | " md = json.load(open(dataset_path/'dataset-metadata.json'))\n",
283 | " md['title'] = title\n",
284 | " md['id'] = md['id'].replace('INSERT_SLUG_HERE',title)\n",
285 | " json.dump(md,open(dataset_path/'dataset-metadata.json','w'))\n",
286 |     "    if upload: (dataset_path/'empty.txt').touch()\n",
287 |     "    if upload: api.dataset_create_new(str(dataset_path),public=True,dir_mode='zip',quiet=True)"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": null,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "name": "stdout",
297 | "output_type": "stream",
298 | "text": [
299 | "Data package template written to: testds/dataset-metadata.json\n"
300 | ]
301 | }
302 | ],
303 | "source": [
304 | "mk_dataset('./testds','mytestds',force=True)\n",
305 | "md = json.load(open('./testds/dataset-metadata.json'))\n",
306 | "assert md['title'] == 'mytestds'\n",
307 | "assert md['id'].endswith('/mytestds')"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": null,
313 | "metadata": {},
314 | "outputs": [],
315 | "source": [
316 | "#| export\n",
317 | "def get_dataset(dataset_path, # Local path to download dataset to\n",
318 | " dataset_slug, # Dataset slug (ie \"zillow/zecon\")\n",
319 | " unzip=True, # Should it unzip after downloading?\n",
320 | " force=False # Should it overwrite or error if dataset_path exists?\n",
321 | " ):\n",
322 | " '''Downloads an existing dataset and metadata from kaggle'''\n",
323 | " if not force: assert not Path(dataset_path).exists()\n",
324 | " api = import_kaggle()\n",
325 | " api.dataset_metadata(dataset_slug,str(dataset_path))\n",
326 | " api.dataset_download_files(dataset_slug,str(dataset_path))\n",
327 | " if unzip:\n",
328 | " zipped_file = Path(dataset_path)/f\"{dataset_slug.split('/')[-1]}.zip\"\n",
329 | " import zipfile\n",
330 | " with zipfile.ZipFile(zipped_file, 'r') as zip_ref:\n",
331 | " zip_ref.extractall(Path(dataset_path))\n",
332 | " zipped_file.unlink()\n",
333 | " "
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": null,
339 | "metadata": {},
340 | "outputs": [],
341 | "source": [
342 | "#| export\n",
343 | "def get_pip_library(dataset_path, # Local path to download pip library to\n",
344 | " pip_library, # name of library for pip to install\n",
345 | " pip_cmd=\"pip\" # pip base to use (ie \"pip3\" or \"pip\")\n",
346 | " ): \n",
347 | " '''Download the whl files for pip_library and store in dataset_path'''\n",
348 | " bashCommand = f\"{pip_cmd} download {pip_library} -d {dataset_path}\"\n",
349 | " process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)\n",
350 | " output, error = process.communicate()"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": [
359 | "#| export\n",
360 | "def get_pip_libraries(dataset_path, # Local path to download pip library to\n",
361 | " requirements_path, # path to requirements file\n",
362 | " pip_cmd=\"pip\" # pip base to use (ie \"pip3\" or \"pip\")\n",
363 | " ):\n",
364 | " '''Download whl files for a requirements.txt file and store in dataset_path'''\n",
365 | " bashCommand = f\"{pip_cmd} download -r {requirements_path} -d {dataset_path}\"\n",
366 | " process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)\n",
367 | " output, error = process.communicate()"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": null,
373 | "metadata": {},
374 | "outputs": [],
375 | "source": [
376 | "dl_path = Path('./mylib')\n",
377 | "get_pip_library(dl_path,'fastkaggle')\n",
378 | "assert 1==len([o for o in dl_path.ls() if str(o).startswith(f\"{dl_path}/fastkaggle\")])"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": null,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "#| export\n",
388 | "def push_dataset(dataset_path, # Local path where dataset is stored \n",
389 | " version_comment # Comment associated with this dataset update\n",
390 | " ):\n",
391 | " '''Push dataset update to kaggle. Dataset path must contain dataset metadata file'''\n",
392 | " api = import_kaggle()\n",
393 | " api.dataset_create_version(str(dataset_path),version_comment,dir_mode='zip',quiet=True)"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "execution_count": null,
399 | "metadata": {},
400 | "outputs": [],
401 | "source": [
402 | "#| export\n",
403 | "def get_local_ds_ver(lib_path, # Local path dataset is stored in\n",
404 | " lib # Name of library (ie \"fastcore\")\n",
405 | " ):\n",
406 | " '''checks a local copy of kaggle dataset for library version number'''\n",
407 | " wheel_lib_name = lib.replace('-','_')\n",
408 | " local_path = (lib_path/f\"library-{lib}\")\n",
409 | " lib_whl = local_path.ls().filter(lambda x: wheel_lib_name in x.name.lower())\n",
410 | " if 1==len(lib_whl):\n",
411 | " return re.search(f\"(?<={wheel_lib_name}-)[\\d+.]+\\d\",lib_whl[0].name.lower())[0]\n",
412 | " elif 0