├── .DS_Store
├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── code
├── .DS_Store
├── .ipynb_checkpoints
│ ├── a_dogscats_setup_data-checkpoint.ipynb
│ └── a_setup_data-checkpoint.ipynb
├── README.md
├── a_dogscats_setup_data.ipynb
├── split_data.sh
├── subset_data.md
├── tree.md
└── untar_weights_file.md
├── courses
├── .DS_Store
├── README.md
├── cla
│ └── README.md
├── mes_projets
│ ├── 0_login.md
│ ├── 1_scp_data_aws.md
│ ├── 2_split_data.md
│ ├── README.md
│ ├── a_dogscats_setup_data.ipynb
│ ├── comments.md
│ ├── get_data.md
│ ├── test.md
│ └── to_do.md
├── ml1
│ ├── lesson_01.md
│ ├── lesson_02.md
│ ├── lesson_03.md
│ ├── lesson_04.md
│ └── lesson_05.md
├── nlp
│ ├── README.md
│ ├── videos_01_to_05.md
│ └── videos_06_to_10.md
├── udacity_pytorch
│ ├── README.md
│ ├── images
│ │ ├── .keep
│ │ └── cnn_formulas.png
│ ├── notes.md
│ ├── orientation.md
│ ├── pytorch_1_nanodegree.md
│ ├── pytorch_2_.md
│ ├── pytorch_3.md
│ ├── pytorch_4_cnn.md
│ ├── pytorch_5_style_transfer.md
│ ├── pytorch_6_rnn.md
│ └── softmax.ipynb
├── v2-dl1
│ ├── .DS_Store
│ ├── README.md
│ ├── lesson_1a_course_intro.md
│ ├── lesson_1b_cnn_tools.md
│ ├── lesson_2_resnet34_resnext50.md
│ ├── lesson_3_x.md
│ ├── lesson_4_x.md
│ ├── lesson_5_x.md
│ ├── lesson_6_x.md
│ └── lesson_7_x.md
├── v2-dl2
│ ├── README.md
│ ├── lesson_08.md
│ ├── lesson_09.md
│ ├── lesson_10_1.md
│ ├── lesson_10_2.md
│ ├── lesson_11_1.md
│ └── lesson_11_2.md
├── v3-dl1
│ ├── .DS_Store
│ ├── .keep
│ ├── README.md
│ ├── gcp_0_setup_notes.md
│ ├── gcp_1_logging_in.md
│ ├── images
│ │ ├── .keep
│ │ ├── camel.jpeg
│ │ ├── camels_class.png
│ │ ├── camels_confusion.png
│ │ ├── elephant1.png
│ │ ├── elephant_cm.png
│ │ ├── elephant_predict.png
│ │ ├── gcp1.png
│ │ ├── horse.jpeg
│ │ ├── horses_txt.png
│ │ ├── nyc_group.jpeg
│ │ ├── rs_camel.jpg
│ │ ├── soumith.jpg
│ │ └── south_africa.png
│ ├── kaggle_fruits.md
│ ├── lesson_1_lecture.md
│ ├── lesson_1_rs_camels_horses.md
│ ├── lesson_2_1_lecture.md
│ ├── lesson_2_2_lecture.md
│ └── lesson_3_lecture.md
├── v3-dl2
│ ├── README.md
│ └── lecture_8.md
└── v4-dl1
│ ├── README.md
│ ├── doc_Jupyter_01.md
│ ├── doc_Jupyter_02_reference.md
│ ├── image
│ ├── .keep
│ └── transforms.png
│ ├── lesson_01.md
│ ├── lesson_02.md
│ ├── lesson_03.md
│ ├── lesson_04.md
│ ├── lesson_05_ethics.md
│ ├── lesson_06.md
│ ├── lesson_07.md
│ ├── lesson_08_NLP.md
│ └── paperspace.md
├── fastai_dl_course_v1.md
├── fastai_dl_course_v2.md
├── fastai_dl_course_v3.md
├── fastai_dl_terms.md
├── fastai_ml_course.md
├── googlefc30e18b4a9edaa2.html
├── helpful_linux_commands.md
├── images
├── chrome_curlwget.png
├── dl_libraries.png
├── image_downloader.png
├── lesson_08
│ ├── lesson08_lr_find.png
│ ├── lesson8_bbox.png
│ ├── lesson8_dl_box.png
│ ├── lesson8_embeddings.png
│ ├── lesson8_learning.png
│ ├── lesson8_learning2.png
│ ├── lesson8_lr_find2.png
│ ├── lesson8_matplotlib.png
│ ├── lesson8_md.png
│ ├── lesson8_motivation.png
│ ├── lesson8_nb_pascal.png
│ ├── lesson8_obj_det.png
│ ├── lesson8_opps.png
│ ├── lesson8_paper.png
│ ├── lesson8_part1_2.png
│ ├── lesson8_part2.png
│ ├── lesson8_stage1.png
│ ├── lesson8_step1.png
│ ├── lesson8_transfer_learning.png
│ ├── lesson8_visualize.png
│ └── lesson8_x.png
├── lesson_09
│ ├── .keep
│ ├── lesson9_archit.png
│ ├── lesson9_bbox.png
│ ├── lesson9_data_loader.png
│ ├── lesson9_know_these1.png
│ └── lesson9_know_these2.png
├── lesson_11
│ ├── .keep
│ ├── lesson_11_charloop.png
│ ├── lesson_11_nt.png
│ ├── lesson_11_rnn.png
│ ├── lesson_11_rnn2.png
│ ├── lesson_11_rnn_stacked.png
│ └── lesson_11_rnn_stacked2.png
├── ncm_gephi.jpg
├── paperspace.png
├── paperspace_fastai.png
├── paperspace_jupyter.png
├── pretrained_networks.png
├── softmax.png
├── tmux_start.png
├── tmux_summary.png
└── triple_backticks.png
├── notes
├── competitions.md
├── deep_learning_libraries.md
├── imagenet.md
├── loss_functions.md
├── nlp_data.md
└── nlp_terms.md
├── resources.md
├── takeaways.md
├── tips_faq_beginners.md
├── tips_prereqs.md
├── tips_troubleshooting.md
└── tools
├── README.md
├── aws_ami_gpu_setup.md
├── check_links.py
├── copy_files_local_to_cloud.md
├── create_keypair.md
├── crestle_run.md
├── download_data_browser_curlwget.md
├── download_data_curl.md
├── download_data_kaggle_cli.md
├── getting_image_data.md
├── jupyter_notebook.md
├── paperspace.md
├── setup_personal_dl_box.md
├── symlinks.md
├── temp
├── .keep
└── index.html
├── tmux.md
└── unix_linux.md
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | tmp*
2 | tags
3 | data
4 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Reshama Shaikh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [fast.ai](http://www.fast.ai)
2 | - latest course (v3): https://course.fast.ai
3 | - fastai on **GitHub:** [fastai](https://github.com/fastai/fastai)
4 | - Data
5 | - [Torrents](http://academictorrents.com/browse.php?search=fastai&page=0)
6 | - [some fastai files](http://files.fast.ai) (files, models, data)
7 |
8 | ## About Me
9 | * [My Blog](https://reshamas.github.io) (Reshama Shaikh)
10 | * Twitter: [@reshamas](https://twitter.com/reshamas)
11 |
12 |
13 | ## Projects
14 | - [Deploying Deep Learning Models On Web And Mobile](https://reshamas.github.io/deploying-deep-learning-models-on-web-and-mobile/) with [Nidhin Pattaniyil](https://www.linkedin.com/in/nidhinpattaniyil/)
15 |
16 | ---
17 |
18 | ## Courses (my outlines)
19 |
20 | ### Deep Learning
21 | - [Version 4](fastai_dl_course_v4.md) (Spring 2020)
22 | - [Version 3](fastai_dl_course_v3.md) (Fall 2018 to Spring 2019)
23 | - [Version 2](fastai_dl_course_v2.md) (Fall 2017 to Spring 2018)
24 | - [Version 1](fastai_dl_course_v1.md) (Fall 2016 to Spring 2017)
25 |
26 | ### Machine Learning
27 | - [Fall 2017](fastai_ml_course.md)
28 |
29 | ---
30 |
31 | ## Helpful Resources
32 | * [Directory of fastai and DL terms](fastai_dl_terms.md)
33 | * [Solving the Most Common Errors](tips_troubleshooting.md)
34 | * [Fastai FAQs for Beginners](tips_faq_beginners.md)
35 | * [30+ Best Practices](http://forums.fast.ai/t/30-best-practices/12344)
36 | * [Resources](resources.md) (Blogs Written by fastai Fellows / Research Papers, etc)
37 | * [Fastai Blog Posts](http://www.fast.ai/topics/) (by Rachel Thomas & Jeremy Howard)
38 | - podcast with [Jeremy Howard on fastai_v1](https://twimlai.com/twiml-talk-186-the-fastai-v1-deep-learning-framework-with-jeremy-howard/) :red_circle:
39 | - podcast with [Rachel Thomas](https://twimlai.com/twiml-talk-138-practical-deep-learning-with-rachel-thomas/)
40 | - [Jeremy's PyTorch Tutorial](https://github.com/fastai/fastai_old/blob/master/dev_nb/001a_nn_basics.ipynb)
41 |
42 | ## [Technical Tools](tools/)
43 | * [tmux on AWS](tools/tmux.md)
44 | * [Download data using Kaggle CLI](tools/download_data_kaggle_cli.md)
45 | * [Download data using Chrome and wget](tools/download_data_browser_curlwget.md)
46 | * [Jupyter Notebook Commands & Shortcuts](tools/jupyter_notebook.md)
47 | * [How to Create a Keypair](tools/create_keypair.md)
48 | * [Copy Files from Local PC to Cloud PC](tools/copy_files_local_to_cloud.md)
49 |
50 |
51 | ## Other Resources
52 | - [Publish notebooks as Github gists with a single button click!](https://jupyter-contrib-nbextensions.readthedocs.io/en/latest/nbextensions/gist_it/readme.html)
53 | - [Tips for building large image datasets](https://forums.fast.ai/t/tips-for-building-large-image-datasets/26688)
54 |
55 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/code/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/code/.DS_Store
--------------------------------------------------------------------------------
/code/README.md:
--------------------------------------------------------------------------------
1 | # Helpful Code
2 |
3 | ### Utilities
4 | - https://github.com/prairie-guy/ai_utilities
5 |
6 | A set of scripts useful for deep learning and AI work, originally written for use with `fast.ai` lectures and libraries.
7 | - Set up `train` and `valid` directories for use in deep learning models.
8 | - Download any number of images from Google image search.
9 | - Use `file` to determine the type of each picture, then keep only pictures of a specified type.
10 |
11 |
12 | ### get size (shape) of image file
13 | ```python
14 | img.shape
15 | ```
16 | ```bash
17 | (198, 179, 3)
18 | ```
19 |
20 | ### Validation Set Creator
21 | * https://github.com/Renga411/dl1.fastai/blob/master/Validation-set-creator.ipynb
22 |
--------------------------------------------------------------------------------
/code/split_data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Split the camels/horses dataset into train/valid folders,
3 | # and also build a smaller sample split.
4 | # (check image counts with: ls camels | wc -l)
5 | # 0. shuffle the data
6 | # 1. split the full data into train/valid
7 | # 2. take a subset and split that into train/valid
8 |
9 | sample_t=${1:-50}
10 | sample_v=${2:-20}
11 | data_f="camelshorses"
12 | sample_f="camelshorses_sample"
13 | category_1="camels"
14 | category_2="horses"
15 |
16 | n_t=65
17 | n_v=35
18 |
19 | ns_t=40
20 | ns_v=20
21 | outfile="shuffled.txt"
22 |
23 | # create the folders
24 | mkdir -p data/$data_f/{train,valid}/{$category_1,$category_2}
25 | mkdir -p data/$sample_f/{train,valid}/{$category_1,$category_2}
26 |
27 | # shuffle the data for LABEL #1,
28 | # then copy into train and valid folders
29 | # (shuffle the unsplit source folder: train/ is still empty at this point)
30 | start_dir="data/$data_f/$category_1/*"
31 | shuf -e $start_dir > /tmp/shuffled.txt
32 |
33 | echo "print contents of shuffled.txt"
34 | echo " "
35 | cat /tmp/shuffled.txt
36 |
43 |
44 | echo "copying images to train/valid for full dataset"
45 |
46 | head -n $n_t /tmp/shuffled.txt | xargs -i cp {} data/$data_f/train/$category_1
47 | tail -n $n_v /tmp/shuffled.txt | xargs -i cp {} data/$data_f/valid/$category_1
48 |
49 | echo "copying images to train/valid for subset of data"
50 | head -n $ns_t /tmp/shuffled.txt | xargs -i cp {} data/$sample_f/train/$category_1
51 | tail -n $ns_v /tmp/shuffled.txt | xargs -i cp {} data/$sample_f/valid/$category_1
52 |
53 |
54 | # shuffle the data for LABEL #2
55 | # copy into train and valid folders
56 | #shuf -e data/$data_f/train/$category_2/* > /tmp/shuffled2.txt
57 |
58 | #head /tmp/shuffled2.txt -n $n_t | xargs -i cp {} data/$data_f/train/$category_2
59 | #tail /tmp/shuffled2.txt -n $n_v | xargs -i cp {} data/$data_f/valid/$category_2
60 |
61 | #head /tmp/shuffled2.txt -n $ns_t | xargs -i cp {} data/$sample_f/train/$category_2
62 | #tail /tmp/shuffled2.txt -n $ns_v | xargs -i cp {} data/$sample_f/valid/$category_2
63 |
64 |
65 |
66 |
67 | #shuf -n $n_t -e data/$data_f/$category_1/* | xargs -i cp {} data/$sample_f/train/$category_1
68 | #shuf -n $n_t -e data/$data_f/$category_2/* | xargs -i cp {} data/$sample_f/train/$category_2
69 | #shuf -n $n_v -e data/$data_f/$category_1/* | xargs -i cp {} data/$sample_f/valid/$category_1
70 | #shuf -n $n_v -e data/$data_f/$category_2/* | xargs -i cp {} data/$sample_f/valid/$category_2
71 |
72 |
73 |
74 | #mkdir -p data/$sample_f/{train,valid}/{$category_1,$category_2}
75 | #shuf -n 200 -e data/dogscats/train/cats | xargs -i cp {} data/dogscats_sample/train/cats
76 |
77 | #shuf -n $sample_t -e data/$data_f/train/$category_1/* | xargs -i cp {} data/$sample_f/train/$category_1
78 | #shuf -n $sample_t -e data/$data_f/train/$category_2/* | xargs -i cp {} data/$sample_f/train/$category_2
79 | #shuf -n $sample_v -e data/$data_f/valid/$category_1/* | xargs -i cp {} data/$sample_f/valid/$category_1
80 | #shuf -n $sample_v -e data/$data_f/valid/$category_2/* | xargs -i cp {} data/$sample_f/valid/$category_2
81 |
--------------------------------------------------------------------------------
/code/subset_data.md:
--------------------------------------------------------------------------------
1 | # Subset data using `shuf`
2 |
3 | From the directory of your notebook (from where you have the data folder available) run the following:
4 | ```bash
5 | mkdir -p data/dogscats_sample/{valid,train}/{cats,dogs}
6 |
7 | shuf -n 200 -e data/dogscats/train/cats/* | xargs -i cp {} data/dogscats_sample/train/cats
8 | shuf -n 200 -e data/dogscats/train/dogs/* | xargs -i cp {} data/dogscats_sample/train/dogs
9 | shuf -n 100 -e data/dogscats/valid/cats/* | xargs -i cp {} data/dogscats_sample/valid/cats
10 | shuf -n 100 -e data/dogscats/valid/dogs/* | xargs -i cp {} data/dogscats_sample/valid/dogs
11 | ```
12 |
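A minimal, self-contained sketch of the same sampling pattern, run on throwaway files in a temp directory (the folder names and counts here are made up for illustration; assumes GNU `shuf` and `xargs`, where `-i` is shorthand for `-I {}`):

```shell
# demo of the shuf | xargs cp sampling pattern on dummy files
tmp=$(mktemp -d)
mkdir -p "$tmp"/full/cats "$tmp"/sample/cats

# create 10 dummy "images"
for i in $(seq 1 10); do touch "$tmp/full/cats/cat_$i.jpg"; done

# randomly pick 4 of them and copy into the sample folder
shuf -n 4 -e "$tmp"/full/cats/* | xargs -i cp {} "$tmp"/sample/cats

ls "$tmp"/sample/cats | wc -l   # prints 4
```

Because the pattern uses `cp`, the source folder keeps all 10 files; only copies land in the sample folder.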
13 | ```bash
14 | ls camels | wc -l
15 | ```
16 |
17 | ```bash
18 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ cp horses/*.jpeg train/horses/
19 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ cp horses/*.jpeg valid/horses/
20 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ cp camels/*.jpeg train/camels/
21 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ cp camels/*.jpeg valid/camels/
22 | ```
23 | ```bash
24 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls horses | wc -l
25 | 101
26 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls train/horses | wc -l
27 | 101
28 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls valid/horses | wc -l
29 | 101
30 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls camels | wc -l
31 | 101
32 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls train/camels | wc -l
33 | 101
34 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls valid/camels | wc -l
35 | 101
36 | ```
37 |
38 | ```bash
39 | # make the sample data directory
40 | mkdir -p data/camelhorse/{valid,train}/{camel,horse}
41 |
42 | # split original data into train/valid
43 | shuf -n 68 -e data/camelhorse/camels/* | xargs -i cp {} data/camelhorse/train/camel
44 | shuf -n 68 -e data/camelhorse/horses/* | xargs -i cp {} data/camelhorse/train/horse
45 | shuf -n 33 -e data/camelhorse/camels/* | xargs -i cp {} data/camelhorse/valid/camel
46 | shuf -n 33 -e data/camelhorse/horses/* | xargs -i cp {} data/camelhorse/valid/horse
47 | ```
48 |
49 |
50 | ```bash
51 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/camels | wc -l
52 | 101
53 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/horses | wc -l
54 | 101
55 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/train | wc -l
56 | 2
57 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/train/camels | wc -l
58 | 0
59 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/train/horses | wc -l
60 | 0
61 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/valid/camels | wc -l
62 | 0
63 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$ ls ~/data/camelshorses/valid/horses | wc -l
64 | 0
65 | (fastai) ubuntu@ip-172-31-2-59:~/data/camelshorses$
66 | ```
67 |
68 | In your notebook, change the path to `PATH = "data/dogscats_sample/"`.
69 | The command @jeremy shared on Twitter is below (note that it uses `mv`, which is
70 | what you normally want when creating train/valid/test splits):
71 |
72 | ```bash
73 | shuf -n 5000 -e all/*.* | xargs -i mv {} all_val/
74 | ```
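A small sketch of the `mv` variant on throwaway files, showing that `mv` (unlike `cp`) removes the chosen files from the source folder, so train and validation sets stay disjoint (names and counts are illustrative; assumes GNU `shuf` and `xargs`):

```shell
# demo: move a random 5 of 20 dummy files into a validation folder
tmp2=$(mktemp -d)
mkdir -p "$tmp2"/all "$tmp2"/all_val
for i in $(seq 1 20); do touch "$tmp2/all/img_$i.jpg"; done

# mv removes the picked files from all/, so the two sets do not overlap
shuf -n 5 -e "$tmp2"/all/*.* | xargs -i mv {} "$tmp2"/all_val/

ls "$tmp2"/all | wc -l       # prints 15
ls "$tmp2"/all_val | wc -l   # prints 5
```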
--------------------------------------------------------------------------------
/code/tree.md:
--------------------------------------------------------------------------------
1 | # tree
2 | ```bash
3 | sudo apt install tree  # macOS: brew install tree
4 | ```
5 |
6 | ```bash
7 | (fastai) ubuntu@ip-172-31-2-59:~/data$ tree -d
8 | .
9 | ├── camelshorses
10 | │ ├── camels
11 | │ ├── horses
12 | │ ├── train
13 | │ │ ├── camels
14 | │ │ └── horses
15 | │ └── valid
16 | │ ├── camels
17 | │ └── horses
18 | ├── camelshorses_sample
19 | │ ├── train
20 | │ │ ├── camels
21 | │ │ └── horses
22 | │ └── valid
23 | │ ├── camels
24 | │ └── horses
25 | └── dogscats
26 | ├── models
27 | ├── sample
28 | │ ├── models
29 | │ ├── tmp
30 | │ ├── train
31 | │ │ ├── cats
32 | │ │ └── dogs
33 | │ └── valid
34 | │ ├── cats
35 | │ └── dogs
36 | ├── test1
37 | ├── tmp
38 | │ ├── x_act_resnet34_0_224.bc
39 | │ │ ├── data
40 | │ │ └── meta
41 | │ ├── x_act_test_resnet34_0_224.bc
42 | │ │ ├── data
43 | │ │ └── meta
44 | │ └── x_act_val_resnet34_0_224.bc
45 | │ ├── data
46 | │ └── meta
47 | ├── train
48 | │ ├── cats
49 | │ └── dogs
50 | └── valid
51 | ├── cats
52 | └── dogs
53 |
54 | 44 directories
55 | (fastai) ubuntu@ip-172-31-2-59:~/data$
56 | ```
57 |
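If `tree` is not available, `find` gives a rough equivalent of `tree -d`. A small sketch on a throwaway directory (the layout is a made-up subset of the one above):

```shell
# approximate `tree -d` with find: list directories only
tmp3=$(mktemp -d)
mkdir -p "$tmp3"/data/camelshorses/{train,valid}/{camels,horses}

# counts data + camelshorses + 2 split dirs + 4 class dirs
find "$tmp3"/data -type d | wc -l   # prints 8
```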
--------------------------------------------------------------------------------
/code/untar_weights_file.md:
--------------------------------------------------------------------------------
1 | # Expand Weights File
2 |
3 | ```bash
4 | cd /home/paperspace/fastai/courses/dl1/fastai
5 | ```
6 |
7 | ```bash
8 | curl -O http://files.fast.ai/models/weights.tgz
9 | tar zxvf weights.tgz
10 | ```
11 |
--------------------------------------------------------------------------------
/courses/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/.DS_Store
--------------------------------------------------------------------------------
/courses/README.md:
--------------------------------------------------------------------------------
1 | # fastai Courses
2 |
3 |
4 | ## Deep Learning
5 | Note: Best to use the *latest* version.
6 |
7 | #### Version 3
8 | * [Deep Learning 1](v3-dl1/)
9 |
10 | #### Version 2
11 | * [Deep Learning 1](v2-dl1/)
12 | * [Deep Learning 2](v2-dl2/)
13 |
14 | ## Machine Learning
15 | * [Machine Learning 1](ml1/)
16 |
17 | ## Linear Algebra
18 | * [Computational Linear Algebra](cla/)
19 |
--------------------------------------------------------------------------------
/courses/cla/README.md:
--------------------------------------------------------------------------------
1 | # Computational Linear Algebra
2 |
3 | http://www.fast.ai/2017/07/17/num-lin-alg/
4 |
--------------------------------------------------------------------------------
/courses/mes_projets/0_login.md:
--------------------------------------------------------------------------------
1 | # Logging in to AWS
2 |
3 | ## Step 0: Initial Set-up Assumptions
4 | Assuming:
5 | - I have launched a p2 instance
6 | - I have set up my key pair
7 | - I have created an alias in my startup configuration file (normally `~/.bash_profile`; in my case, `~/.zshrc`)
8 |
9 | My alias:
10 | ```bash
11 | alias fastai='ssh -i "id_rsa" ubuntu@ec2-88-888-888-88.compute-1.amazonaws.com -L8888:localhost:8888'
12 | ```
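An equivalent approach (an assumption on my part, not from the course) is a `Host` entry in `~/.ssh/config`; the hostname and key path below are placeholders matching the alias above:

```
Host fastai
    HostName ec2-88-888-888-88.compute-1.amazonaws.com
    User ubuntu
    IdentityFile ~/.ssh/id_rsa
    LocalForward 8888 localhost:8888
```

With this entry in place, `ssh fastai` behaves like the alias, including the port forward for Jupyter.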
13 |
14 | ## Step 1: AWS Console
15 | - sign in here: https://signin.aws.amazon.com/
16 | - start my `p2.xlarge` instance from before
17 |
18 | ## Step 2: My terminal on my Mac (local computer)
19 |
20 | ### Go to the appropriate directory
21 | ```bash
22 | cd /Users/reshamashaikh/.ssh
23 | ```
24 | ### Login to AWS
25 | Login as the user "ubuntu" rather than the user "root".
26 |
27 | ```bash
28 | fastai
29 | ```
30 |
31 | ### Update Ubuntu: `sudo apt-get update`
32 | ```bash
33 | sudo apt-get update
34 | ```
35 |
36 | ### Update fastai repo: `git pull`
37 | ```bash
38 | cd fastai
39 | ```
40 | ```bash
41 | git pull
42 | ```
43 | >my example
44 | ```bash
45 | (fastai) ubuntu@ip-172-31-2-59:~$ ls
46 | data fastai src
47 | (fastai) ubuntu@ip-172-31-2-59:~$ cd fastai
48 | (fastai) ubuntu@ip-172-31-2-59:~/fastai$ git pull
49 | (fastai) ubuntu@ip-172-31-2-59:~/fastai$
50 | ```
51 | ### Update Anaconda packages: `conda env update`
52 | ```bash
53 | conda env update
54 | ```
55 | >my example
56 | ```bash
57 | (fastai) ubuntu@ip-172-31-2-59:~/fastai$ conda env update
58 | Using Anaconda API: https://api.anaconda.org
59 | Fetching package metadata .................
60 | Solving package specifications: .
61 | #
62 | # To activate this environment, use:
63 | # > source activate fastai
64 | #
65 | # To deactivate an active environment, use:
66 | # > source deactivate
67 | #
68 | (fastai) ubuntu@ip-172-31-2-59:~/fastai$
69 | ```
70 | ### Update Anaconda packages: `conda update --all`
71 |
72 |
73 | ## Step 3: Turn off AWS Instance after completing work!
74 |
75 | ---
76 | ## `~/.bashrc` File
77 | ```bash
78 | nano ~/.bashrc
79 | ```
80 |
81 |
82 | ---
83 | # My Projects
84 |
85 | ## Go to where my projects are
86 | ```bash
87 | cd /home/ubuntu/my_repos/
88 | ```
89 |
90 | ### Project 1
91 | ```bash
92 | /home/ubuntu/my_repos/llis_topicModel
93 | ```
94 |
95 | ### Project 2
96 | ```bash
97 | (fastai) ubuntu@ip-172-31-2-59:~/git_repos/projects$ pwd
98 | /home/ubuntu/git_repos/projects
99 | (fastai) ubuntu@ip-172-31-2-59:~/git_repos/projects$ ls -l
100 | total 12
101 | drwxrwxr-x 2 ubuntu ubuntu 4096 Jan 8 21:07 camels_h
102 | drwxrwxr-x 3 ubuntu ubuntu 4096 Jan 8 00:44 iceberg
103 | -rw-rw-r-- 1 ubuntu ubuntu 23 Jan 7 21:04 README.md
104 | (fastai) ubuntu@ip-172-31-2-59:~/git_repos/projects$
105 | ```
106 |
107 | ## My data
108 | ```bash
109 | (fastai) ubuntu@ip-172-31-2-59:~/data$ pwd
110 | /home/ubuntu/data
111 | (fastai) ubuntu@ip-172-31-2-59:~/data$ ls -alt
112 | total 20
113 | drwxr-xr-x 20 ubuntu ubuntu 4096 Jan 8 21:11 ..
114 | drwxrwxr-x 2 ubuntu ubuntu 4096 Jan 7 20:44 iceberg
115 | drwxrwxr-x 5 ubuntu ubuntu 4096 Jan 7 20:38 .
116 | drwxrwxr-x 8 ubuntu ubuntu 4096 Dec 21 01:53 camelhorse
117 | drwxrwxr-x 8 ubuntu ubuntu 4096 Dec 20 22:19 dogscats
118 | (fastai) ubuntu@ip-172-31-2-59:~/data$
119 | ```
120 |
121 | ## Launch Jupyter Notebook
122 | ```bash
123 | (fastai) ubuntu@ip-172-31-2-59:~$ pwd
124 | /home/ubuntu
125 | (fastai) ubuntu@ip-172-31-2-59:~$ jupyter notebook
126 | ```
127 |
128 |
--------------------------------------------------------------------------------
/courses/mes_projets/1_scp_data_aws.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | % pwd
4 | /Users/reshamashaikh/ds/data/camelshorses
5 | % ls
6 | total 0
7 | drwxr-xr-x 103 3502 Nov 25 12:19 camels
8 | drwxr-xr-x 104 3536 Nov 25 12:16 horses
9 | % ls camels | wc -l
10 | 102
11 | % ls horses | wc -l
12 | 102
13 | % scp -r . ubuntu@34.198.228.48:~/data/camelhorse
14 | ```
15 |
--------------------------------------------------------------------------------
/courses/mes_projets/2_split_data.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | # make the sub directories
4 | mkdir -p data/camelhorse/{train,valid}/{camel,horse}
5 |
6 | # split original data into train/valid
7 | shuf -n 68 -e data/camelhorse/camels/* | xargs -i cp {} data/camelhorse/train/camel
8 | shuf -n 68 -e data/camelhorse/horses/* | xargs -i cp {} data/camelhorse/train/horse
9 | shuf -n 33 -e data/camelhorse/camels/* | xargs -i cp {} data/camelhorse/valid/camel
10 | shuf -n 33 -e data/camelhorse/horses/* | xargs -i cp {} data/camelhorse/valid/horse
11 |
12 |
13 | ls ~/data/camelhorse/camels | wc -l
14 | ls ~/data/camelhorse/horses | wc -l
15 | ls ~/data/camelhorse/train/camel | wc -l
16 | ls ~/data/camelhorse/train/horse | wc -l
17 | ls ~/data/camelhorse/valid/camel | wc -l
18 | ls ~/data/camelhorse/valid/horse | wc -l
19 | ```
20 |
--------------------------------------------------------------------------------
/courses/mes_projets/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | https://www.kaggle.com/devm2024/keras-model-for-beginners-0-210-on-lb-eda-r-d
4 |
--------------------------------------------------------------------------------
/courses/mes_projets/comments.md:
--------------------------------------------------------------------------------
1 | https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge/kernels?sortBy=votes&group=everyone&pageSize=20&competitionId=8076
2 |
3 | ```
4 | (fastai) ubuntu@ip-172-31-2-59:~/data$ pwd
5 | /home/ubuntu/data
6 | mkdir toxic_comments
7 | ```
8 |
9 | ```bash
10 | kg download -c jigsaw-toxic-comment-classification-challenge
11 | ```
12 |
13 | ```bash
14 | sudo apt install unzip
15 | unzip -q train.zip
16 | unzip -q test.zip
17 | ```
16 |
17 | ```bash
18 | (fastai) ubuntu@ip-172-31-2-59:~/data/toxic_comments$ pwd
19 | /home/ubuntu/data/toxic_comments
20 | (fastai) ubuntu@ip-172-31-2-59:~/data/toxic_comments$ mkdir subm
21 | ```
22 | ```
23 | (fastai) ubuntu@ip-172-31-2-59:~/data/toxic_comments$ wget http://nlp.stanford.edu/data/glove.6B.zip
24 | ```
25 |
26 |
27 |
--------------------------------------------------------------------------------
/courses/mes_projets/get_data.md:
--------------------------------------------------------------------------------
1 | # Iceberg
2 |
3 | ```bash
4 | (fastai) ubuntu@ip-172-31-2-59:~/data$ pwd
5 | /home/ubuntu/data
6 | (fastai) ubuntu@ip-172-31-2-59:~/data$
7 | ```
8 |
9 | https://www.kaggle.com/c/statoil-iceberg-classifier-challenge
10 |
11 |
12 | ```bash
13 | rm ~/.kaggle-cli/browser.pickle
14 | pip install kaggle-cli --upgrade
15 | ```
16 |
17 | ```bash
18 | kg download -u "reshamashaikh" -p "xxx" -c statoil-iceberg-classifier-challenge
19 | ```
20 |
21 | ```bash
22 | sudo apt-get install p7zip-full
23 | 7z e test.json.7z
24 | 7z e train.json.7z
25 | 7z e sample_submission.csv.7z
26 | ```
27 |
28 | ```
29 | (fastai) ubuntu@ip-172-31-2-59:~/data/iceberg$ ls -alt
30 | total 1972980
31 | drwxrwxr-x 2 ubuntu ubuntu 4096 Jan 7 20:44 .
32 | drwxrwxr-x 5 ubuntu ubuntu 4096 Jan 7 20:38 ..
33 | -rw-rw-r-- 1 ubuntu ubuntu 257127394 Jan 7 20:36 test.json.7z
34 | -rw-rw-r-- 1 ubuntu ubuntu 44932785 Jan 7 20:36 train.json.7z
35 | -rw-rw-r-- 1 ubuntu ubuntu 38566 Jan 7 20:36 sample_submission.csv.7z
36 | -rw-rw-r-- 1 ubuntu ubuntu 117951 Oct 23 17:27 sample_submission.csv
37 | -rw-rw-r-- 1 ubuntu ubuntu 1521771850 Oct 23 17:27 test.json
38 | -rw-rw-r-- 1 ubuntu ubuntu 196313674 Oct 23 17:23 train.json
39 | (fastai) ubuntu@ip-172-31-2-59:~/data/iceberg$
40 | ```
41 | ```bash
42 | (fastai) ubuntu@ip-172-31-2-59:~/data/iceberg$ wc -l *
43 | 8425 sample_submission.csv
44 | 151 sample_submission.csv.7z
45 | 0 test.json
46 | 1004794 test.json.7z
47 | 0 train.json
48 | 175531 train.json.7z
49 | 1188901 total
50 | (fastai) ubuntu@ip-172-31-2-59:~/data/iceberg$
51 | ```
52 |
53 | ```bash
54 | (fastai) ubuntu@ip-172-31-2-59:~/fastai/courses/dl1$ jupyter notebook
55 | ```
56 |
57 |
58 |
--------------------------------------------------------------------------------
/courses/mes_projets/test.md:
--------------------------------------------------------------------------------
1 | pie, desserts
2 |
--------------------------------------------------------------------------------
/courses/mes_projets/to_do.md:
--------------------------------------------------------------------------------
1 | # To do later...when I have some time
2 |
3 | ## Reading
4 | * [precompute=True](http://forums.fast.ai/t/precompute-true/7316/55)
5 | * go thru Jeremy's notebook [cifar10](https://github.com/fastai/fastai/blob/master/courses/dl1/cifar10.ipynb) as it should provide you with a good intuition about how best to use:
6 | - lr_finder
7 | - cycle_len
8 | - cycle_mult
9 | - resizing
10 |
11 | ## To Explore Later
12 | [byobu](http://byobu.co) Byobu is a GPLv3 open source text-based window manager and terminal multiplexer.
13 |
14 | ## Reading
15 | [Kaggle Planet Competition: How to land in top 4%](https://medium.com/@irshaduetian/kaggle-planet-competition-how-to-land-in-top-4-a679ff0013ba)
16 |
17 |
--------------------------------------------------------------------------------
/courses/ml1/lesson_01.md:
--------------------------------------------------------------------------------
1 |
2 | # Lesson 1 - random forests
3 |
4 | - Length: 01:18
5 | - Video: https://www.youtube.com/watch?v=CzdWqFTmn0Y&feature=youtu.be
6 | - Notebook: [lesson1-rf.ipynb](https://github.com/fastai/fastai/blob/master/courses/ml1/lesson1-rf.ipynb)
7 |
8 | ---
9 |
10 | ## Getting data using `curl`
11 | https://www.kaggle.com/c/bluebook-for-bulldozers
12 |
13 | - ML should help us understand a dataset, not just make predictions about it.
14 |
15 | In Firefox, go to the website, then open the Developer Tools:
16 | - ctrl + shift + i to bring up the web developer tool
17 | - go to the Network tab
18 | - go to the data row
19 | - right click, Copy as cURL (a unix command that downloads data, like `wget`)
20 | - might want to delete "2.0" in url since it causes problems
21 | - `curl url_link -o bulldozers.zip` `-o` means output, then give suitable file name
22 | - `mkdir bulldozers`
23 | - `mv bulldozers.zip bulldozers/`
24 | - `sudo apt install unzip` or `brew install unzip`
25 | - `unzip bulldozers.zip`
26 |
27 | Python 3.6 format string:
28 | ```python
29 | df_raw = pd.read_csv(f'{PATH}Train.csv', low_memory=False,
30 | parse_dates=["saledate"])
31 | ```
32 | - `f'{PATH}Train.csv'` the `f` tells it to interpolate the "{PATH}"
33 | - `low_memory=False` make it read more of the file to decide what the types are
34 |
35 | ### Example
36 | `name = 'Jeremy'`
37 | `age = 43`
38 | `f'Hello {name.upper()}, you are {age}'`
39 | output:
40 | >Hello JEREMY, you are 43
41 |
42 | ### Random Forest
43 | - universal machine learning technique
44 | - way of predicting something of any kind (dog/cat, price)
45 | - can predict a categorical or continuous variable
46 | - columns can be of any kind (pixel data, zip codes, revenues)
47 | - in general, it doesn't overfit
48 | - easy to stop it from overfitting
49 | - don't need a separate validation set
50 | - has few, if any statistical assumptions
51 | - doesn't assume data is normally distributed
52 | - doesn't assume relationships are linear
53 | - don't need to specify interactions
54 | - requires few pieces of feature engineering (don't have to take log of data)
55 | - it's a great place to start
56 | - if your random forest doesn't work, it's a sign there is something wrong with the data
57 |
58 | Both Curse of Dimensionality & No Free Lunch are largely false.
59 |
60 | #### Curse of Dimensionality - the idea that the more columns you have, the emptier the space becomes; the more dimensions you have, the more the points sit on the edge; in theory, distance between points becomes much less meaningful.
61 | - points **do** still have distance from each other
62 | - in the 90's, theory took over machine learning
63 | - we lost a decade of real practical development with these theories
64 | - in practice, building models on lots and lots of columns works well
65 |
66 | #### No Free Lunch Theorem
67 | - there is no type of model that works well for any kind of dataset
68 | - but, in the real world, that's not true; some techniques **do work**
69 | - ensembles of decision trees works well
70 |
71 | ### sklearn
72 | - RandomForestRegressor is part of `sklearn`, `scikit learn`
73 | - Scikit learn is not the best, but perfectly good at nearly everything; popular library
74 | - the next part of the course (with Yannet) will look at a different kind of decision tree ensemble, called Gradient Boosted Trees; XGBoost is better than the gradient boosting trees in scikit-learn
75 |
76 | `from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier`
77 | - RandomForestRegressor - predicts continuous variables
78 | - RandomForestClassifier - predicts categorical variable
79 |
80 | ## Convert to Pandas Categories
81 | The categorical variables are currently stored as strings, which is inefficient, and doesn't provide the numeric coding required for a random forest. Therefore we call train_cats to convert strings to pandas categories.
82 | This is a fastai library function:
83 | `train_cats(df_raw)`
84 |
85 | ## re-order Pandas categories
86 | ```python
87 | df_raw.UsageBand.cat.categories
88 | Out[9]:
89 | Index(['High', 'Low', 'Medium'], dtype='object')
90 | In [10]:
91 | df_raw.UsageBand.cat.set_categories(['High', 'Medium', 'Low'], ordered=True, inplace=True)
92 | ```
93 | In the background, the codes are 0, 1, 2 for the categories, which is what the random forest uses; -1 is assigned to NA.
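The coding can be seen directly in pandas; a small self-contained example (illustrative values, not the actual `UsageBand` column):

```python
import pandas as pd

s = pd.Series(['High', 'Low', None, 'Medium'])
cat = s.astype('category').cat.set_categories(['High', 'Medium', 'Low'], ordered=True)
# NA gets code -1; the rest follow the category order
print(cat.cat.codes.tolist())  # [0, 2, -1, 1]
```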
94 |
95 | ## get percent of missing values for each column
96 | We're still not quite done - for instance we have lots of missing values, which we can't pass directly to a random forest.
97 | `display_all(df_raw.isnull().sum().sort_index()/len(df_raw))`
98 | ```bash
99 | Backhoe_Mounting 0.803872
100 | Blade_Extension 0.937129
101 | Blade_Type 0.800977
102 | Blade_Width 0.937129
103 | Coupler 0.466620
104 | Coupler_System 0.891660
105 | Differential_Type 0.826959
106 | Drive_System 0.739829
107 | Enclosure 0.000810
108 | ```
109 |
110 | ## save to feather format
111 | - But let's save this file for now, since it's already in a format that can be stored and accessed efficiently.
112 | - saves to disk the same way it appears in RAM
113 | - **feather** is fairly new
114 | - fastest way to save to disk and fastest way to read it back
115 | ```python
116 | os.makedirs('tmp', exist_ok=True)
117 | df_raw.to_feather('tmp/bulldozers-raw')
118 | ```
119 | So, then we don't have to re-run everything from start of notebook.
120 | #### Pre-processing
121 | In the future we can simply read it from this fast format.
122 | ```python
123 | df_raw = pd.read_feather('tmp/bulldozers-raw')
124 | ```
125 | ## Run Random Forest
126 | - serial number (numbers): random forest works fine with these ID numbers that are not really continuous
127 | - random forests are trivially parallelizable
128 | - means it will split up data across CPUs and linearly scale
129 | - `n_jobs=-1` means create a separate job for each CPU that you have
130 |
131 | ### Kaggle Competition
132 | - generally speaking, if you're in the top half of a Kaggle competition, you're doing well
133 | - so, here, with no thinking and using the defaults of the algorithm (random forest), we're in the top quarter of the competition
134 | - random forests are insanely powerful
135 |
136 | ### HW
137 | - take as many Kaggle competitions as you can
138 | - try this process with Pandas set up
139 | -
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
--------------------------------------------------------------------------------
/courses/ml1/lesson_02.md:
--------------------------------------------------------------------------------
1 | # Lesson 2: RF - Part 2
2 |
3 | Length: 01:35
4 | Notebook: [lesson1-rf.ipynb](https://github.com/fastai/fastai/blob/master/courses/ml1/lesson1-rf.ipynb)
5 |
6 | ---
7 |
8 | ## Create a symlink
9 | ```bash
10 | ln -s ../../fastai ./
11 | ```
12 | where `./` is the current directory
13 |
14 |
15 | Evaluation metric is: root mean squared log error (RMSLE)
16 | sqrt( mean( (ln(act) - ln(pred))^2 ) )
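A sketch of the metric in numpy (assumes strictly positive actual and predicted values):

```python
import numpy as np

def rmsle(pred, act):
    # root mean squared log error: sqrt(mean((ln(act) - ln(pred))^2))
    return np.sqrt(np.mean((np.log(act) - np.log(pred)) ** 2))

print(rmsle(np.array([10.0, 100.0]), np.array([10.0, 100.0])))  # 0.0
```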
17 |
18 | ## Data Process
19 | - we need all of our columns to be numbers
20 | - use function `add_datepart` to replace a date variable with all of its date parts
21 | - use function `train_cats` to convert strings to pandas categories (Notice: data type is not `string`, but `category`)
22 | - use function `set_categories` to re-order categories
23 | - use function `proc_df` to replace categories with their numeric codes, handle missing continuous values, and split the dependent variable into a separate variable.
24 | >df, y, nas = proc_df(df_raw, 'SalePrice')
25 | - for continuous variables, missing values were replaced with the median
26 |
27 | ## R^2
28 | - if you get an R^2 that is negative, it means your model is worse than predicting the mean
29 | - R^2 is not necessarily what you're trying to optimize
30 | - R^2: how good is your model vs the naive mean model?
31 |
32 | ## Test and Validation Sets
33 | - Creating a validation set is the most important thing you'll do in machine learning.
34 | - Validation Set (first hold out set): use this to determine what hyperparameters to use
35 | - Testing (second hold out set): I've done modeling, now I'll see how it works
36 |
37 | ## Random Forest code
38 | - `n_estimators` = number of trees
39 | - `n_jobs=-1` --> means create a separate job for each CPU that you have
40 | ```python
41 | m = RandomForestRegressor(n_estimators=20, n_jobs=-1)
42 | ```
43 |
44 | ## Random Forest Scores output
45 | [training RMSE , validation RMSE, training R^2, validation R^2]
46 | ```bash
47 | [0.1026724559118164, 0.33553753413792303, 0.9786895444439101, 0.79893791069374753]
48 | ```
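`print_score` is a helper from the course notebooks; a rough reimplementation on toy data (the helper's exact signature in the fastai library may differ):

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor

def rmse(x, y):
    return np.sqrt(((x - y) ** 2).mean())

def print_score(m, X_train, y_train, X_valid, y_valid):
    # [training RMSE, validation RMSE, training R^2, validation R^2(, OOB R^2)]
    res = [rmse(m.predict(X_train), y_train), rmse(m.predict(X_valid), y_valid),
           m.score(X_train, y_train), m.score(X_valid, y_valid)]
    if hasattr(m, 'oob_score_'):
        res.append(m.oob_score_)
    print(res)

# toy demonstration data
rng = np.random.default_rng(0)
X = rng.random((200, 3))
y = 2 * X[:, 0] + X[:, 1] + rng.normal(0, 0.05, 200)
X_train, X_valid, y_train, y_valid = X[:150], X[150:], y[:150], y[150:]

m = RandomForestRegressor(n_estimators=40, n_jobs=-1, oob_score=True, random_state=0)
m.fit(X_train, y_train)
print_score(m, X_train, y_train, X_valid, y_valid)
```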
49 |
50 | ## Bagging
51 | - statistical technique to create a random forest
52 | - Bag of Little Bootstraps, Michael Jordan
53 | - create 5 different models which are not correlated --> they offer different insights
54 | - build 1000 trees on 10 separate data points --> individual trees will not be predictive, but combined they will
55 |
56 | ## Bootstrapping
57 | - pick out n rows with replacement
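In numpy terms, a bootstrap sample is just index draws with replacement:

```python
import numpy as np

rng = np.random.default_rng(0)
n = 10
# a bootstrap sample: n draws from n rows, with replacement
idx = rng.choice(n, size=n, replace=True)
print(sorted(idx.tolist()))  # typically some row indices repeat, others never appear
```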
58 |
59 | ## Out-of-Bag (OOB) Score
60 | - very useful when we have only a small dataset
61 | ```python
62 | m = RandomForestRegressor(n_estimators=40, n_jobs=-1, oob_score=True)
63 | m.fit(X_train, y_train)
64 | print_score(m)
65 | ```
66 | [training RMSE , validation RMSE, training R^2, validation R^2, OOB R^2]
67 | ```bash
68 | [0.10198464613020647, 0.2714485881623037, 0.9786192457999483, 0.86840992079038759, 0.84831537630038534]
69 | ```
70 |
71 | ## Grid Search
72 | - pass in list of hyperparameters we want to tune and values we want to try
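A sketch with scikit-learn's `GridSearchCV` on toy data (the parameter values tried here are arbitrary):

```python
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

rng = np.random.default_rng(0)
X = rng.random((200, 3))
y = X[:, 0] + rng.normal(0, 0.1, 200)

params = {'n_estimators': [10, 40], 'min_samples_leaf': [1, 3]}
gs = GridSearchCV(RandomForestRegressor(n_jobs=-1, random_state=0), params, cv=3)
gs.fit(X, y)
print(gs.best_params_)  # the best-scoring combination found
```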
73 |
74 | ## Subsampling
75 | - The basic idea is this: rather than limit the total amount of data that our model can access, let's instead limit it to a different random subset per tree. That way, given enough trees, the model can still see all the data, but for each individual tree it'll be just as fast as if we had cut down our dataset as before.
76 | - no dataset is too big for this technique (ex: 120 million rows for grocery store data of Kaggle competition)
77 | - need to set `oob_score = False` if using subsample approach of `set_rf_samples(20000)`
78 | - to turn it off, do `reset_rf_samples()`
79 | ```python
80 | set_rf_samples(20000)
81 | ```
82 |
83 | ## Important Takeaway / Tip
84 | - very few people in industry or academia do this
85 | - most people run all of their models on all of their data all of the time using their best parameters
86 | - do most of your models on a large enough sample size so your accuracy is reasonable, that takes a small number of seconds to train
87 |
88 | ## Tree Building Parameters
89 | - `min_samples_leaf=1` this is the default
90 | - `min_samples_leaf=3` says stop training the tree further when your leaf node has 3 or fewer samples; the numbers 1, 3, 5, 10, and 25 work well
91 | - `max_features=None` this is the default; then max_features=n_features (default is to use all the features)
92 | - `max_features=0.5` the less correlated your trees are with each other, the better; randomly choose half the features
93 | - `max_features` in practice, good values range from 0.5 to log2 or sqrt
94 |
95 | ## Random Forest
96 | - hard to screw it up
97 | - great for out of box, even without tuning hyperparameters
98 | - tends to work on most datasets most of the time
99 |
100 | ## Looking at categories
101 | - `df_raw.fiProductClassDesc.cat.categories`
102 | - `df_raw.fiProductClassDesc.cat.codes` --> this is what the random forest sees
103 |
104 | ## Homework
105 | - experiment
106 | - draw the trees
107 | - plot the errors
108 | - try different datasets
109 | - write your own R2
110 | - write your own versions of the datasets
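For the "write your own R2" item, a sketch of the definition (1 minus the ratio of the model's squared error to the naive mean model's):

```python
import numpy as np

def r2(y_true, y_pred):
    # 1 - SS_res / SS_tot: compares the model against always predicting the mean
    ss_res = ((y_true - y_pred) ** 2).sum()
    ss_tot = ((y_true - y_true.mean()) ** 2).sum()
    return 1 - ss_res / ss_tot

y = np.array([1.0, 2.0, 3.0, 4.0])
print(r2(y, y))                     # 1.0 for a perfect model
print(r2(y, np.full(4, y.mean())))  # 0.0 for predicting the mean
```

A model worse than the mean gives a negative value, matching the note above about negative R^2.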
111 |
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/courses/ml1/lesson_05.md:
--------------------------------------------------------------------------------
1 | # Lesson 5
2 |
3 | - Length: 01:40
4 | - Video: https://www.youtube.com/watch?v=3jl2h9hSRvc&feature=youtu.be
5 | - Notebook: [lesson2-rf_interpretation.ipynb](https://github.com/fastai/fastai/blob/master/courses/ml1/lesson2-rf_interpretation.ipynb)
6 |
7 | ---
8 | ## Review
9 | - What's the difference between Machine Learning and "any other kind of [analysis] work"? In ML, we care about the **generalization error** (in other analysis, we care about how well we map our observations to outcome)
10 | - the most common way to check for **generalization** is to randomly pull some data rows into a **test set** and then check the accuracy of the **training set** with the **test set**
11 | - the problem is: what if it doesn't generalize? you could change hyperparameters, data augmentation, etc., and keep doing this until, after many attempts, it generalizes. But after trying 50 different things, you could get a good result accidentally
12 | - what we generally do is get a second **test set** and call it a **validation set**
13 | - a trick for **random forests** is we don't need a validation set; instead, we use the **oob error/score (out-of-bag)**
14 | - every time we train a tree in RF, there are a bunch of observations that are held out anyway (to get rid of some of the randomness).
15 | - **oob score** gives us something pretty similar to **validation score**, though, on average, it is a little less good
16 | - samples from oob are bootstrap samples
17 |   - with a validation set, we can use the whole forest to make the prediction
18 |   - but with OOB, we cannot use the whole forest; every row uses only a subset of the trees to make its predictions; with fewer trees, we get a less accurate prediction
19 |   - think about it over the week
20 | - Why have a validation set at all when using random forests? If it's a randomly chosen validation dataset, it is not strictly necessary;
21 | - you've got several levels of items we've got to test
22 |   1. oob - when that's working well, go to the next one
23 |   2. validation set
24 |   3. test set
25 |
26 | ### How Kaggle Computes Its Validation Score
27 | - splits the test set into 2 pieces: Public, Private
28 | - they don't tell you which is which
29 | - you submit your predictions to Kaggle
30 | - Kaggle selects a random 30% to tell you your Leaderboard score
31 | - at the end of the competition, that gets thrown away
32 | - then they use the other 70% to calculate your "real score"
33 | - making sure that you're not using the feedback from the Leaderboard to figure out some set of hyperparameters that do well but don't generalize
34 | - this is why it is good practice to use Kaggle; at the end of a competition, you may drop 100 places in a competition
35 | - good to practice on Kaggle than at a company where there are millions of dollars on the line
36 |
37 | ### Q: case of not using a random sample for validation
38 | - Q: When might I not be able to use a random set for validation?
39 | - cases: in the case of temporal data, unbalanced data
40 | - Tyler: we expect observations close in time to be related to each other. If we destroy the order, ...
41 | - JH: important to remember, when you build a model, think that we are going to use the model at some time in the future
42 | - when you build a model, you always have a systematic error, that the model will be used at a later time, at which time the world will be different than it is now; there is a lag from when time model is built to time when it is used; even when building the model, data is much older; a lot of the time, _that matters_
43 | - if we're predicting who will buy toilet paper in NJ, and it takes us 2 weeks to put model in production, and we used data based on past 2 years, then by that time, things may look very different
44 | - particularly, our validation set (if we randomly sampled from a 4-yr period), then the vast majority of that data is over a year old, and it may be that the toilet paper buying habits of folks in NJ may have dramatically shifted
45 | - maybe there is a terrible recession and they can't afford high quality paper
46 | - maybe paper making industry has gone thru the roof and they're buying more paper because it's cheaper
47 | - so, the world changes, if you use a random sample for your validation set, then you are actually checking: how good are you at predicting things that are totally obsolete now? how good are you at predicting things that happened 4 years ago? That's _not_ interesting.
48 | - What we want to do in practice, any time there is some temporal piece, instead say (assuming we've ordered it by time), make the tail end of the data the **validation set**
49 | - example: last 10% of data is the test set
50 | - the 10% of the data prior to the test set is the validation set
51 | - we then build a model that still works on stuff that is later in time than what the model was built on; that it generalizes into the future
52 | - Q: how do you get the validation set to be good?
53 | - `20:00` if it looks good on the **oob** then it means we are not overfitting in the statistical sense; it's working well on a random sample; but if it then looks bad on the validation set, you somehow failed to predict the future; you predicted the past
54 | - Suraj idea: maybe we should train a recent period only; downside, we're using less data, create a less-rich model
55 | - most machine learning functions have ability to provide a weight to each row of data
56 | - for example for RF, instead of bootstrapping, could have a weight on each row and randomly pick that row with some probability, so the most recent rows have a higher probability of being selected; that can work very well; it's something you have to try, and if you don't have a validation set that represents the future (compared to what you're training on), then you have no way of knowing how your techniques are working
57 | - `21:15` you make a compromise between amount of data vs recency of data?
58 | - JH: what Jeremy tends to do when he has temporal data, which is probably most of the time: once he gets something working well on the validation set, he wouldn't just go and use that model on the test set, because the test set is much further in the future than what he trained on; instead he would replicate building the model, this time combining the train and validation sets, and retrain. At that point, you've got no way to test against a validation set, so you have to make sure you have a reproducible script or notebook that does exactly the same steps in exactly the same ways, because if you get something wrong you're going to find out on the test set that you've got a problem
59 | - `22:10` so what JH does in practice is: he needs to know whether his validation set is truly representative of
60 | -
61 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/courses/nlp/README.md:
--------------------------------------------------------------------------------
1 | # fastai: [NLP Course](https://www.fast.ai/2019/07/08/fastai-nlp/)
2 | - [YouTube Playlist](https://www.youtube.com/playlist?list=PLtmWHNX-gukKocXQOkQjuVxglSDYWsSh9)
3 | - [Jupyter notebooks on GitHub](https://github.com/fastai/course-nlp)
4 |
5 | ## Videos
6 |
7 | 1. [What is NLP?](https://www.youtube.com/watch?v=cce8ntxP_XI) (0:23) (done 03-Mar-2020)
8 | 2. [Topic Modeling with SVD & NMF (NLP video 2)](https://www.youtube.com/watch?v=tG3pUwmGjsc) (1:07) (done 03-Mar-2020)
9 | 3. [Topic Modeling and SVD revisited](https://youtu.be/lRZ4aMaXPBI) (33:06) (done 04-Mar-2020)
10 | 4. [Sentiment Classification of Movie Reviews (using Naive Bayes, Logistic Regression, Ngrams](https://youtu.be/hp2ipC5pW4I) (58:20)
11 | - [notebook](https://github.com/fastai/course-nlp/blob/master/3-logreg-nb-imdb.ipynb) (done 04-Mar-2020)
12 | 5. [Sentiment Classification of Movie Reviews: NB, LR, Ngrams](https://youtu.be/dt7sArnLo1g) (52:00) (done)
13 | 6. [Derivation of Naive Bayes & Numerical Stability](https://youtu.be/z8-Tbrg1-rE) (24:00) (done 11-Mar-2020)
14 | 7. [Revisiting Naive Bayes, and Regex](https://youtu.be/Q1zLqfnEXdw) (38:00) (done 12-Mar-2020)
15 | 8. [Intro to Language Modeling](https://youtu.be/PNNHaQUQqW8) (41:00) (done 12-Mar-2020)
16 | 9. [Transfer learning](https://youtu.be/5gCQvuznKn0) (1:36:00)
17 | - ...
18 | 19.
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/courses/nlp/videos_01_to_05.md:
--------------------------------------------------------------------------------
1 | # NLP: Lessons 1 to 5
2 |
3 | ## Video 2 [Topic Modeling with SVD & NMF (NLP video 2)](https://www.youtube.com/watch?v=tG3pUwmGjsc)
4 | * spacy doesn't offer a stemmer, because it doesn't think it should be used
5 | * Google [sentencepiece](https://github.com/google/sentencepiece)
6 | * performs sub-word tokens
7 | * NMF (non-negative matrix factorization) is not unique, but can be more interpretable
8 |
9 | To check time of a step:
10 | ```python
11 | %time u, s, v = np.linalg.svd(vectors, full_matrices=False)
12 | ```
13 |
14 | ## Video 3
15 |
16 | - stemming: getting roots of words (chops off end, "poor man's lemmatization")
17 | - lemmatization: (fancier)
18 | - lemmatization is more computationally expensive than stemming
19 | - stemming is quicker and easier
20 |
21 | ### Pre-processing
22 | - when you have less data, do this pre-processing
23 | - do you think your model can handle the complexity:
24 | - if you're using neural networks, don't do lemmatization, because that is throwing away information
25 | - if you have a simpler model, can't learn as much complexity, so do this pre-processing
26 |
27 | ### Factorization is analogous to matrix decomposition
28 |
29 | ### What are the nice properties that matrices in an SVD decomposition have?
30 | - A = USV
31 | - U: orthonormal; columns or rows are orthonormal to each other; the columns are orthogonal and pairwise normalized. (dot product of two columns is 0. dot product of column with itself gives us 1)
32 | - S: diagonal matrix; everything off diagonals is 0; capture an idea of importance, singular values, descending order: capture biggest one first, non-negative, scale of U and V is both 1
33 | - V: same properties as U, but transposed: its rows are orthonormal to each other
34 | - NMF: special property in decomposition is **non-negative** AND matrix is **sparse** (sparse means many of the values are zero)
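These properties can be checked numerically with numpy's reduced SVD:

```python
import numpy as np

A = np.random.default_rng(0).random((5, 4))
U, s, Vt = np.linalg.svd(A, full_matrices=False)

assert np.allclose(U.T @ U, np.eye(4))             # columns of U are orthonormal
assert np.allclose(Vt @ Vt.T, np.eye(4))           # rows of V^T are orthonormal
assert np.all(s >= 0) and np.all(np.diff(s) <= 0)  # singular values: non-negative, descending
assert np.allclose(U @ np.diag(s) @ Vt, A)         # exact decomposition: A = U S V^T
print("all SVD properties hold")
```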
35 |
36 | ## Linear Algebra
37 | - 3 Blue 1 Brown: Essence of Linear Algebra [playlist on YouTube](https://www.youtube.com/playlist?list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab0)
38 | - video Chapter 3 [Linear transformation and matrices](https://youtu.be/kYB8IZa5AuE)
39 |
40 | ## Reviewing spreadsheet
41 | - first matrix: TF-IDF (term document matrix)
42 | - rows: author_title
43 | - cols: words
44 | - use SVD to decompose TF-IDF matrix into 3 matrices
45 | - U rows: author_title
46 | - U cols: topics1 to x
47 | - S: diagonal matrix (singular values in descending order, most important one at (1, 1) position in matrix)
48 | - S rows: topics
49 | - S cols: topics
50 | - V:
51 | - V rows: topics
52 | - V cols: words
53 |
54 | ## Advantages / Disadvantages of SVD vs NMF
55 | - NMF: non-negative values, can be more interpretable
56 | - SVD: can have negative values for topic
57 | - SVD: is an exact decomposition; can fully represent the input matrices
58 | - NMF: not exact
59 | - NMF: need to set the number of topics you want, that's hyperparameter that you are choosing;
60 | - SVD: traditional SVD: you are getting as many singular values as you have documents, (assuming fewer documents than vocab words)
61 | - SVD: there is an opportunity to look at the singular values and see that when they get small, those topics may be so unimportant that you can ignore them and chop them off. But that also means with SVD, you're doing extra work.
62 | - so it is both giving you more information and extra work
63 | - SVD: on a big matrix is **slow**
64 | - in example on a 10K by 10K matrix, it is very slow
65 | - one way to address this is to use **randomized SVD**
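A sketch using scikit-learn's `randomized_svd`, which computes only the top components instead of the full decomposition (matrix sizes here are arbitrary):

```python
import numpy as np
from sklearn.utils.extmath import randomized_svd

A = np.random.default_rng(0).random((1000, 500))
# only compute the top 10 components, much cheaper than a full SVD
U, s, Vt = randomized_svd(A, n_components=10, random_state=0)
print(U.shape, s.shape, Vt.shape)  # (1000, 10) (10,) (10, 500)
```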
66 |
67 | ### Full vs Reduced SVD
68 | - Full SVD will have U and V both be square matrices
69 | - that involves making up some columns for U that don't directly depend on the data A
70 | - for S matrix (singular value), also adding some rows of pure zeroes
71 | - in practice, you are usually going to be using reduced SVD, it's quicker to calculate and you are often not needing to use/ turn it into an orthonormal basis
72 |
73 | ---
74 |
75 | ## Video 4 [Sentiment Classification of Movie Reviews (using Naive Bayes, Logistic Regression, Ngrams](https://youtu.be/hp2ipC5pW4I)
76 |
77 | ### Word frequency count
78 | - in Jupyter notebook, type `?? URLs` to pull up documentation
79 | - `itos` = integer to string [is type list]
80 | - `stoi` = string to integer [is type dictionary], dict is good to search by string,
81 | - `movie_reviews.vocab.itos[230:240]` are ordered by **frequency**
82 | - `movie_reviews.vocab.stoi['language']` gives 917
83 | - if you want human-readable, use strings
84 | - if you want something the algorithm can process, use numbers
85 | - it's not a 1-to-1 mapping because several words can have the same index
86 | - we can have a lot of words mapping to "unknown", many things will map to capital letter,
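A toy sketch of how such a frequency-ordered vocab could be built (the `xxunk` unknown token is an assumption modeled on fastai's convention):

```python
from collections import Counter

texts = ['the movie was great', 'the movie was bad']
counts = Counter(w for t in texts for w in t.split())

# itos: list ordered by frequency; stoi: dict for fast lookup by string
itos = ['xxunk'] + [w for w, _ in counts.most_common()]
stoi = {w: i for i, w in enumerate(itos)}

print(itos[:4])          # ['xxunk', 'the', 'movie', 'was']
print(stoi['movie'])     # 2
```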
87 |
88 | ### Creating term document matrix
89 | - a matrix with lots of zeroes is called **sparse**
90 | - you can save a lot of memory by only storing the non-zero values
91 | - opposite of **sparse** matrices are **dense** matrices
92 |
93 | ## Sparse matrix storage formats
94 | - we know most words don't show up in most reviews
95 |
96 | ### coordinate-wise (scipy calls COO)
97 | - store 3 values: row in matrix, col in matrix and the value of that entry
98 | - instead of full matrix size (say 10x10), you only store 3 items for each entry (x_i, y_i, entry)
99 | - rows or columns need not be ordered in any way
100 |
101 | ### compressed sparse row (CSR)
102 | - stores column and entry
103 | - assigns row pointer, and only changes it when it moves to next row
104 | - list of row pointers is shorter than for coordinate-wise storage
105 | - if you are accessing data by row a lot, this makes it easier
106 | - it's not as easy to access columns, and that would require more calculations
107 |
108 | ### compressed sparse column (CSC)
109 | - similar to CSR, but uses column
110 |
111 | There are a lot of different **Sparse Matrix Compression Formats**.
112 | - Coordinate format is the most intuitive
113 |
114 | Advantage of CSR method over Coordinate-wise method:
115 | - the number of operations to perform matrix-vector multiplication is the same in both storage methods ...
116 | - However: the number of **memory accesses** is reduced (by 2 to be exact) in the CSR method
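The formats above can be inspected with scipy (a toy matrix, not a real term-document matrix):

```python
import numpy as np
from scipy import sparse

# tiny matrix with mostly zeros
dense = np.array([[0, 2, 0],
                  [1, 0, 0],
                  [0, 0, 3]])

coo = sparse.coo_matrix(dense)
# coordinate-wise: (row, col, value) triples, in no required order
print(coo.row.tolist(), coo.col.tolist(), coo.data.tolist())  # [0, 1, 2] [1, 0, 2] [2, 1, 3]

csr = coo.tocsr()
# CSR keeps columns and values, plus one row pointer per row
print(csr.indptr.tolist())  # [0, 1, 2, 3]
```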
117 |
118 |
119 | ---
120 |
121 | # Video 5: [Sentiment Classification of Movie Reviews: NB, LR, Ngrams](https://youtu.be/dt7sArnLo1g)
122 | [Notebook](https://github.com/fastai/course-nlp/blob/master/3-logreg-nb-imdb.ipynb)
123 |
124 |
125 |
--------------------------------------------------------------------------------
/courses/nlp/videos_06_to_10.md:
--------------------------------------------------------------------------------
1 | # NLP: Videos 6 to 10
2 |
3 | ## Video 6: [Derivation of Naive Bayes and Numerical Stability](https://youtu.be/z8-Tbrg1-rE)
4 | - use conda to install fastai library
5 | - how computers store numbers
6 |
7 | ### regex
8 | - use `assert` to check test cases
9 | - instead of writing `0 1 2 3 4 5 6 7 8 9` we can write `[0-9]` or `\d`
10 |
11 |
12 | ## Video 7: [Revisiting Naive Bayes, and Regex](https://youtu.be/Q1zLqfnEXdw)
13 | - revisiting Naive Bayes via spreadsheet
14 | -
15 |
16 | ## Video 8: [Intro to Language Modeling](https://youtu.be/PNNHaQUQqW8)
17 | - notebook 5
18 | -
19 |
20 | ## Video 9: [Transfer learning](https://youtu.be/5gCQvuznKn0)
21 |
22 | A **Bloom filter** is a data structure designed to tell you, rapidly and memory-efficiently, whether an element is present in a set. The price paid for this efficiency is that a Bloom filter is a probabilistic data structure: it tells us that the element either definitely is not in the set or may be in the set.
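A minimal sketch of the idea (the sizes and the salted-SHA-256 hashing are arbitrary choices for illustration, not a production design):

```python
import hashlib

class BloomFilter:
    """k hash positions over an m-slot bit array."""
    def __init__(self, m=1024, k=3):
        self.m, self.k, self.bits = m, k, bytearray(m)

    def _positions(self, item):
        # derive k positions by salting the hash with i
        for i in range(self.k):
            h = hashlib.sha256(f'{i}:{item}'.encode()).digest()
            yield int.from_bytes(h[:8], 'big') % self.m

    def add(self, item):
        for pos in self._positions(item):
            self.bits[pos] = 1

    def __contains__(self, item):
        # False: definitely not in the set; True: may be in the set
        return all(self.bits[pos] for pos in self._positions(item))

bf = BloomFilter()
bf.add('cat')
print('cat' in bf)  # True
print('dog' in bf)  # almost certainly False (false positives are possible, false negatives are not)
```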
23 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/README.md:
--------------------------------------------------------------------------------
1 | # Udacity: PyTorch Scholarship Challenge
2 |
3 | ## Useful Links
4 | - [Slack](https://pytorchfbchallenge.slack.com/messages/CDBRFM534/details/)
5 | - [List of Lessons](https://classroom.udacity.com/nanodegrees/nd188/parts/ef29955b-1133-473a-a46f-c0696c865f97)
6 | - [Udacity home](https://classroom.udacity.com/me)
7 | - [Udacity Program Manager page](https://sites.google.com/udacity.com/pytorch-scholarship-facebook/home?bsft_eid=b79c3be9-39ba-50c5-c5c6-a0855c187059&utm_campaign=sch_600_2018-11-09_ndxxx_pytorch-firstday_na&utm_source=blueshift&utm_medium=email&utm_content=sch_600_2018-11-09_ndxxx_pytorch-firstday_na&bsft_clkid=183339b1-e50a-4fde-b1ce-2c28e575da50&bsft_uid=806e445b-d051-4ad1-b190-fa3b2c617935&bsft_mid=3978c5d6-05bb-4c5a-8977-b6a49db0ac22)
8 | - GitHub [Udacity](https://github.com/udacity/deep-learning-v2-pytorch)
9 |
10 | ## Goals
11 |
12 |
13 | ## Lessons
14 | - [x] Setup: slack, repo
15 | - [x] Lesson 0 Welcome
16 | - [x] Lesson 1 Intro to NN (2 hrs)
17 | - [x] Lesson 2 Talking PyTorch with Soumith (30 min)
18 | - [x] Lesson 3 Intro to PyTorch (2 hrs)
19 | - [x] Lesson 4 CNN (5 hrs)
20 | - [x] Lesson 5 Style Transfer (5 hrs)
21 | - [ ] Lesson 6 RNN (5 hrs)
22 | - [ ] Lesson 7 Sentiment Prediction with RNN (2 hrs)
23 | - [ ] Lesson 8 Deploying PyTorch Models (30 min)
24 | - [ ] Lab Challenge
25 |
26 | ## Course Outline
27 | In this course, you'll learn the basics of deep neural networks and how to build various models using PyTorch. You'll get hands-on experience building state-of-the-art deep learning models.
28 |
29 | ### 1. Introduction to Neural Networks
30 | - Learn the concepts behind deep learning and how we train deep neural networks with backpropagation.
31 |
32 | ### 2. Talking PyTorch with Soumith Chintala
33 | - Cezanne Camacho and Soumith Chintala, the creator of PyTorch, chat about the past, present, and future of PyTorch.
34 |
35 | ### 3. Introduction to PyTorch
36 | - Learn how to build deep neural networks with PyTorch
37 | - Build a state-of-the-art model using a pre-trained network that classifies cat and dog images
38 |
39 | ### 4. Convolutional Neural Networks
40 | - Here you'll learn about convolutional neural networks, powerful architectures for solving computer vision problems.
41 | - Build and train an image classifier from scratch to classify dog breeds.
42 |
43 | ### 5. Style Transfer
44 | - Use a trained network to transfer the style of one image to another image
45 | - Implement the style transfer model from Gatys et al.
46 |
47 | ### 6. Recurrent Neural Networks
48 | - Learn how to use recurrent neural networks to learn from sequences of data such as time series
49 | - Build a recurrent network that learns from text and generates new text one character at a time
50 |
51 | ### 7. Sentiment Prediction with an RNN
52 | - Build and train a recurrent network that can classify the sentiment of movie reviews
53 |
54 | ### 8. Deploying PyTorch Models
55 | - Learn how to use PyTorch's Hybrid Frontend to convert models from Python to C++ for use in production
56 |
57 | ---
58 |
59 | ## References
60 | - [Convolutional Neural Networks (CNNs / ConvNets)](http://cs231n.github.io/convolutional-networks/#conv)
61 | - [Joel Grus - Livecoding Madness - Let's Build a Deep Learning Library](https://www.youtube.com/watch?v=o64FV-ez6Gw)
62 |
63 | ## Recommendations from Udacity
64 | 1. Stanford NLP class (using PyTorch): http://web.stanford.edu/class/cs224n/
65 | 2. UC-Berkeley CV class (also using PyTorch; will be published as open courseware): https://inst.eecs.berkeley.edu/~cs280/sp18/
66 | 3. Colab now supports native PyTorch; give it a try by importing torch, torchvision, etc. and then changing to the GPU backend: https://colab.research.google.com/notebooks/welcome.ipynb#recent=true
67 | 4. EE-559 at EPFL (including slides, code, etc.): https://fleuret.org/ee559/
68 | 5. Ecosystem projects: https://pytorch.org/ecosystem
69 | 6. Newer tutorial by Jeremy Howard and Rachel Thomas - https://pytorch.org/tutorials/beginner/nn_tutorial.html
70 |
71 | *A few folks they follow who are awesome:*
72 | - Chris Manning (Stanford)
73 | - Alyosha Efros (UC-Berkeley)
74 | - Yann LeCun (FB and NYU)
75 | - Smerity (Stephen Merity) https://twitter.com/Smerity?lang=en
76 | - Andrej Karpathy (Tesla)
77 | - Bryan Catanzaro https://twitter.com/ctnzr?lang=en
78 | - Delip Rao https://twitter.com/deliprao?lang=en
79 | - Lisha Li https://twitter.com/lishali88?lang=en
80 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/images/.keep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/images/cnn_formulas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/udacity_pytorch/images/cnn_formulas.png
--------------------------------------------------------------------------------
/courses/udacity_pytorch/notes.md:
--------------------------------------------------------------------------------
1 | # PyTorch
2 |
3 | IR = Intermediate Representation
4 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/orientation.md:
--------------------------------------------------------------------------------
1 | # Orientation
2 |
3 | ## Info
4 | - 12-Nov-2018
5 | - YouTube: https://www.youtube.com/watch?v=bHpZfvVQI3g
6 |
7 | ## Contact
8 | - DM in Slack to give feedback (or via AMA)
9 |
10 | ## to do
11 | - download the Udacity app
12 | - Slack: quality over quantity
13 | - step up, leadership
14 |
15 | ## Tips
16 | - add calendar to personal calendar
17 | - if you miss an AMA or slack, it is all archived on PyTorch challenge site.
18 | - attitude of finishing way ahead of schedule, try to get it done in 2-4 weeks
19 | - put in whatever extra time you have available
20 | - stay a part of the community all the way to the end
21 |
22 | ## Phase 1 Resources & Programs
23 | - check out bios of Alumni Volunteers
24 | - team of 3 on staff, but with 10,000 students
25 | - post questions on Slack
26 | - Study groups, regional, community tab on PyTorch challenge site
27 | - .inTandem
28 | - role of: mentor, student, study buddy
29 | - Social Media tweets, scholarship pride, daily hashtags
30 |
31 | ## Questions
32 | - how do we squash a number (say 0.7 + 0.8 = 1.5) into a value between 0 and 1?
33 | - Use the **sigmoid** function
34 | - sigmoid(1.5) ≈ 0.82
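The sigmoid answer above can be checked with a few lines of plain Python, using the standard definition sigmoid(x) = 1 / (1 + e^-x):

```python
import math

def sigmoid(x):
    # squashes any real number into the (0, 1) range
    return 1 / (1 + math.exp(-x))

print(round(sigmoid(0.7 + 0.8), 2))  # → 0.82
```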
35 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/pytorch_1_nanodegree.md:
--------------------------------------------------------------------------------
1 | # Lesson 1: Welcome
2 |
3 | ## Phase 2: Deep Learning Nanodegree Scholarship Opportunity
4 | You have the chance to qualify for a follow-up scholarship for a Nanodegree program. These scholarships will be awarded based on
5 | - your progress and performance in the initial 2 month challenge course
6 | - as well as your **contributions to the student community.**
7 |
8 | So, be sure to:
9 | - cover all the concepts in the course
10 | - complete all exercises along the way
11 | - and help your fellow students by answering their questions in the forums or in Slack
12 |
13 | ## Participation
14 |
15 | - We've created a Slack Workspace especially for this program so that you have the opportunity to interact with one another in a shared community. We encourage you to use Slack:
16 | - to ask questions and
17 | - receive technical help from your classmates and alumni volunteers
18 | - participate in events and **attend AMA (Ask Me Anything)** sessions with the Scholarship Team.
19 |
20 | ## Developing an AI Application
21 | After training and optimizing your model, you'll upload the saved network to one of our workspaces. Your model will receive a score based on its accuracy predicting flower species from a test set. This **score will be used in our decision process for awarding scholarships.**
22 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/pytorch_3.md:
--------------------------------------------------------------------------------
1 | # PyTorch
2 |
3 | - `weights.reshape(a, b)` returns a new tensor with the same data as `weights` and shape `(a, b)`; sometimes it is a view sharing memory with `weights`, and sometimes a clone that copies the data to another part of memory
4 | - `weights.resize_` underscore at end means this method is an in-place operation
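A small sketch of the difference (assuming PyTorch is installed; the tensor values are illustrative):

```python
import torch

weights = torch.arange(6.)       # tensor([0., 1., 2., 3., 4., 5.])

# reshape returns a (2, 3) tensor; it may share memory with `weights`
# or be a copy, depending on the tensor's memory layout
w2 = weights.reshape(2, 3)

# the trailing underscore marks an in-place operation: weights itself changes
weights.resize_(2, 3)
print(weights.shape)  # torch.Size([2, 3])
```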
5 |
6 | ## Universal Function Approximator
7 |
8 |
9 | ## Loss Function (Cost Function)
10 | - it is a measure of our prediction error
11 | - the whole goal is to adjust our network parameters to minimize our loss
12 | - we do this by using a process called **gradient descent**
13 |
14 | ## Gradient
15 | - the gradient is the slope of the loss function with respect to our parameters
16 | - the gradient always points in the direction of the *fastest change*
17 | - so, if we have a mountain, the gradient is always going to point up the mountain
18 | - so, you can imagine our loss function being like this mountain where we have a high loss up here, and we have a low loss down here
19 | - so, we know that we want to get to the minimum of our loss when we minimize our loss, and so we want to go downwards
20 | - and so, basically, the gradient points upwards and so, we just go the opposite direction. So, we go in the direction of the negative gradient
21 | - and then, if we keep following this down, then eventually we get to the bottom of this mountain, the **lowest loss**
22 | - with multi-layered neural networks, we use an algorithm called backpropagation to do this
23 |
24 | ## Backpropagation
25 | - backprop is really just an application of the chain rule of calculus
26 | - So, if you think about it, when we pass in some data, some input into our network, it goes through this forward pass through the network to calculate our loss
27 | - So, we pass in some data, some feature input x
28 | - and then it goes through this linear transformation which depends on our weights and biases.
29 | - And then through some activation function like sigmoid
30 | - through another linear transformation with some more weights and biases
31 | - and then that goes in [last layer], and from that we calculate our loss
32 | - So, if we make a small change in our weights (say in the first layer), it's going to propagate through the network and end up, like results in, a small change in our loss.
33 | - So you can kind of think of this as a chain of changes
34 | - So, with backprop, we actually use these same changes, but we go in the opposite direction
35 | - So, for each of these operations like the loss and the linear transformation (L2), and the sigmoid activation function, there's always going to be some derivative, some gradient between the outputs and the inputs
36 | - And so what we do, is we take each of the gradients for these operations and we pass them backwards through the network.
37 | - At each step, we multiply the incoming gradient with the gradient of the operation itself.
38 | - So, for example, just kind of starting at the end with the loss
39 | - so we pass this gradient through the loss, dl/dL2, so this is the gradient of the loss with respect to the second linear transformation
40 | - and then we pass that backwards again and multiply it by the gradient of this L2 (the linear transformation with respect to the outputs of our activation function); that gives us the gradient for this operation
41 | - And if you multiply this gradient by the gradient coming from the loss, then we get the total gradient for both of these parts
42 | - and this gradient can be passed back to this softmax function
43 | - So, as the general process for backpropagation, we take our gradients, we pass it backwards to the previous operation, multiply it by the gradient there, and then pass that total gradient backwards.
44 | - So, we just keep doing that through each of the operations in our network.
45 |
46 | ## Losses in PyTorch
47 | - PyTorch provides many losses, including the cross-entropy loss
48 | - `criterion = nn.CrossEntropyLoss()`
49 | - Cross-entropy loss is used in classification problems
50 | - So, if we want to use cross-entropy, we just say `criterion = nn.CrossEntropyLoss()` to create an instance of that class
51 | - So, one thing to note, if you look at the documentation for cross-entropy loss, you'll see that it actually wants the scores, like the logits, of our network, as the input to the cross-entropy loss.
52 | - So, you might expect to use this with an output layer such as softmax, which gives us a nice probability distribution. But, for computational reasons, it's generally better to use the logits, which are the inputs to the softmax, as the input to this loss.
53 | - So, the input is expected to be the scores for each class, and not the probabilities themselves.
54 | - So, first I am going to import the necessary modules.
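A minimal sketch of the logits-in, loss-out pattern (the batch size and class count here are just for illustration):

```python
import torch
from torch import nn

criterion = nn.CrossEntropyLoss()    # instantiate the class with parentheses

logits = torch.randn(4, 10)          # raw scores for 4 samples, 10 classes
labels = torch.randint(0, 10, (4,))  # target class indices, not probabilities

# CrossEntropyLoss applies log-softmax internally, so we pass logits directly
loss = criterion(logits, labels)
print(loss.item())
```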
55 |
56 | ## Metrics
57 | - Accuracy
58 | - Precision
59 | - Recall
60 | - Top-5 Error Rate
61 | - `ps.topk(1)` returns the highest value (probability) for a class, along with that class's index
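A quick sketch of `topk` on a tiny made-up probability tensor:

```python
import torch

ps = torch.tensor([[0.1, 0.7, 0.2]])   # class probabilities for one sample
top_p, top_class = ps.topk(1, dim=1)   # returns (values, indices)
print(top_p)       # tensor([[0.7000]])
print(top_class)   # tensor([[1]])
```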
62 |
63 | ## Transfer Learning
64 | - Using a pre-trained network on images not in the training set is called transfer learning.
65 | - Most of the pretrained models require the input to be 224x224 images.
66 |
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/pytorch_4_cnn.md:
--------------------------------------------------------------------------------
1 | # Convolutional Neural Networks
2 |
3 |
4 | ## Normalization
5 | - helps our networks train better
6 | - for the MNIST data, we divide each pixel value by 255. Our normalized range will be from 0 to 1
7 | - because neural networks rely on gradient calculations, normalizing the pixels helps the gradient calculations stay consistent and not get so large that they slow down or prevent a network from training
8 |
9 |
10 | ## Normalizing image inputs
11 | Data normalization is an important pre-processing step. It ensures that each input (each pixel value, in this case) comes from a standard distribution; that is, the range of pixel values in one input image is the same as the range in another image. This standardization helps our model train and reach a minimum error faster!
12 |
13 | Data normalization is typically done by subtracting the mean (the average of all pixel values) from each pixel, and then dividing the result by the standard deviation of all the pixel values. Sometimes you'll see an approximation here, where we use a mean and standard deviation of 0.5 to center the pixel values. Read more about the [Normalize transformation in PyTorch](https://pytorch.org/docs/stable/torchvision/transforms.html#transforms-on-torch-tensor).
14 |
15 | The distribution of such data should resemble a [Gaussian function](http://mathworld.wolfram.com/GaussianFunction.html) centered at zero. For image inputs we need the pixel numbers to be positive, so we often choose to scale the data in a normalized range [0,1].
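A sketch of the 0.5 approximation with `transforms.Normalize` (the image tensor here is random, standing in for a real `ToTensor` output in [0, 1]):

```python
import torch
from torchvision import transforms

# the common 0.5 approximation: maps pixel values from [0, 1] to [-1, 1]
normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

img = torch.rand(3, 28, 28)   # fake image tensor with values in [0, 1]
out = normalize(img)          # computes (x - mean) / std per channel
print(out.min().item(), out.max().item())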
16 |
17 | ## MLP = Multi-layer perceptron
18 |
19 | ### Validation Set: Takeaways
20 | We create a validation set to:
21 | - Measure how well a model generalizes, during training
22 | - Tell us when to stop training a model; when the validation loss stops decreasing (and especially when the validation loss starts increasing and the training loss is still decreasing)
23 |
24 | ## MLP vs CNN
25 | ### MLP
26 | - use only fully connected layers
27 | - only accept vectors as input
28 |
29 | ### CNN
30 | - also use sparsely connected layers
31 | - also accept matrices as input
32 |
33 | ### openCV Library
34 | OpenCV is a computer vision and machine learning software library that includes many common image analysis algorithms that will help us build custom, intelligent computer vision applications. To start with, this includes tools that help us process images and select areas of interest! The library is widely used in academic and industrial applications; from their site, OpenCV includes an impressive list of users:
35 | > “Along with well-established companies like Google, Yahoo, Microsoft, Intel, IBM, Sony, Honda, Toyota that employ the library, there are many startups such as Applied Minds, VideoSurf, and Zeitera, that make extensive use of OpenCV.”
36 |
37 | So, note how we `import cv2` in the next notebook and use it to create and apply image filters!
38 |
39 | ## Define a Convolutional Layer in PyTorch
40 | ```python
41 | self.conv1 = nn.Conv2d(depth_of_input, desired_depth_of_output,
42 | kernel_size, stride = 1, padding = 0)
43 | ```
44 |
45 | - 3 channels of input: R, G, B
46 | - we may want to produce 16 images (or "filters")
47 | - kernel size: 3x3 filter
48 | - stride generally set to 1 (often the default value)
49 | - padding, set it so convolutional layer will have same height and width as previous layer
50 | ```python
51 | self.conv1 = nn.Conv2d(3, 16, 3, stride = 1, padding = 0)
52 | ```
53 |
54 | ### Max Pooling
55 | - max pooling follows every 1 or 2 convolutional layers in the sequence
56 | - To define a max pooling layer, you only need to define the filter size and stride.
57 | ```python
58 | self.maxpool = nn.MaxPool2d(kernel_size, stride)
59 | ```
60 | - most common settings:
61 | ```python
62 | self.maxpool = nn.MaxPool2d(2, 2)
63 | ```
64 |
65 | ### Q
66 | - If you want to define a convolutional layer that is the same x-y size as an input array, what padding should you have for a kernel_size of 7? (You may assume that other parameters are left as their default values.)
67 | - padding=3
68 | - Yes! If you overlay a 7x7 kernel so that its center-pixel is at the right-edge of an image, you will have 3 kernel columns that do not overlay anything! So, that's how big your padding needs to be.
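The answer can be checked against the usual output-size formula, output = (W - K + 2P) / S + 1; the 28x28 input size below is just for illustration:

```python
import torch
from torch import nn

# with kernel_size=7, stride=1, padding=3: (28 - 7 + 2*3) / 1 + 1 = 28
conv = nn.Conv2d(1, 16, kernel_size=7, stride=1, padding=3)
x = torch.randn(1, 1, 28, 28)
print(conv(x).shape)  # torch.Size([1, 16, 28, 28])
```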
69 |
70 | ## Convolutional Layers
71 | - We typically define a convolutional layer in PyTorch using nn.Conv2d, with the following parameters, specified:
72 | - `nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)`
73 | - `in_channels` refers to the depth of an input. For a grayscale image, this depth = 1
74 | - `out_channels` refers to the desired depth of the output, or the number of filtered images you want to get as output
75 | - `kernel_size` is the size of your convolutional kernel (most commonly 3 for a 3x3 kernel)
76 | - `stride` and `padding` have default values, but should be set depending on how large you want your output to be in the spatial dimensions x, y
77 | Read more about Conv2d in the documentation.
78 |
79 | ## Pooling Layers
80 | - Maxpooling layers commonly come after convolutional layers to shrink the x-y dimensions of an input; read more about pooling layers in the PyTorch documentation.
81 |
82 | ## forward
83 | Here, we see the pooling layer being applied in the forward function.
84 | ```python
85 | x = F.relu(self.conv1(x))
86 | x = self.pool(x)
87 | ```
88 |
89 | 
90 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/pytorch_5_style_transfer.md:
--------------------------------------------------------------------------------
1 | # Style Transfer
2 |
3 | ## Gram Matrix
4 | - non-localized information is information that would still be there even if the image was shuffled around in space
5 | - style: prominent colors and textures of an image
6 | - gram matrix: a matrix whose values indicate the similarities between the feature maps in a layer
7 | - dimensions don't depend on the input image
8 | - just one mathematical way of representing shared or prominent styles
9 | - style itself is kind of an abstract idea, but the gram matrix is the most widely used representation in practice
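A common way to compute the gram matrix, matching the Gatys et al. formulation (the feature-map sizes below are illustrative):

```python
import torch

def gram_matrix(tensor):
    # flatten each of the d feature maps, then take correlations between them
    _, d, h, w = tensor.size()
    features = tensor.view(d, h * w)
    return torch.mm(features, features.t())   # (d, d): independent of h, w

fmap = torch.randn(1, 8, 16, 16)    # one image, 8 feature maps of 16x16
gram = gram_matrix(fmap)
print(gram.shape)  # torch.Size([8, 8])
```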
10 |
11 | ## Style Loss
12 | - the smaller the alpha/beta ratio, the more stylistic effect you will see.
13 | - alpha: content weight
14 | - beta: style weight
15 |
16 | ## VGG Features
17 |
18 | ## Lesson 8 Notebook (Exercise)
19 | - https://github.com/udacity/deep-learning-v2-pytorch/blob/master/style-transfer/Style_Transfer_Solution.ipynb
20 |
21 |
--------------------------------------------------------------------------------
/courses/udacity_pytorch/pytorch_6_rnn.md:
--------------------------------------------------------------------------------
1 | # RNN
2 |
3 | ## recurrent neural networks (RNNs)
4 | RNNs are designed specifically to learn from sequences of data by passing the hidden state from one step in the sequence to the next step in the sequence, combined with the input.
5 |
6 | ## long short-term memory (LSTM)
7 | LSTMs are an improvement on RNNs, and are quite useful when our neural network needs to switch between remembering recent things and things from a long time ago.
8 |
9 |
10 | But first, I want to give you some great references to study this further. There are many posts out there about LSTMs, here are a few of my favorites:
11 |
12 | - Chris Olah's LSTM post
13 | - Edwin Chen's LSTM post
14 | - Andrej Karpathy's blog post on RNNs
15 | - Andrej Karpathy's lecture on RNNs and LSTMs from CS231n
16 |
17 | ## Recurrent Layers
18 | Here is the documentation for the main types of recurrent layers in PyTorch. Take a look and read about the three main types: RNN, LSTM, and GRU.
19 |
20 | - The hidden state should have dimensions: (num_layers, batch_size, hidden_dim).
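Those hidden-state dimensions can be checked with a small sketch (layer sizes chosen for illustration):

```python
import torch
from torch import nn

n_layers, batch_size, seq_len, input_dim, hidden_dim = 2, 4, 5, 10, 32

lstm = nn.LSTM(input_dim, hidden_dim, n_layers, batch_first=True)
x = torch.randn(batch_size, seq_len, input_dim)
out, (h, c) = lstm(x)

# hidden and cell states are (num_layers, batch_size, hidden_dim),
# even with batch_first=True
print(h.shape)  # torch.Size([2, 4, 32])
print(c.shape)  # torch.Size([2, 4, 32])
```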
21 |
22 | ---
23 |
24 | ### `__init__` explanation
25 | First I have an **embedding layer**, which should take in the size of our vocabulary (our number of integer tokens) and produce an embedding of `embedding_dim` size. So, as this model trains, this is going to create an embedding lookup table that has as many rows as we have word integers, and as many columns as the embedding dimension.
26 |
27 | Then, I have an **LSTM layer**, which takes in inputs of `embedding_dim` size. So, it's accepting embeddings as inputs, and producing an output and hidden state of a hidden size. I am also specifying a number of layers, and a dropout value, and finally, I’m setting `batch_first` to True because we are using DataLoaders to batch our data like that!
28 |
29 | Then, the LSTM outputs are passed to a dropout layer and then a fully-connected, linear layer that will produce `output_size` number of outputs. And finally, I’ve defined a sigmoid layer to convert the output to a value between 0-1.
30 |
31 | ### Feedforward behavior
32 | Moving on to the `forward` function, which takes in an input `x` and a `hidden state`, I am going to pass an input through these layers in sequence.
33 |
34 | ```python
35 | def forward(self, x, hidden):
36 | """
37 | Perform a forward pass of our model on some input and hidden state.
38 | """
39 | batch_size = x.size(0)
40 |
41 | # embeddings and lstm_out
42 | embeds = self.embedding(x)
43 | lstm_out, hidden = self.lstm(embeds, hidden)
44 |
45 | # stack up lstm outputs
46 | lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
47 |
48 | # dropout and fully-connected layer
49 | out = self.dropout(lstm_out)
50 | out = self.fc(out)
51 |
52 | # sigmoid function
53 | sig_out = self.sig(out)
54 |
55 | # reshape to be batch_size first
56 | sig_out = sig_out.view(batch_size, -1)
57 | sig_out = sig_out[:, -1] # get last batch of labels
58 |
59 | # return last sigmoid output and hidden state
60 | return sig_out, hidden
61 | ```
62 |
63 | ### `forward` explanation
64 | So, first, I'm getting the batch_size of my input x, which I’ll use for shaping my data. Then, I'm passing x through the embedding layer first, to get my embeddings as output
65 |
66 | These embeddings are passed to my lstm layer, alongside a hidden state, and this returns an lstm_output and a new hidden state! Then I'm going to stack up the outputs of my LSTM to pass to my last linear layer.
67 |
68 | Then I keep going, passing the reshaped lstm_output to a dropout layer and my linear layer, which should return a specified number of outputs that I will pass to my sigmoid activation function.
69 |
70 | Now, I want to make sure that I'm returning only the last of these sigmoid outputs for a batch of input data, so I'm going to shape these outputs into a shape that is batch_size first. Then I'm getting the last batch by calling `sig_out[:, -1]`, and that's going to give me the batch of last labels that I want!
71 |
72 | Finally, I am returning that output and the hidden state produced by the LSTM layer.
73 |
74 | ### `init_hidden`
75 | That completes my forward function, and then I have one more: `init_hidden`, which is just the same as you've seen before. The hidden and cell states of an LSTM are a tuple of values, and each of these has size (n_layers, batch_size, hidden_dim). I'm initializing these hidden weights to all zeros, and moving them to a GPU if available.
76 |
77 | ```python
78 | def init_hidden(self, batch_size):
79 | ''' Initializes hidden state '''
80 | # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
81 | # initialized to zero, for hidden state and cell state of LSTM
82 | weight = next(self.parameters()).data
83 |
84 | if (train_on_gpu):
85 | hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
86 | weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
87 | else:
88 | hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
89 | weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
90 |
91 | return hidden
92 | ```
93 |
94 |
95 |
--------------------------------------------------------------------------------
/courses/v2-dl1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v2-dl1/.DS_Store
--------------------------------------------------------------------------------
/courses/v2-dl1/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning Homework
2 |
3 | These are the recommended tasks:
4 | - go through the notebooks presented in class, read the extra text that is in the notebook and try changing the hyperparameters to better understand them
5 | - choose a dataset of your own and replicate the notebook
6 | - get started on the Kaggle competitions
7 | - read [blogs](https://github.com/reshamas/fastai_deeplearn_part1/blob/master/resources.md)
8 | - you are expected to watch the videos 2 to 4 times (from Lesson 1 to Lesson 7) to grasp the concepts more fully
9 |
--------------------------------------------------------------------------------
/courses/v2-dl1/lesson_6_x.md:
--------------------------------------------------------------------------------
1 | # Lesson 6
2 |
3 |
4 | [Wiki: Lesson 6](http://forums.fast.ai/t/wiki-lesson-6/8629)
5 |
6 | Notebooks:
7 | * [lesson5-movielens.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson5-movielens.ipynb)
8 | * [lesson6-rnn.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson6-rnn.ipynb)
9 | * [lesson3-rossman.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson3-rossman.ipynb)
10 | * [lesson6-sgd.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson6-sgd.ipynb)
11 |
12 | ## Blogs to Review
13 |
14 | * [Optimization for Deep Learning Highlights in 2017](http://ruder.io/deep-learning-optimization-2017/index.html) by Sebastian Ruder (researcher, not USF student)
15 | - this blog covers SGD, ADAM, weight decays :red_circle: (read it!)
16 |
17 | * [Deep Learning #4: Why You Need to Start Using Embedding Layers](https://towardsdatascience.com/deep-learning-4-embedding-layers-f9a02d55ac12)
18 |
19 |
20 | ## Papers to Review
21 | * [Entity Embeddings of Categorical Variables](https://www.slideshare.net/sermakarevich/entity-embeddings-of-categorical-variables)
22 |
23 | ## Summary of Course so Far
24 | - our penultimate lesson
25 |
26 |
27 | ### Dimensions
28 | - we can compress high dimensional spaces to a few dimensions, using PCA (Principal Component Analysis)
29 | - PCA is a linear technique
30 | - Rachel's computational linear algebra covers PCA
31 | - PCA similar to SVD (singular value decomposition)
32 | - find 3 linear combinations of the 50 dimensions which capture as much of the variation as possible, but different from each other
33 | ```python
34 | from sklearn.decomposition import PCA
35 | pca = PCA(n_components=3)
36 | movie_pca = pca.fit(movie_emb.T).components_
37 | ```
38 |
39 | ### MAPE (Mean Average Percent Error)
40 | - can give folks at work random forest with embeddings without using neural networks
41 | - you can train a neural net with embeddings; everyone else in organization can chuck that into GBM or random forests or KNN
42 | - can give power of neural nets to everyone in organization without everyone having to do fastai table
43 | - embedding can be in SQL table
44 | - GBM and random forests learn a lot quicker than neural nets do
45 | - visualizing embeddings can be interesting
46 | - first, see things you expect to see
47 | - then, try seeing things that were not expected (some clusterings)
48 | - Q: skipgrams, a type of embedding?
49 | - A: skipgrams for NLP
50 | - say we have an unlabeled dataset, such as Google Books
51 | - the best way, in my opinion, to turn an unlabeled (or unsupervised) problem into a labeled problem is to invent some labels
52 | - what they did in Word2vec is: here's a sentence with 11 words in it: _ _ _ _ _ _ _ _ _ _ _
53 | - let's delete the middle word and replace it with a random word
54 | - example: replace "cat" with "justice"
55 | - sentence: the cute little **CAT** sat on the fuzzy mat ---> **assign label = 1**
56 | - sentence: the cute little **JUSTICE** sat on the fuzzy mat ---> **assign label = 0**
57 | - ! now we have something we can build a machine learning model on
58 | - quick, shallow learning, end up with embeddings with linear characteristics
59 |
60 | ## NLP
61 | - for something more predictive, use neural net
62 | - we need to move past Word2Vec and GLoVe, these linear based methods; these embeddings are way less predictive than with embeddings learned with deep models
63 | - nowadays, **unsupervised learning** is really **fake task labeled learning**
64 | - we need something where the type of relationships it's going to learn are the types we care about.
65 |
66 | ## Fake Tasks
67 | - in computer vision, let's take an image and apply an unusual data augmentation, such as recoloring it too much, and ask the neural net to distinguish augmented from non-augmented images
68 | - use the best fake task you can
69 | - a bad "fake task" is an **auto-encoder**; reconstruct my input using neural net with some activations deleted; most uncreative task, but it works surprisingly well
70 | - we may cover this unsupervised learning in Part 2, if there is interest
71 |
72 | `41:00` back to Rossman notebook:
73 | - https://github.com/fastai/fastai/blob/master/courses/dl1/lesson3-rossman.ipynb
74 | - lot of details of this notebook are covered in the ML course
75 |
76 | #### Shallow Learning vs Deep Learning
77 | - shallow learning means it doesn't have a hidden layer
78 |
79 | ## Recurrent Neural Networks
80 | https://github.com/fastai/fastai/blob/master/courses/dl1/lesson6-sgd.ipynb
81 |
82 |
83 | * [lesson6-rnn.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson6-rnn.ipynb)
84 |
85 |
86 | - Machine Learning course - building stuff up from the foundations
87 | - Deep Learning course - best practices, top down
88 | - Lessons 9, 10, 11 of ML course: create a neural net layer from scratch
89 |
90 |
91 |
--------------------------------------------------------------------------------
/courses/v2-dl1/lesson_7_x.md:
--------------------------------------------------------------------------------
1 | # Lesson 7
2 | live 11-Dec-2017
3 |
4 |
5 | [Wiki: Lesson 7](http://forums.fast.ai/t/lesson-7-wiki-thread/8847/1)
6 |
7 | Notebooks:
8 | * [lesson6-rnn.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson6-rnn.ipynb)
9 | * [lesson7-cifar10.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson7-cifar10.ipynb)
10 | * [lesson7-CAM.ipynb](https://github.com/fastai/fastai/blob/master/courses/dl1/lesson7-CAM.ipynb)
11 |
12 | ---
13 | ## Other links
14 | - WILD ML RNN Tutorial - http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-1-introduction-to-rnns/
15 | - Chris Olah on LSTM http://colah.github.io/posts/2015-08-Understanding-LSTMs/
16 | - More from Olah and others - https://distill.pub/
17 | - [BatchNorm paper](https://arxiv.org/pdf/1502.03167.pdf)
18 | - [Laptop recommendation](https://youtu.be/EKzSiuqiHNg?t=1h1m51s); [Surface Book 2 15 inch](https://www.cnet.com/products/microsoft-surface-book-2/review/)
19 |
20 |
21 | ## Theme of Part 1
22 | - classification and regression with deep learning
23 | - identifying best practices
24 | - here are 3 lines of code for image classification
25 | - first 4 lessons were NLP, structured data, collaborative filtering
26 | - last 3 lessons were above topics in more detail, more detailed code
27 |
28 | ## Theme of Part 2
29 | - generative modeling
30 | - creating a sentence, image captioning, neural translation
31 | - creating an image, style transfer
32 | - moving from best practices to speculative practices
33 | - how to read a paper and implement from scratch
34 | - does not assume a particular math background, but be prepared to dig through notation and convert to code
35 |
36 | ## RNN
37 | - not so different
38 | - they are like a fully connected network
39 |
40 | ## Batch Size
41 | `bs=64` means the data is split into 64 chunks of data.
42 | NOT batches of size 64!
43 |
44 | ## Data Augmentation for NLP
45 | - JH can't talk about that; doesn't know a good way
46 | - JH will do further study on that
47 |
48 | ## CIFAR 10
49 | - well-known dataset in academia: https://www.cs.toronto.edu/~kriz/cifar.html
50 | - small datasets are much more interesting than ImageNet
51 | - often, we're looking at 32x32 pixels (example: lung cancer image)
52 | - often, it's more challenging, and more interesting
53 | - we can run algorithms much more quickly, and it's still challenging
54 | - you can get the data by: `wget http://pjreddie.com/media/files/cifar.tgz` (provided in form we need)
55 | - this is mean, SD per channel; try to replicate on your own
56 | ```python
57 | classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
58 | stats = (np.array([ 0.4914 , 0.48216, 0.44653]), np.array([ 0.24703, 0.24349, 0.26159]))
59 | ```
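One way to replicate those per-channel stats (a sketch with NumPy; it assumes the images are loaded as an array of shape `(n, h, w, 3)` scaled to 0..1, and the random array here is just a stand-in for the real CIFAR images):

```python
import numpy as np

# stand-in batch of images, shape (n, height, width, channels)
imgs = np.random.rand(100, 32, 32, 3)

# mean and SD per channel, computed over all images and all pixels
mean = imgs.mean(axis=(0, 1, 2))
std = imgs.std(axis=(0, 1, 2))
stats = (mean, std)  # same structure as the (mean, SD) tuple above
```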
60 | - Kerem's notebook on how different optimizers work: https://github.com/KeremTurgutlu/deeplearning/blob/master/Exploring%20Optimizers.ipynb
61 | - to improve model, we'll next replace our fully connected model (with 1 hidden layer) with a CNN
62 | - `nn.Conv2d(layers[i], layers[i + 1], kernel_size=3, stride=2)`
63 | - `layers[i]` number of features coming in
64 | - `layers[i + 1]` number of features coming out
65 | - `stride=2` is a "stride 2 convolution"
66 | - it has similar effect to `maxpooling`; reduces the size of the layers
67 | - `self.pool = nn.AdaptiveMaxPool2d(1)`
68 | - standard now for state-of-the-art algorithms
69 | - I'm not going to tell you how big an area to pool, I will tell you how big a resolution to create
70 | - starting with 28x28: Do a 14x14 adaptive maxpool; same as 2x2 maxpool with a 14x14 output
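A minimal sketch of the idea (plain PyTorch; the channel counts and `padding=1` are illustrative choices, not the course notebook): stacked stride-2 convolutions halve the grid each time, and `AdaptiveMaxPool2d(1)` reduces whatever grid is left down to 1x1.

```python
import torch
import torch.nn as nn

layers = [3, 20, 40, 80]  # illustrative channel counts
convs = nn.Sequential(*[
    nn.Conv2d(layers[i], layers[i + 1], kernel_size=3, stride=2, padding=1)
    for i in range(len(layers) - 1)
])
pool = nn.AdaptiveMaxPool2d(1)  # specify the output resolution, not the pool area

x = torch.randn(2, 3, 32, 32)  # batch of 2 CIFAR-sized images
out = pool(convs(x))           # 32 -> 16 -> 8 -> 4, then pooled to 1x1
print(out.shape)               # torch.Size([2, 80, 1, 1])
```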
71 |
72 | ## BatchNorm (Batch Normalization)
73 | - a couple of years old now
74 | - makes it easier to train deeper networks
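A hedged sketch of where BatchNorm typically sits (generic PyTorch, not the fastai source): after a conv layer, normalizing each channel over the batch before the nonlinearity.

```python
import torch
import torch.nn as nn

# conv -> batchnorm -> relu: a common ordering; channel counts are illustrative
block = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1),
    nn.BatchNorm2d(16),  # normalizes each of the 16 channels over the batch
    nn.ReLU(),
)

x = torch.randn(8, 3, 32, 32)
out = block(x)
print(out.shape)  # torch.Size([8, 16, 16, 16])
```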
75 |
76 |
77 | ## Getting Ready for Part 2
78 | - assumes you have mastered all techniques introduced in Part 1
79 | - has same level of intensity as Part 1
80 | - people who did well in Part 2 last year watched each of the videos at least 3 times
81 | - make sure you get to the point where you can recreate the notebooks without watching the videos
82 | - try and recreate the notebooks using different datasets
83 | - keep up with the forum; recent papers, advances
84 | - you'll find less of it is mysterious; makes more sense; there will always be stuff you don't understand
85 | - Lessons 1 and 2 of Part 1 may seem trivial
86 | - people who succeed are those who keep working at it
87 | - hope to see you all in March
88 | - see you in the Forum
89 |
--------------------------------------------------------------------------------
/courses/v2-dl2/README.md:
--------------------------------------------------------------------------------
1 | Deep Learning - Part 2
2 |
--------------------------------------------------------------------------------
/courses/v3-dl1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/.DS_Store
--------------------------------------------------------------------------------
/courses/v3-dl1/.keep:
--------------------------------------------------------------------------------
1 | xxx
2 |
--------------------------------------------------------------------------------
/courses/v3-dl1/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Course Videos
3 | https://course.fast.ai/videos
4 |
5 |
6 | ## Get to Jupyter Notebook
7 | - Go to localhost (run Jupyter Notebook):
8 | http://localhost:8080/tree
9 |
10 | ## Important Links
11 | - [Google Cloud Platform](http://course-v3.fast.ai/start_gcp.html)
12 | - [GCP: update fastai, conda & packages](http://course-v3.fast.ai/start_gcp.html#step-4-access-fastai-materials-and-update-packages)
13 |
14 |
15 | [PyTorch Forums](https://discuss.pytorch.org)
16 |
17 | ## Model Tuning Advice
18 |
19 | If the learning rate finder shows no graph, the learning rate is too small.
20 |
21 | ### Seed for validation dataset
22 | ```python
23 | np.random.seed(42)
24 | data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.2, ds_tfms=get_transforms(), size=224, num_workers=4)
25 | ```
26 | This means that every time I run this code, I will get the same validation set.
27 |
28 | ### If errors are too high
29 | #### example of problem
30 | ```bash
31 | Total time: 00:13
32 | epoch train_loss valid_loss error_rate
33 | 1 12.220007 1144188288.000000 0.765957 (00:13)
34 | ```
35 |
36 | #### example of solution
37 | ```python
38 | #learn.fit_one_cycle(6, max_lr=0.5)
39 | #learn.fit_one_cycle(6, max_lr=0.25)
40 | #learn.fit_one_cycle(6, max_lr=0.05)
41 | #learn.fit_one_cycle(6, max_lr=0.025)
42 | #learn.fit_one_cycle(6, max_lr=0.01)
43 | learn.fit_one_cycle(6, max_lr=0.001)
44 | ```
45 |
46 | ### LR finder plot is blank
47 | #### 1.
48 | ```python
49 | learn.recorder.plot()
50 | # if plot is blank
51 | learn.recorder.plot(skip_start=0, skip_end=0)
52 | ```
53 |
54 | #### 2. reduce batch size
55 | - Reduce your batch size in order to increase the number of batches.
56 | ```python
57 | np.random.seed(42)
58 | data = ImageDataBunch.from_folder(path, train=".", valid_pct=0.2, ds_tfms=get_transforms(), size=224, num_workers=4, bs=16)
59 | ```
60 |
61 | You’re now overfitting. Try 10 epochs, then unfreeze, then 4 epochs.
62 |
63 |
--------------------------------------------------------------------------------
/courses/v3-dl1/gcp_0_setup_notes.md:
--------------------------------------------------------------------------------
1 | # GCP Setup notes
2 |
3 | ## GCP
4 | - https://cloud.google.com
5 | - [Platform: GCP](https://forums.fast.ai/t/platform-gcp/27375) (Discourse topic)
6 | - [Tutorial](http://course-v3.fast.ai/start_gcp.html) to get started.
7 | - [Complete Guide](https://arunoda.me/blog/ideal-way-to-creare-a-fastai-node) - starting with $0.2/hour
8 |
9 |
10 | ## GCP (Google Cloud Platform)
11 | - fastai instructions for GCP:
12 | - http://course-v3.fast.ai/start_gcp.html
13 | - Console:
14 | - https://console.cloud.google.com/compute/instances?project=near-earth-comets-f8c3f&folder&organizationId
15 |
16 |
17 | ## Instance
18 | ```bash
19 | gcloud --version
20 | ```
21 | output:
22 | ```bash
23 | Google Cloud SDK 222.0.0
24 | bq 2.0.36
25 | core 2018.10.19
26 | gsutil 4.34
27 | ```
28 |
29 | ## Create an Instance on GCP
30 | ```bash
31 | % export IMAGE_FAMILY="pytorch-1-0-cu92-experimental"
32 | % export ZONE="us-west2-b"
33 | % export INSTANCE_NAME="my-fastai-instance"
34 | % export INSTANCE_TYPE="n1-highmem-8"
35 | % gcloud compute instances create $INSTANCE_NAME \
36 | --zone=$ZONE \
37 | --image-family=$IMAGE_FAMILY \
38 | --image-project=deeplearning-platform-release \
39 | --maintenance-policy=TERMINATE \
40 | --accelerator='type=nvidia-tesla-p4,count=1' \
41 | --machine-type=$INSTANCE_TYPE \
42 | --boot-disk-size=200GB \
43 | --metadata='install-nvidia-driver=True' \
44 | --preemptible
45 | Created [https://www.googleapis.com/compute/v1/projects/near-earth-comets-f8c3f/zones/us-west2-b/instances/my-fastai-instance].
46 | NAME ZONE MACHINE_TYPE PREEMPTIBLE INTERNAL_IP EXTERNAL_IP STATUS
47 | my-fastai-instance us-west2-b n1-highmem-8 true 10.168.0.2 35.235.122.68 RUNNING
48 | %
49 | ```
50 |
51 | ## Go to GCP Console and see that the instance has been created
52 | - https://console.cloud.google.com/compute/instances?project=near-earth-comets-f8c3f&folder&organizationId
53 | - Note that this will be the page you have to go to later to **STOP YOUR INSTANCE**.
54 |
55 | ## Connect to GCP Instance
56 | - Once this is done, you can connect to your instance from the terminal by typing:
57 | Example:
58 | ```bash
59 | gcloud compute ssh --zone=$ZONE jupyter@$INSTANCE_NAME -- -L 8080:localhost:8080
60 | ```
61 | For me, it is:
62 | ```bash
63 | gcloud compute ssh --zone=$ZONE jupyter@my-fastai-instance -- -L 8080:localhost:8080
64 | ```
65 | ###
66 | My passphrase: fastai
67 |
68 | ###
69 | >my example
70 |
71 | ```bash
72 | % gcloud compute ssh --zone=$ZONE jupyter@my-fastai-instance -- -L 8080:localhost:8080
73 | Updating project ssh metadata...⠧Updated [https://www.googleapis.com/compute/v1/projects/near-earth-comets-f8c3f].
74 | Updating project ssh metadata...done.
75 | Waiting for SSH key to propagate.
76 | Warning: Permanently added 'compute.7610414667562550937' (ECDSA) to the list of known hosts.
77 | Enter passphrase for key '/Users/reshamashaikh/.ssh/google_compute_engine':
78 | Enter passphrase for key '/Users/reshamashaikh/.ssh/google_compute_engine':
79 | ======================================
80 | Welcome to the Google Deep Learning VM
81 | ======================================
82 |
83 | Version: m10
84 | Based on: Debian GNU/Linux 9.5 (stretch) (GNU/Linux 4.9.0-8-amd64 x86_64\n)
85 |
86 | Resources:
87 | * Google Deep Learning Platform StackOverflow: https://stackoverflow.com/questions/tagged/google-dl-platform
88 | * Google Cloud Documentation: https://cloud.google.com/deep-learning-vm
89 | * Google Group: https://groups.google.com/forum/#!forum/google-dl-platform
90 |
91 | To reinstall Nvidia driver (if needed) run:
92 | sudo /opt/deeplearning/install-driver.sh
93 | This image uses python 3.6 from the Anaconda. Anaconda is installed to:
94 | /opt/anaconda3/
95 |
96 | If anything need to be installed and used with Jupyter Lab please do it in the following way:
97 | sudo /opt/anaconda3/bin/pip install
98 |
99 | Linux my-fastai-instance 4.9.0-8-amd64 #1 SMP Debian 4.9.110-3+deb9u6 (2018-10-08) x86_64
100 |
101 | The programs included with the Debian GNU/Linux system are free software;
102 | the exact distribution terms for each program are described in the
103 | individual files in /usr/share/doc/*/copyright.
104 |
105 | Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent
106 | permitted by applicable law.
107 | jupyter@my-fastai-instance:~$
108 | ```
109 |
110 | ### Commands to run
111 |
112 | ```bash
113 | ls
114 | python -V
115 | conda --version
116 | pip list
117 | ```
118 |
119 | ### Go to localhost (run Jupyter Notebook)
120 | http://localhost:8080/tree
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/courses/v3-dl1/gcp_1_logging_in.md:
--------------------------------------------------------------------------------
1 | # Logging in to GCP
2 |
3 | ## Step 1: GCP Console
4 | 1. Go to my [GCP console](https://console.cloud.google.com/compute/instances?project=near-earth-comets-f8c3f&folder&organizationId&duration=PT1H)
5 | 2. `Start` my instance, which is called `my-fastai-instance`
6 |
7 | ## Step 2: My Mac Terminal
8 | 0. `gcloud auth login` May need to login in via Google chrome
9 |
10 | 1. Go to my terminal on the Mac, type this:
11 | ```bash
12 | gcloud compute ssh --zone=$ZONE jupyter@my-fastai-instance -- -L 8080:localhost:8080
13 | ```
14 | ```bash
15 | gcloud compute ssh --zone=us-west2-b jupyter@my-fastai-instance -- -L 8080:localhost:8080
16 | ```
17 |
18 | >Enter passphrase for key '/Users/reshamashaikh/.ssh/google_compute_engine':
19 | ```
20 | xxxxx
21 | ```
22 | I will see this:
23 | ```bash
24 | jupyter@my-fastai-instance:~$
25 | ```
26 |
27 | ## Updating
28 | ### Important Links
29 | - [Google Cloud Platform](http://course-v3.fast.ai/start_gcp.html)
30 | - [GCP: update fastai, conda & packages](http://course-v3.fast.ai/start_gcp.html#step-4-access-fastai-materials-and-update-packages)
31 |
32 | ### Updating packages
33 | ```bash
34 | cd course-v3/
35 | git pull
36 | ```
37 | ```bash
38 | jupyter@my-fastai-instance:~/course-v3$ cd ..
39 | jupyter@my-fastai-instance:~$ pwd
40 | /home/jupyter
41 | ```
42 | ```bash
43 | cd tutorials/fastai
44 | git checkout .
45 | git pull
46 | ```
47 |
48 | ## Update fastai library
49 | ```bash
50 | sudo /opt/anaconda3/bin/conda install -c fastai fastai
51 | ```
52 | ```bash
53 | conda install -c fastai fastai
54 | ```
55 |
56 | ### get fastai version ---> in terminal
57 | ```bash
58 | pip list | grep fastai
59 | ```
60 |
61 |
62 |
63 | ---
64 | ```bash
65 | jupyter@my-fastai-instance:~/tutorials/fastai$ pip list | grep fastai
66 | fastai 1.0.12
67 | ```
68 | Fri, 11/12/18
69 | ```bash
70 | fastai 1.0.18
71 | ```
72 | Sat, 12/8/18
73 | ```bash
74 | fastai 1.0.35
75 | ```
76 | Sat, 12/15/18
77 | ```bash
78 | From https://github.com/fastai/fastai
79 | 7d617eda..af59fa03 master -> origin/master
80 | * [new branch] release-1.0.36 -> origin/release-1.0.36
81 | * [new branch] release-1.0.37 -> origin/release-1.0.37
82 | * [new tag] 1.0.37 -> 1.0.37
83 | * [new tag] 1.0.36 -> 1.0.36
84 | * [new tag] 1.0.36.post1 -> 1.0.36.post1
85 | ```
86 |
87 | ### get fastai version ---> in Jupyter notebook
88 | ```python
89 | import torch
90 | print(torch.__version__)
91 | import fastai
92 | print(fastai.__version__)
93 | ```
94 |
95 | ## Step 3: Get to Jupyter Notebook
96 | - Go to localhost (run Jupyter Notebook):
97 | http://localhost:8080/tree
98 |
99 | ## Where am I working?
100 | ```bash
101 | jupyter@my-fastai-instance:~/projects$ pwd
102 | /home/jupyter/projects
103 | ```
104 | http://localhost:8080/tree/projects
105 |
106 |
107 | ## Step 4: Shut down GCP instance in the console
108 | - Go to GCP console
109 |
110 | ---
111 |
112 | - `ImageDataBunch`
113 | - `TextDataBunch`
114 |
--------------------------------------------------------------------------------
/courses/v3-dl1/images/.keep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/courses/v3-dl1/images/camel.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/camel.jpeg
--------------------------------------------------------------------------------
/courses/v3-dl1/images/camels_class.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/camels_class.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/camels_confusion.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/camels_confusion.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/elephant1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/elephant1.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/elephant_cm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/elephant_cm.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/elephant_predict.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/elephant_predict.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/gcp1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/gcp1.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/horse.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/horse.jpeg
--------------------------------------------------------------------------------
/courses/v3-dl1/images/horses_txt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/horses_txt.png
--------------------------------------------------------------------------------
/courses/v3-dl1/images/nyc_group.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/nyc_group.jpeg
--------------------------------------------------------------------------------
/courses/v3-dl1/images/rs_camel.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/rs_camel.jpg
--------------------------------------------------------------------------------
/courses/v3-dl1/images/soumith.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/soumith.jpg
--------------------------------------------------------------------------------
/courses/v3-dl1/images/south_africa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v3-dl1/images/south_africa.png
--------------------------------------------------------------------------------
/courses/v3-dl1/kaggle_fruits.md:
--------------------------------------------------------------------------------
1 | ```bash
2 | 568 ls
3 | 569 pip install kaggle-cli
4 | 570 pip install kaggle
5 | 571 export KAGGLE_USERNAME=reshamashaikh
6 | 572 export KAGGLE_KEY=9896d8968968968968968968962
7 | 573 kaggle datasets download -d moltean/fruits
8 | 574 conda env list
9 | 575 history
10 | 576 pip uninstall kaggle-cli
11 | 577 pip install --upgrade pip
12 | 578 sudo pip install kaggle
13 | 579 history
14 | 580 kaggle datasets download -d moltean/fruits
15 | 581 ls
16 | 582 mkdir inputs
17 | 583 ls
18 | 584 mv inputs/ input/
19 | 585 ls
20 | 586 unzip fruits.zip input/
21 | 587 mkdir input/fruits
22 | 588 unzip fruits.zip input/
23 | 589 ls
24 | 590 unzip fruits.zip -d input/fruits/
25 | 591 ls
26 | 592 history
27 | jupyter@my-fastai-instance:~/kaggle_fruits$
28 | ```
29 |
30 | ```bash
31 | 571 export KAGGLE_USERNAME=reshamashaikh
32 | 572 export KAGGLE_KEY=9896d8968968968968968968962
33 | 578 sudo pip install kaggle
34 | kaggle datasets download -d moltean/fruits
35 | 582 mkdir inputs
36 | 584 mv inputs/ input/
37 | 590 unzip fruits.zip -d input/fruits/
38 | ```
39 |
40 | ```bash
41 | jupyter@my-fastai-instance:~/projects/dl_fastai/nlp/data$ kaggle datasets download -d yelp-dataset/yelp-dataset
42 | jupyter@my-fastai-instance:~/projects/dl_fastai/nlp/data$ ls
43 | yelp-dataset.zip
44 | jupyter@my-fastai-instance:~/projects/dl_fastai/nlp/data$ unzip yelp-dataset.zip
45 | ```
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/courses/v3-dl1/lesson_1_lecture.md:
--------------------------------------------------------------------------------
1 | # Lesson 1
2 |
3 | - Live Date: 22-Oct-2018
4 | - [Wiki](https://forums.fast.ai/t/lesson-1-class-discussion-and-resources/27332)
5 | - [Video](https://www.youtube.com/watch?v=BWWm4AzsdLk)
6 | - Video duration: 1:40:11
7 | - Notebook:
8 | - [lesson1-pets.ipynb](https://github.com/fastai/course-v3/blob/master/nbs/dl1/lesson1-pets.ipynb)
9 | - [fastai library](https://github.com/fastai/fastai)
10 |
11 | ---
12 |
13 | ## Homework: parts completed ✅
14 | - Google Cloud setup
15 | - Get your GPU going
16 | - read lesson1-pets.ipynb notebook
17 | - read [Tips for Building Image dataset](https://forums.fast.ai/t/tips-for-building-large-image-datasets/26688)
18 | - read [Lesson 1 notes](https://forums.fast.ai/t/deep-learning-lesson-1-notes/27748)
19 | - read fastai documentation
20 | - run lesson1-pets.ipynb
21 |
22 | ## Homework: To Do
23 | - get your own image dataset
24 | - Repeat process on your own dataset
25 | - Share on Forums
26 | - repo: fastai_docs
27 | - download repo
28 | - run the code
29 | - experiment
30 | - git/clone, open in Jupyter (GitHub doesn't render notebooks so well)
31 | - Use the first notebook
32 |
33 | ## Lesson 1 Pets
34 | ```bash
35 | RuntimeError: CUDA error: out of memory
36 | ```
37 | Note: reduce batch size and restart kernel
38 |
39 |
40 |
41 | ---
42 |
43 | # Intro
44 | - slightly delayed, waiting for students to get through security
45 | - For in-class students in SF:
46 | - get to know your group of 6
47 |
48 | ## Pete Maker
49 | - intro from PG&E
50 | - USF specific site procedures (earthquake, emergencies, evacuation)
51 |
52 | ## [David Uminsky](https://www.linkedin.com/in/david-uminsky-5153b1a8/)
53 | - Professor of DS at USF
54 | - Diversity Fellows sponsored by: EBay, Facebook
55 | - 3rd iteration of this course (grown from 60-80 students to 280)
56 |
57 | ## Rachel Thomas
58 |
59 | ## Jeremy Howard
60 | - largest group of people joining: Bangalore, India
61 | - US towns
62 | - Lagos
63 |
64 | ## Computer for in-class
65 | 1. AWS Salamander
66 | 2. AWS EC2
67 | 3. Google Cloud Platform (GCP)
68 |
69 | ## Computers for Int'l
70 | 1. Google Cloud Platform
71 | - has fastai image
72 | - $300 credits
73 | 2. AWS EC2 $0.90/hr
74 |
75 | ## GCP
76 | https://cloud.google.com
77 |
78 | ### Advice
79 | Pick one project, do it very well, and make it fantastic.
80 |
81 | - `doc(interp.plot_top_losses)`
82 |   - shows prediction, actual, loss, and the probability of the predicted class
83 | - Don't be afraid to look at the source code.
84 | - if you have lots of classes, don't use the confusion matrix; use `interp.most_confused()` instead
85 | - `unfreeze`: please train the whole model
86 | - if you run out of memory, use a smaller batch size
87 |
88 |
89 |
--------------------------------------------------------------------------------
/courses/v3-dl1/lesson_1_rs_camels_horses.md:
--------------------------------------------------------------------------------
1 | # Camels vs Horses
2 |
3 | ## Important Links
4 | - [Google Cloud Platform](http://course-v3.fast.ai/start_gcp.html)
5 | - [GCP: update fastai, conda & packages](http://course-v3.fast.ai/start_gcp.html#step-4-access-fastai-materials-and-update-packages)
6 |
7 | ---
8 |
9 | # Downloading Images
10 | [Fastai tutorial: downloading images](https://github.com/fastai/course-v3/blob/master/nbs/dl1/download_images.ipynb)
11 | - After this step in the Chrome Javascript console:
12 |
13 | ```javascript
14 | window.open('data:text/csv;charset=utf-8,' + escape(urls.join('\n')));
15 | ```
16 | On a Mac, this downloads a file called `download.csv` to the `~/Downloads` folder
17 | - rename the file to match your image class. For me:
18 | 1. camels.csv
19 | 2. horses.csv
20 |
21 | #### Go to my `Downloads` directory
22 | ```bash
23 | pwd
24 | ```
25 | ```
26 | /Users/reshamashaikh/Downloads
27 | ```
28 |
29 | #### List items in directory by modification time (newest last)
30 | ```bash
31 | ls -lrt
32 | ```
33 | ```
34 | -rw-r--r--@ 1 68354 Oct 26 17:01 camels.csv
35 | -rw-r--r--@ 1 85497 Oct 26 17:03 horses.csv
36 | ```
37 |
38 | ## `scp` to GCP
39 | ```bash
40 | gcloud compute scp camels.csv jupyter@my-fastai-instance:~
41 | gcloud compute scp horses.csv jupyter@my-fastai-instance:~
42 | ```
43 |
44 | ## on GCP: move data to `data` directory
45 | ```bash
46 | jupyter@my-fastai-instance:~$ ls
47 | camels.csv course-v3 horses.csv tutorials
48 | jupyter@my-fastai-instance:~$ mv *.csv /home/jupyter/tutorials/data
49 | jupyter@my-fastai-instance:~$ ls
50 | course-v3 tutorials
51 | jupyter@my-fastai-instance:~$ ls tutorials/data
52 | camels.csv horses.csv
53 | jupyter@my-fastai-instance:~$
54 | ```
55 |
56 | ## convert `.csv` files to `.txt` files
57 | ```bash
58 | cat camels.csv | tr ',' '\n' > camels.txt
59 | cat horses.csv | tr ',' '\n' > horses.txt
60 | ```
61 |
62 | ## rename files to match notebook
63 | ```bash
64 | mv camels.txt urls_camels.txt
65 | mv horses.txt urls_horses.txt
66 | ```
67 |
68 | ## Create directory and upload urls file into your server
69 | - Original [notebook](https://github.com/fastai/course-v3/blob/master/nbs/dl1/download_images.ipynb)
70 | ```python
71 | my_path = "/home/jupyter/tutorials/"
72 | ```
73 | ```python
74 | folder = 'camels'
75 | file = 'urls_camels.txt'
76 | ```
77 | ```python
78 | path = Path(my_path+'data/mammals')
79 | dest = path/folder
80 | dest.mkdir(parents=True, exist_ok=True)
81 | ```
82 | do same for "horses"
83 |
84 | ### Move url_name.txt file to appropriate folder
85 | ```bash
86 | mv urls_camels.txt /home/jupyter/tutorials/data/mammals/camels
87 | mv urls_horses.txt /home/jupyter/tutorials/data/mammals/horses
88 | ```
89 |
90 | ## Directory Tree
91 | ```bash
92 | jupyter@my-fastai-instance:~/tutorials/data$ pwd
93 | /home/jupyter/tutorials/data
94 | jupyter@my-fastai-instance:~/tutorials/data$ tree -d
95 | .
96 | └── mammals
97 | ├── camels
98 | └── horses
99 |
100 | 3 directories
101 | jupyter@my-fastai-instance:~/tutorials/data$
102 | ```
103 |
104 | ## let's look at file
105 | ```bash
106 | head urls_camels.txt
107 | ```
108 |
109 | ```bash
110 | jupyter@my-fastai-instance:~/tutorials/data/mammals$ head urls_camels.txt
111 | https://media.buzzle.com/media/images-en/gallery/mammals/camels/1200-close-up-of-camel-nostrils.jpg
112 | http://www.cidrap.umn.edu/sites/default/files/public/styles/ss_media_popup/public/media/article/baby_camel_nursing.jpg?itok=0vwqXyoW
113 | https://www.thenational.ae/image/policy:1.632918:1506081168/image/jpeg.jpg?f=16x9&w=1200&$p$f$w=dfa40e8
114 | https://i.dailymail.co.uk/i/pix/2012/11/24/article-2237967-162CA49A000005DC-153_634x409.jpg
115 | https://samslifeinjeddah.files.wordpress.com/2014/08/jed-camel-2_edited.jpg
116 | https://i.pinimg.com/236x/29/94/04/299404d417dd8b836b4a5c396cb597a6--camel-animal-baby-camel.jpg
117 | https://i.chzbgr.com/full/9056188416/h8763E301/
118 | https://i.dailymail.co.uk/i/pix/2012/11/24/article-2237967-162CA5A0000005DC-2_634x372.jpg
119 | https://secure.i.telegraph.co.uk/multimedia/archive/01676/Camel_Milk_1676595c.jpg
120 | https://upload.wikimedia.org/wikipedia/commons/4/43/07._Camel_Profile%2C_near_Silverton%2C_NSW%2C_07.07.2007.jpg
121 | jupyter@my-fastai-instance:~/tutorials/data/mammals$
122 | ```
123 |
124 |
--------------------------------------------------------------------------------
/courses/v3-dl1/lesson_3_lecture.md:
--------------------------------------------------------------------------------
1 | # Lesson 3: Multi-label, Segmentation, Image Regression & More
2 |
3 | - Live Date: 08-Nov-2018
4 | - Video: https://www.youtube.com/watch?v=VPg2ZlRPiXI
5 | - Wiki: https://forums.fast.ai/t/lesson-3-official-resources-and-updates/29732
6 |
7 | ## Video Player for Lessons
8 | - Zach in SF study group
9 | - http://videos.fast.ai
10 |
11 | ## Intro
12 | - in class discussion thread for Forums, stuff related to lesson, related to people new
13 | - lesson 3, further discussion thread, on advanced sub-category
14 | - Andrew Ng has a Machine Learning course on Coursera
15 | - fastai ML course not a prereq for DL course
16 |
17 | ## Production area on documentation
18 | - [Zeit deployment of app](https://course-v3.fast.ai/deployment_zeit.html)
19 |
20 |
--------------------------------------------------------------------------------
/courses/v3-dl2/README.md:
--------------------------------------------------------------------------------
1 | # Part 2 (version 3: Sprint 2019)
2 |
3 | Forums: [Part 2 Lessons, Links and Updates](https://forums.fast.ai/t/2019-part-2-lessons-links-and-updates/41429)
4 |
5 | ## Lessons
6 | - Lesson 8:
7 | - Lesson 9: How to train your model
8 |
--------------------------------------------------------------------------------
/courses/v3-dl2/lecture_8.md:
--------------------------------------------------------------------------------
1 | # Lesson 8
2 |
3 | ## From foundations: Matrix multiplication; Fully connected network forward and backward passes
4 |
5 | ### Broadcasting
6 | - powerful tool for writing code in Python that runs at C speed
7 | - with PyTorch, it will run at CUDA speed; allows us to get rid of our for-loops
8 | - 'broadcasting' a scalar to a tensor
9 | ```python
10 | t = c.expand_as(m)
11 | t.storage()
12 | t.stride(), t.shape
13 | ```
14 | - tensors that behave as higher rank things than they are actually stored as
15 | - broadcasting functionality gives us C like speed without additional memory overhead
16 | - `unsqueeze` adds an additional dimension
17 | ```python
18 | c.unsqueeze(1)
19 | ```
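A self-contained sketch of the two ideas above (my own toy tensors, not the lecture notebook): `expand_as` makes a vector behave as a rank-2 tensor without copying memory (note the stride of 0), and `unsqueeze(1)` turns it into a column so broadcasting runs down the columns instead.

```python
import torch

m = torch.ones(3, 3)
c = torch.tensor([10., 20., 30.])

# c behaves as rank 2 here, but no copy is made: stride 0 re-reads the same row
t = c.expand_as(m)
print(t.shape)     # torch.Size([3, 3])
print(t.stride())  # (0, 1)

# unsqueeze(1) makes c a column vector, so it broadcasts down the columns
col = c.unsqueeze(1)
print(col.shape)        # torch.Size([3, 1])
print((m + col)[0, 0])  # tensor(11.)
```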
20 |
21 | ### Einstein Summation Notation
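A minimal sketch of the notation (my own example, not from the lecture): in `torch.einsum`, an index that appears in both inputs but not the output is summed over, so `'ik,kj->ij'` is exactly matrix multiplication.

```python
import torch

a = torch.randn(2, 3)
b = torch.randn(3, 4)

# 'ik,kj->ij': the repeated index k is summed over -- matrix multiplication
c = torch.einsum('ik,kj->ij', a, b)
print(c.shape)                   # torch.Size([2, 4])
print(torch.allclose(c, a @ b))  # True
```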
22 |
23 |
24 |
--------------------------------------------------------------------------------
/courses/v4-dl1/README.md:
--------------------------------------------------------------------------------
1 | # Deep Learning: fastai version 4
2 |
3 | ## Fastai Forums
4 | - [Official Part 1 (2020) updates and resources thread ](https://forums.fast.ai/t/official-part-1-2020-updates-and-resources-thread/63376)
5 |
6 |
7 | ## Resources
8 | - video: [nbdev tutorial](https://youtu.be/Hrs7iEYmRmg)
9 | - A walk-thru of the basic features of nbdev (http://nbdev.fast.ai/).
10 |
--------------------------------------------------------------------------------
/courses/v4-dl1/doc_Jupyter_01.md:
--------------------------------------------------------------------------------
1 |
2 | ## Referencing Documentation in Jupyter Notebook
3 |
4 | #### inside ( of function; shift + tab
5 | Note: go inside the parentheses of a function and hit Shift+Tab to see options
6 |
7 | 
8 |
9 | #### `?` OR `??` gives interactive python guide (abbreviated output below. [full doc output](doc_01_reference.md))
10 |
11 | ```text
12 | IPython -- An enhanced Interactive Python
13 | =========================================
14 |
15 | IPython offers a fully compatible replacement for the standard Python
16 | interpreter, with convenient shell features, special commands, command
17 | history mechanism and output results caching.
18 |
19 | At your system command line, type 'ipython -h' to see the command line
20 | options available. This document only describes interactive features.
21 |
22 | GETTING HELP
23 | ------------
24 |
25 | Within IPython you have various way to access help:
26 |
27 | ? -> Introduction and overview of IPython's features (this screen).
28 | object? -> Details about 'object'.
29 | object?? -> More detailed, verbose information about 'object'.
30 | %quickref -> Quick reference of all IPython specific syntax and magics.
31 | help -> Access Python's own help system.
32 |
33 | If you are in terminal IPython you can quit this screen by pressing `q`.
34 | ```
35 |
36 | #### `?learn` gives (`learn?` works too)
37 | ```bash
38 | Signature: learn(event_name)
39 | Type: Learner
40 | String form:
41 | File: /opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/learner.py
42 | Docstring: Group together a `model`, some `dls` and a `loss_func` to handle training
43 | ```
44 |
45 | #### `??learn` gives entire class info (doc abbreviated here); (`learn??` works too)
46 | ```bash
47 | Signature: learn(event_name)
48 | Type: Learner
49 | String form:
50 | File: /opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/learner.py
51 | Source:
52 | class Learner():
53 | def __init__(self, dls, model, loss_func=None, opt_func=Adam, lr=defaults.lr, splitter=trainable_params, cbs=None,
54 | metrics=None, path=None, model_dir='models', wd=None, wd_bn_bias=False, train_bn=True,
55 | moms=(0.95,0.85,0.95)):
56 | store_attr(self, "dls,model,opt_func,lr,splitter,model_dir,wd,wd_bn_bias,train_bn,metrics,moms")
57 | self.training,self.create_mbar,self.logger,self.opt,self.cbs = False,True,print,None,L()
58 | if loss_func is None:
59 | loss_func = getattr(dls.train_ds, 'loss_func', None)
60 | assert loss_func is not None, "Could not infer loss function from the data, please pass a loss function."
61 | self.loss_func = loss_func
62 | self.path = path if path is not None else getattr(dls, 'path', Path('.'))
63 | self.add_cbs([(cb() if isinstance(cb, type) else cb) for cb in L(defaults.callbacks)+L(cbs)])
64 | self.model.to(self.dls.device)
65 | if hasattr(self.model, 'reset'): self.model.reset()
66 | self.epoch,self.n_epoch,self.loss = 0,1,tensor(0.)
67 |
68 | @property
69 | def metrics(self): return self._metrics
70 | @metrics.setter
71 | def metrics(self,v): self._metrics = L(v).map(mk_metric)
72 | ```
73 |
74 | #### `?learn.predict` gives:
75 | ```bash
76 | Signature: learn.predict(item, rm_type_tfms=None, with_input=False)
77 | Docstring: Return the prediction on `item`, fully decoded, loss function decoded and probabilities
78 | File: /opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/learner.py
79 | Type: method
80 | ```
81 |
82 | #### `??learn.predict` gives:
83 | ```bash
84 | Signature: learn.predict(item, rm_type_tfms=None, with_input=False)
85 | Docstring: Return the prediction on `item`, fully decoded, loss function decoded and probabilities
86 | Source:
87 | def predict(self, item, rm_type_tfms=None, with_input=False):
88 | dl = self.dls.test_dl([item], rm_type_tfms=rm_type_tfms)
89 | inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)
90 | dec = self.dls.decode_batch((*tuplify(inp),*tuplify(dec_preds)))[0]
91 | i = getattr(self.dls, 'n_inp', -1)
92 | dec_inp,dec_targ = map(detuplify, [dec[:i],dec[i:]])
93 | res = dec_targ,dec_preds[0],preds[0]
94 | if with_input: res = (dec_inp,) + res
95 | return res
96 | File: /opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/learner.py
97 | Type: method
98 | ```
99 |
100 | #### `doc(learn)` gives
101 |
102 | ```text
103 | Learner object at 0x7f5ffb61dfd0>[source]
104 | Learner object at 0x7f5ffb61dfd0>(event_name)
105 |
106 | Group together a model, some dls and a loss_func to handle training
107 | ```
108 | #### `doc(learn)` and getting to source code
109 | Can click on "[source]" after typing `doc(learn)` to bring you to the fastai code in GitHub repo
110 |
111 | - `doc(learn.predict)` gives
112 | ```text
113 | Learner.predict[source]
114 | Learner.predict(item, rm_type_tfms=None, with_input=False)
115 |
116 | Return the prediction on item, fully decoded, loss function decoded and probabilities
117 |
118 | Show in docs
119 | ```
120 |
121 | #### `ImageDataLoaders` + shift + tab
122 | ```text
123 | Init signature: ImageDataLoaders(*loaders, path='.', device=None)
124 | Docstring: Basic wrapper around several `DataLoader`s with factory methods for computer vision problems
125 | File: /opt/conda/envs/fastai/lib/python3.7/site-packages/fastai2/vision/
126 | ```
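As a stdlib footnote: the `?`/`??` output above can be approximated outside IPython with Python's `inspect` module (the `predict` below is a toy stand-in, not fastai's):

```python
import inspect

def predict(item, rm_type_tfms=None, with_input=False):
    """Toy stand-in for Learner.predict, used only to demo introspection."""
    return item

# Roughly what `?predict` shows: the signature and docstring.
print(inspect.signature(predict))   # (item, rm_type_tfms=None, with_input=False)
print(inspect.getdoc(predict))

# `??predict` additionally shows the source, via inspect.getsource(predict)
# (only available when the function was loaded from a file on disk).
```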
127 |
128 |
129 |
130 |
131 |
--------------------------------------------------------------------------------
/courses/v4-dl1/image/.keep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/courses/v4-dl1/image/transforms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/courses/v4-dl1/image/transforms.png
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_01.md:
--------------------------------------------------------------------------------
1 | # Lesson 1
2 | - Live: 17-Mar-2020
3 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
4 | - course will be released in July
5 | - supposed to be the official version (now **v4**)
6 | - book: [Deep Learning for Coders with fastai and PyTorch: AI Applications Without a PhD ](https://www.amazon.com/Deep-Learning-Coders-fastai-PyTorch/dp/1492045527)
7 |
8 | ## Homework
9 | - [Lesson 1 Homework](https://forums.fast.ai/t/did-you-do-the-homework/660340)
10 |
11 | - [x] make sure you can spin up a GPU server
12 | - [x] that you can shut it down when it is finished
13 | - [x] run the code shown in the lecture
14 | - [x] use the documentation, use the `doc` function inside a Jupyter notebook
15 | - [x] do some searching of the fast.ai docs
16 | - [ ] see if you can grab the fast.ai documentation notebooks and try running them: [doc notebooks](https://github.com/fastai/fastai2/tree/master/nbs)
17 | - [ ] read a chapter of the fast.ai book
18 | - [ ] do the questionnaire at the end of the chapter (not everything has been covered yet, answer only the questions that you can)
19 | - [ ] try to get comfortable with running code
20 |
21 | ## Paperspace
22 | - fastai: [Getting Started with Gradient](https://course.fast.ai/start_gradient.html)
23 | - fastai: v4 [Paperspace (free, paid options)](https://forums.fast.ai/t/platform-paperspace-free-paid-options/65515)
24 |
25 | ### My steps on Paperspace
26 | 1. notebook: https://www.paperspace.com/telmjtws3/notebook/prjrrhy56
27 | 2. Open terminal, via Jupyter Notebook
28 | - type `bash` to get a regular terminal (autocomplete, etc)
29 | - `pip install fastai2 fastcore --upgrade`
30 | - `cd course-v4`
31 | - `git pull`
32 |
33 |
34 | ## Logistics
35 | - edited video will be available in 1-2 days
36 | - whatever you ask on the forum, it will eventually be public
37 | - it's not personal if your post gets deleted from forums, it's for the readability of the forums
38 | - 800 most valued members of community taking course
39 | - at 9:40pm EST, there are **441** people watching
40 | - at 9:45pm EST, **465**
41 | - at 10:00pm EST, **483**
42 | - at 11:45pm EST, **434**
43 | - at 12am, **405**
44 |
45 | ## Forums
46 | - can select "none" to remove study group threads
47 | - study group: research shows people who work in a group are much more likely to create powerful, long-term projects
48 | - will set up virtual study groups
49 |
50 | ## COVID-19
51 | - blog: [Covid-19, your community, and you — a data science perspective](https://www.fast.ai/2020/03/09/coronavirus/)
52 | - published: 09-Mar-2020
53 | - 1/2 million people read the blog
54 | - post translated in 15 languages
55 | - OPEN Forum category: [covid-19](https://forums.fast.ai/c/covid-19/52)
56 |
57 | 10:33 break
58 |
59 | ## Getting Started
60 | - AGI: Artificial General Intelligence
61 | - Neural networks: a brief history
62 | - 1986: MIT Press published the book *Parallel Distributed Processing* (PDP)
63 | -
64 |
65 | ## Education at Bat: 7 Principles for Educators
66 | Professor David Perkins uses his childhood baseball experiences:
67 | 1. Play the whole game
68 | 2. Make the game worth playing
69 | 3. Work on the hard parts
70 |
71 | You will be practicing things that are hard. Requires:
72 | - tenacity
73 | - commitment
74 | - will need to work damn hard
75 | - spend less time on theory, and MORE time on running models and with code
76 |
77 | ## Software Stack
78 | - fastai
79 | - PyTorch
80 | - Python
81 |
82 | ## PyTorch
83 | - Tensorflow got bogged down
84 | - PyTorch was easier to use
85 | - in last 12 months, % of papers that use PyTorch at conferences went from 20% to 80%
86 | - industry moves slowly, but will catch up
87 | - PyTorch: very flexible, not designed for beginner-friendliness
88 | - doesn't have higher level libraries
89 | - fastai is the most popular higher level API for PyTorch
90 | - fastai uses a layered API
91 |
92 | ## To do work
93 | - need GPU, Nvidia one
94 | - use one of the platforms that is easily set up
95 | - run it on Linux; it's hard enough to learn deep learning w/o arcane setup problems
96 | - app_jupyter.ipynb: learn about Jupyter notebook
97 | - REPL: Read, Evaluate, Print, Loop
98 |
99 | ## Jupyter Notebook
100 | - shift + enter: to run
101 | - Workflow: select notebook, duplicate it and rename it
102 | - fastbook repository: all text from book
103 | - course-v4 --> this removes all text, leaves just code
104 | - at the end of notebooks, there are Questionnaires
105 | - What do we want you to take away from each notebook?
106 | - What should you know before you move on?
107 | - Do questionnaire before moving on to each chapter
108 | - If you missed something, do go back and read it
109 | - If you're still stuck after a couple of tries, move on to the next chapter; you may understand it better later
110 | - File / Trust Notebook
111 | - `jupyter labextension install @jupyter-widgets/jupyterlab-manager`
112 |
113 | ##
114 | - deep learning is a kind of machine learning
115 | -
116 |
117 | ## Limitations Inherent to Machine Learning
118 | -
119 |
120 | ## Consider how a model interacts with its environment
121 | - PROXY: arrest is a proxy for crime [listen to this again]
122 | -
123 |
124 | ## Homework
125 | 1. spin up a GPU server
126 | 2. run code
127 | 3. search fastai docs
128 | 4. try to get comfortable, know your way around
129 | 5. read chapter of book
130 | 6. go through questionnaire
131 |
132 |
133 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_03.md:
--------------------------------------------------------------------------------
1 | # Lesson 3
2 | - Live: 31-Mar-2020
3 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
4 |
5 | - 9:30pm 144 viewing
6 | - 9:45pm 263 viewing
7 | - 10:00pm viewing
8 |
9 | Note: finished watching Apr 16, 2020.
10 |
11 | ## Homework
12 | - [Lesson 3 Homework] ()
13 |
14 | - [ ] read blog: [](https://www.fast.ai/2016/12/29/uses-of-ai/)
15 | - [ ] create your own application
16 |
17 |
18 | ## Notes
19 | - [fastai/fastbook](https://github.com/fastai/fastbook)
20 | - full notebooks that contain text of O'Reilly book
21 | - [fastai/course-v4](https://github.com/fastai/course-v4)
22 | - same notebooks with prose stripped away
23 | - do practice coding here
24 |
25 | ##
26 | - using notebook: https://github.com/fastai/fastbook/blob/master/02_production.ipynb
27 | - look at getting model into production
28 | - `DataBlock` API
29 | ```python
30 | bears = DataBlock(
31 | blocks=(ImageBlock, CategoryBlock),
32 | get_items=get_image_files,
33 | splitter=RandomSplitter(valid_pct=0.3, seed=42),
34 | get_y=parent_label,
35 | item_tfms=Resize(128))
36 | ```
37 |
38 | ## Data Augmentation
39 | - default: it grabs the center of image
40 | - `.new`: creates a new DataBlock object
41 | ```python
42 | bears = bears.new(item_tfms=Resize(128, ResizeMethod.Squish))
43 | dls = bears.dataloaders(path)
44 | dls.valid.show_batch(max_n=4, nrows=1)
45 | ```
46 | - `ResizeMethod.Pad` adds black bars to side, avoids squishing image
47 | - default is `pad_mode='zeros'`; can also use `pad_mode='reflect'`
48 | `bears = bears.new(item_tfms=Resize(128, ResizeMethod.Pad, pad_mode='zeros'))`
49 | - `ResizeMethod.Squish` most efficient
50 | - `item_tfms=RandomResizedCrop` most popular one; `min_scale=0.3` pick 30% of pixels of orig image each time
51 | `bears = bears.new(item_tfms=RandomResizedCrop(128, min_scale=0.3))`
52 |
53 | - Item transforms vs Batch transforms
54 | ```python
55 | bears = bears.new(item_tfms=Resize(128), batch_tfms=aug_transforms(mult=2))
56 | dls = bears.dataloaders(path)
57 | dls.train.show_batch(max_n=8, nrows=2, unique=True)
58 | ```
59 | - fastai will avoid doing data augmentation on the validation dataset
60 | - show names of categories:
61 | ```python
62 | learn_inf.dls.vocab
63 | ```
64 | ```bash
65 | (#3) ['black','grizzly','teddy']
66 | ```
67 |
68 | ## Making a GUI; web app for predictions (25:00)
69 | - `!pip install voila`
70 | - can use binder for making it publicly available
71 |
72 | ### *out of domain* data (domain shift)
73 |
74 | ### Python broadcasting
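The heading above has no notes yet; a minimal sketch of the broadcasting idea, using plain NumPy here rather than fastai tensors (the arrays and values are my own toy examples):

```python
import numpy as np

# Broadcasting: arrays of different shapes are combined without explicit loops.
# Dimensions are compatible (right-aligned) when the sizes match or one is 1.
imgs = np.zeros((2, 3))          # e.g. a mini-batch of 2 rows, 3 features
row = np.array([1.0, 2.0, 3.0])  # shape (3,) is broadcast across both rows
shifted = imgs + row             # row is "stretched" to shape (2, 3)
print(shifted.shape)             # (2, 3)

col = np.array([[10.0], [20.0]])  # shape (2, 1) broadcasts across columns
print((imgs + col)[1, 2])         # 20.0
```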
75 |
76 | ## MNIST: baseline + calculating gradient
77 | - notebook: https://github.com/fastai/fastbook/blob/master/04_mnist_basics.ipynb
78 |
79 |
80 |
81 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_04.md:
--------------------------------------------------------------------------------
1 | # Lesson 4
2 | - Live: 14-Apr-2020
3 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
4 | - finished watching 21-apr-2020
5 |
6 | ## Homework
7 | - [Lesson 4 Homework] ()
8 | - [ ]
9 | - [ ]
10 |
11 | ## Notes
12 | `pets1.summary(path/"images")` helps with debugging
13 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_05_ethics.md:
--------------------------------------------------------------------------------
1 | # Lesson 5: Ethics for Data Science
2 | - Live: 07-Apr-2020
3 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
4 | - Lesson 5 thread: https://forums.fast.ai/t/lesson-5-official-topic/68039
5 |
6 | NOTE: finished watching lecture on 14-Apr-2020
7 |
8 | ## Ethics Course
9 | - full USF ethics course by Rachel Thomas will be released before July 2020
10 |
11 |
12 | ## Case Study
13 | 1. Feedback loop
14 | - data
15 | - recommendation systems: they are determining what user is exposed to, and what content will become popular
16 | - Google promoting damaging conspiracy theories
17 | -
18 | 2. Software to determine poor people's health benefits
19 | - bug in software cut coverage for people with cerebral palsy
20 | - system implemented with no way to identify and address mistakes
21 | 3. Latanya Sweeney
22 |    - Ph.D.
23 |    - when she googled her name, she would see ads for criminal records
24 |    - names associated with African-Americans were disproportionately getting ads for criminal records
25 | - bias in advertising shows up a lot
26 |
27 |
28 | Resources:
29 | - [Georgetown Law: Center for Privacy and Technology](https://forums.fast.ai/t/lesson-5-official-topic/68039)
30 | - How to Fact Check: https://www.notion.so/Check-Please-Starter-Course-ae34d043575e42828dc2964437ea4eed
31 | - Maciej Ceglowski
32 | https://en.wikipedia.org/wiki/Maciej_Ceg%C5%82owski
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_06.md:
--------------------------------------------------------------------------------
1 | # Lesson 6
2 | - Live: 21-Apr-2020
3 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
4 | - 10:30pm 185 watching
5 |
6 | ## Homework
7 | - [Lesson 6 Homework] ()
8 | - [ ]
9 | - [ ]
10 |
11 | ## Notes
12 | - [fastai/fastbook](https://github.com/fastai/fastbook)
13 | - full notebooks that contain text of O'Reilly book
14 | - [fastai/course-v4](https://github.com/fastai/course-v4)
15 | - same notebooks with prose stripped away
16 | - do practice coding here
17 |
18 | ## Topics
19 | - pet breeds; multiple classification
20 | - good learning rate finder questions and answers
21 |
22 | ## Computer Vision Problem: Pet Breed
23 |
24 | ### Discriminative Learning Rates
25 | - Notebook: https://github.com/fastai/course-v4/blob/master/nbs/05_pet_breeds.ipynb
26 | - unfreezing and transfer learning
27 | > what we would really like is to have a small learning rate for the early layers and a bigger learning rate for the later layers
31 | - slicing
32 | ```python
33 | learn.fit_one_cycle(6, lr_max=1e-5)
34 | ```
35 | #### our own version of fine-tuning here
36 | ```python
37 | learn = cnn_learner(dls, resnet34, metrics=error_rate)
38 | learn.fit_one_cycle(3, 3e-3)
39 | learn.unfreeze()
40 | learn.fit_one_cycle(12, lr_max=slice(1e-6,1e-4))
41 | ```
42 | #### how do you make it better now?
43 | - 5.4% error on 37 categories is pretty good (for pet breed data)
44 | - can use a deeper architecture
45 | - `Cuda runtime error: out of memory` is out of memory on your GPU
46 | - restart notebook
47 | - can use less precise numbers to save memory
48 | ```python
49 | from fastai2.callback.fp16 import *
50 | learn = cnn_learner(dls, resnet50, metrics=error_rate).to_fp16()
51 | learn.fine_tune(6, freeze_epochs=3)
52 | ```
53 | - increasing number of layers (or more complex architecture) doesn't always improve the error rate
54 | - requires experimentation
55 | - trick: use small models for as long as possible (to do cleaning and testing); then try bigger models because they will take longer
56 | - "always assume you can do better [with error rate] because you never know"
57 |
58 | ## Multi-label Classification
59 | - notebook: https://github.com/fastai/course-v4/blob/master/nbs/06_multicat.ipynb
60 | - determining multiple labels per image (e.g., contains car, bike, person, etc.)
61 | - dataset: PASCAL
62 | - http://host.robots.ox.ac.uk/pascal/VOC/
63 | - https://gluon-cv.mxnet.io/build/examples_datasets/pascal_voc.html
64 |
65 |
66 | ## Example
67 | ```python
68 | a = list(enumerate(string.ascii_lowercase))
69 | a[0], len(a)
70 | ```
71 | ```bash
72 | ((0, 'a'), 26)
73 | ```
74 |
75 | ## creating: **Datasets**, **Data Block** and **DataLoaders**
76 | - serialization: means saving something
77 | - best to use named functions over lambdas (Python cannot serialize objects created with a lambda)
78 | - one-hot encoding for multiple labels
79 | -
80 | ```python
81 | def splitter(df):
82 | train = df.index[~df['is_valid']].tolist()
83 | valid = df.index[df['is_valid']].tolist()
84 | return train,valid
85 |
86 | dblock = DataBlock(blocks=(ImageBlock, MultiCategoryBlock),
87 | splitter=splitter,
88 | get_x=get_x,
89 | get_y=get_y)
90 |
91 | dsets = dblock.datasets(df)
92 | dsets.train[0]
93 | ```
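The serialization point above can be checked directly with the standard library's `pickle` module, independent of fastai (the functions below are toy stand-ins for a `splitter`):

```python
import pickle

def splitter_fn(x):
    # A named function: picklable, so a DataBlock using it can be saved to disk.
    return x * 2

splitter_lambda = lambda x: x * 2  # a lambda: pickle cannot look it up by name

pickle.dumps(splitter_fn)  # works fine

try:
    pickle.dumps(splitter_lambda)
except (pickle.PicklingError, AttributeError) as e:
    print("lambda could not be pickled:", type(e).__name__)
```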
94 | ## path
95 | ```python
96 | Path.BASE_PATH = None
97 | path
98 | ```
99 | ```python
100 | (path/'01').ls()
101 | ```
102 | ### Important to know
103 | 1. create a learner
104 | 2. grab a batch of data
105 | 3. pass it to the model
106 | 4. see the shape; recognize why the shape is
107 | ```python
108 | learn = cnn_learner(dls, resnet18)
109 | ```
110 | ```python
111 | x,y = dls.train.one_batch()
112 | activs = learn.model(x)
113 | activs.shape
114 | ```
115 | >torch.Size([64, 20])
116 |
117 | ### Binary cross entropy
118 |
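The heading above has no notes yet; a minimal hand-rolled sketch of binary cross entropy in plain Python (toy numbers of my own, not fastai's `BCEWithLogitsLossFlat`):

```python
import math

def binary_cross_entropy(pred, target):
    # pred: sigmoid output in (0, 1); target: 0 or 1.
    # For multi-label problems this loss is applied to each label independently.
    return -(target * math.log(pred) + (1 - target) * math.log(1 - pred))

# A confident correct prediction has low loss; a confident wrong one is heavily penalised.
print(round(binary_cross_entropy(0.9, 1), 4))  # 0.1054
print(round(binary_cross_entropy(0.1, 1), 4))  # 2.3026
```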
119 | ## Note
120 | - **Accuracy** only works for single label datasets, like MNIST
121 |
122 | ## Collaborative Filtering Deep Dive
123 | - applications: what kind of other diagnosis, figure out where someone will click next
124 | - anything where you are trying to learn from past behavior
125 | -
126 |
127 |
128 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_07.md:
--------------------------------------------------------------------------------
1 | # Lesson 7
2 |
3 | - Live: 28-Apr-2020
4 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
5 | - 9:30pm 101 watching
6 | - 10:30pm 177 watching
7 |
8 | ## Topics
9 | - weight decay, regularization
10 | - embedding
11 | - PyTorch code
12 | - Tabular
13 |
14 | ## Notebook
15 | - Collaborative filtering: https://github.com/fastai/course-v4/blob/5a9fca472f55a8186e62a21111deab119001e0df/nbs/08_collab.ipynb
16 | - Tabular: https://github.com/fastai/course-v4/blob/5a9fca472f55a8186e62a21111deab119001e0df/nbs/09_tabular.ipynb
17 |
18 | ## Regularization
19 | - use for **overfitting**
20 | - **weight decay** is also known as **L2 Regularization**
21 | - in general, big coefficients are going to cause a big swing in the loss
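The weight decay point above can be written out: L2 regularization adds the sum of squared weights to the loss, which pushes big coefficients down (a hand-rolled sketch with toy numbers, not fastai's implementation):

```python
def loss_with_wd(loss, weights, wd):
    # L2 regularization / weight decay: penalise large weights.
    return loss + wd * sum(w ** 2 for w in weights)

base_loss = 1.0
small = [0.1, -0.2, 0.3]
big = [3.0, -4.0, 5.0]

# Big coefficients are penalised far more heavily than small ones.
print(round(loss_with_wd(base_loss, small, wd=0.1), 3))  # 1.014
print(round(loss_with_wd(base_loss, big, wd=0.1), 3))    # 6.0
```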
22 |
23 | ## Embeddings
24 | - index lookup into an array
25 | - computational shortcut to one hot encoding
26 | - cardinality: number of levels of a variable
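The "index lookup" point above can be sketched in plain Python: multiplying a one-hot vector by a weight matrix selects one row, so an embedding just indexes that row directly (toy numbers, not fastai's implementation):

```python
# Embedding matrix: one learned vector per category (cardinality 3, embedding size 2).
emb = [[0.1, 0.2],
       [0.3, 0.4],
       [0.5, 0.6]]

def one_hot_matmul(idx, weights):
    # The slow way: build a one-hot vector and take a dot product per column.
    one_hot = [1.0 if i == idx else 0.0 for i in range(len(weights))]
    return [sum(o * w[j] for o, w in zip(one_hot, weights))
            for j in range(len(weights[0]))]

def embedding_lookup(idx, weights):
    # The computational shortcut: just index the row.
    return weights[idx]

print(one_hot_matmul(1, emb))    # [0.3, 0.4]
print(embedding_lookup(1, emb))  # [0.3, 0.4]
```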
27 |
28 | ## Dataset
29 | - Blue Book for Bulldozers Kaggle Competition
30 |
31 | ## Random Forests: Bagging
32 | - to improve the random forests, use **bagging**
33 | - randomly select subsets of data and train it
34 | - then average the different versions of the models
35 | - advantage to this is that these models have errors which are not correlated to each other
36 |
37 | Here is the procedure that Breiman is proposing:
38 | 1. Randomly choose a subset of the rows of your data (i.e., "bootstrap replicates of your learning set")
39 | 2. Train a model using this subset
40 | 3. Save that model, and then return to step one a few times
41 | 4. This will give you a number of trained models. To make a prediction, predict using all of the models, and then take the average of each of those model's predictions.
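The four steps above can be sketched with the standard library alone, using the mean of a bootstrap sample as a stand-in "model" (toy data; a real random forest would train a decision tree per subset):

```python
import random
import statistics

random.seed(42)
data = [3.0, 5.0, 7.0, 9.0, 11.0]  # toy training targets; the true mean is 7.0

def train_bagged_models(data, n_models=100):
    models = []
    for _ in range(n_models):
        # Step 1: bootstrap replicate (sample rows with replacement).
        subset = random.choices(data, k=len(data))
        # Steps 2-3: "train" a trivial model (its prediction is the subset mean) and save it.
        models.append(statistics.mean(subset))
    return models

# Step 4: predict with every model and average the predictions.
models = train_bagged_models(data)
prediction = statistics.mean(models)
print(prediction)  # the individual models' errors average out near 7.0
```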
42 |
43 | ## BAGGING
44 | It means we can improve the accuracy of nearly any kind of machine learning algorithm by training it multiple times, each time on a different random subset of the data, and averaging their predictions.
45 |
46 | ## Leo Breiman: Random Forest
47 | In 2001, Leo Breiman demonstrated that this approach to building models, when applied to decision tree building algorithms, was particularly powerful. He went further than randomly choosing rows for each model's training: he also randomly selected from a subset of columns when choosing each split in each decision tree. He called this method the random forest.
48 |
49 | ## OOB: out-of-bag
50 | - review: remove each variable and see how it impacts the RMSE
51 |
52 | ## Partial Dependence Plot
53 |
54 | ## Boosting
55 |
--------------------------------------------------------------------------------
/courses/v4-dl1/lesson_08_NLP.md:
--------------------------------------------------------------------------------
1 | # Lesson 8
2 |
3 | - Live: 05-May-2020
4 | - Time: 6:30 to 9pm PST (9:30pm to midnight EST)
5 | - 9:30pm 160 watching
6 |
7 | ## Topics
8 | - NLP
9 |
10 | ## Notebook
11 | - [10_nlp](https://github.com/fastai/fastbook/blob/master/10_nlp.ipynb)
12 | - [12_nlp_dive](https://github.com/fastai/fastbook/blob/master/12_nlp_dive.ipynb)
13 |
14 | ## AR and TAR Regularization
15 |
--------------------------------------------------------------------------------
/courses/v4-dl1/paperspace.md:
--------------------------------------------------------------------------------
1 |
2 | ## Paperspace
3 | - fastai: [Getting Started with Gradient](https://course.fast.ai/start_gradient.html)
4 | - fastai: v4 [Paperspace (free, paid options)](https://forums.fast.ai/t/platform-paperspace-free-paid-options/65515)
5 |
6 | ### My steps on Paperspace
7 | 1. notebook: https://www.paperspace.com/telmjtws3/notebook/prjrrhy56
8 | 2. Open terminal, via Jupyter Notebook
9 | - type `bash` to get a regular terminal (autocomplete, etc)
10 | - `pip install fastai2 fastcore --upgrade`
11 | - `cd course-v4`
12 | - `git pull`
13 |
14 | ### Back to work
15 | 1. Log in: https://www.paperspace.com
16 | 2. To "notebooks" or "workspace": https://www.paperspace.com/console/notebooks
17 | 3. Actions / Start
18 | 4. Actions / Open
19 | 5. New / terminal
20 |
21 | ## updating packages on Paperspace
22 | ```bash
23 | apt-get update
24 | ```
25 | ```bash
26 | apt-get install libsndfile1-dev
27 | ```
28 |
29 | ## unzip files
30 | ```
31 | 10 cd storage
32 | 11 ls
33 | 12 cd fowl_data/
34 | 13 ls
35 | 14 unzip Test.zip
36 | 15 pwd
37 | 16 clear
38 | 17 history
39 | ```
40 | ```bash
41 | root@6c4a45f4bab8:/notebooks/storage/fowl_data# unzip -q Train.zip
42 | ```
43 |
44 |
45 | ## Adding a data folder and data
46 |
47 | 6. use bash shell: `# bash`
48 | 7. going to `storage` folder
49 | ```bash
50 | root@51ae9bcde285:/notebooks/storage# pwd
51 | /notebooks/storage
52 | ```
53 | 8. can `mkdir` here to add datasets
54 | ```bash
55 | # bash
56 | root@51ae9bcde285:/notebooks# ls
57 | course-v4 datasets storage
58 | root@51ae9bcde285:/notebooks# cd storage
59 | root@51ae9bcde285:/notebooks/storage# ls
60 | archive data models
61 | root@51ae9bcde285:/notebooks/storage# mkdir fowl
62 | ```
63 | 9. go to that directory
64 | ```bash
65 | root@51ae9bcde285:/notebooks/storage# cd fowl
66 | root@51ae9bcde285:/notebooks/storage/fowl# ls
67 | root@51ae9bcde285:/notebooks/storage/fowl# pwd
68 | /notebooks/storage/fowl
69 | ```
70 | Tried: `wget` and `curl` but urls were not working
71 | Zindi Fowl competition: https://zindi.africa/competitions/fowl-escapades/data
72 |
73 | 10. Go to Jupyter notebook in Paperspace
74 | - navigate to `storage` folder
75 | - use **upload** to upload files
76 |
77 | ## Data
78 | ```bash
79 | root@3b9d9da72ac6:/notebooks/storage/fowl_data# pwd
80 | /notebooks/storage/fowl_data
81 | root@3b9d9da72ac6:/notebooks/storage/fowl_data# ls -alt
82 | total 2104240
83 | drwxr-xr-x 6 root root 4096 Mar 31 19:48 ..
84 | -rw-r--r-- 1 root root 1407124233 Mar 31 16:26 Train.zip
85 | -rw-r--r-- 1 root root 743620991 Mar 31 16:12 Test.zip
86 | drwxr-xr-x 3 root root 4096 Mar 31 15:12 .
87 | drwxr-xr-x 2 root root 4096 Mar 31 15:12 .ipynb_checkpoints
88 | -rw-r--r-- 1 root root 3815649 Mar 31 15:12 StarterNotebook.ipynb
89 | -rw-r--r-- 1 root root 2391 Mar 31 15:11 authors.csv
90 | -rw-r--r-- 1 root root 80027 Mar 31 15:11 SampleSubmission.csv
91 | -rw-r--r-- 1 root root 48594 Mar 31 15:11 Train.csv
92 | -rw-r--r-- 1 root root 13679 Mar 31 15:11 Test.csv
93 | root@3b9d9da72ac6:/notebooks/storage/fowl_data#
94 | ```
95 | ### rename directories
96 | ```bash
97 | root@6c4a45f4bab8:/notebooks/storage/fowl_data# mv Train/ train/
98 | root@6c4a45f4bab8:/notebooks/storage/fowl_data# mv Test/ test/
99 | root@6c4a45f4bab8:/notebooks/storage/fowl_data#
100 | ```
101 | ```bash
102 | conda install -c conda-forge ffmpeg
103 | ```
104 |
105 | ```bash
106 | apt-get install htop
107 | ```
108 | ```bash
109 | htop
110 | ```
111 |
112 |
113 |
--------------------------------------------------------------------------------
/fastai_dl_course_v1.md:
--------------------------------------------------------------------------------
1 | # [Fastai](http://www.fast.ai) Deep Learning Course: Version 1
2 |
3 | ## Dates of Course (Version 1)
4 | - Deep Learning (Oct 2016 to Apr 2017)
5 | - Part 1: Oct - Dec 2016
6 | - Part 2: Mar - May 2017
7 |
8 | ## Deep Learning Coursework (Version 1)
9 | * [Part 1 v1](http://course17.fast.ai)
10 | * [Part 2 v1](http://course17.fast.ai/part2.html)
11 |
12 |
13 | ## Other
14 | - [fastai v1: Launch Announcement](http://www.fast.ai/2018/10/02/fastai-ai/)
15 | - [fastai_old (on GitHub)](https://github.com/fastai/fastai_old) (old version)
16 |
--------------------------------------------------------------------------------
/fastai_dl_course_v2.md:
--------------------------------------------------------------------------------
1 | # [Fastai](http://www.fast.ai) Deep Learning Course: Version 2
2 |
3 | ## Dates of Course
4 | - Deep Learning Version 2 (Oct 2017 to Apr 2018)
5 | - Part 1: Oct - Dec 2017
6 | - Part 2: Mar - May 2018
7 |
8 | ## Forums
9 | * [Discourse: part1-v2](http://forums.fast.ai/c/part1-v2)
10 | * [Discourse: part1-v2 beginner](http://forums.fast.ai/c/part1v2-beg)
11 | * [Discourse: part2-v2](http://forums.fast.ai/c/part2-v2)
12 |
13 |
14 | ---
15 | ## Deep Learning Coursework (Version 2)
16 | * [Part 1 v2](http://course.fast.ai) (released Jan 2018)
17 | * [Part 2 v2](http://www.fast.ai/2018/05/07/part2-launch/) (released May 2018)
18 |
19 | ### [Deep Learning Part 1](http://forums.fast.ai/t/welcome-to-part-1-v2/5787)
20 | * [Lesson 1 wiki](http://forums.fast.ai/t/wiki-lesson-1/9398) Image Recognition
21 | * [Lesson 2 wiki](http://forums.fast.ai/t/wiki-lesson-2/9399) CNNs
22 | * [Lesson 3 wiki](http://forums.fast.ai/t/wiki-lesson-3/9401) Overfitting
23 | * [Lesson 4 wiki](http://forums.fast.ai/t/wiki-lesson-4/9402) Embeddings
24 | * [Lesson 5 wiki](http://forums.fast.ai/t/wiki-lesson-5/9403) NLP
25 | * [Lesson 6 wiki](http://forums.fast.ai/t/wiki-lesson-6/9404) RNNs
26 | * [Lesson 7 wiki](http://forums.fast.ai/t/wiki-lesson-7/9405) CNN Architecture
27 |
28 | ### [Deep Learning, Part 2](http://www.fast.ai/2018/05/07/part2-launch/)
29 | * [Lesson 8](http://course.fast.ai/lessons/lesson8.html) Object Detection
30 | * [Lesson 9](http://course.fast.ai/lessons/lesson9.html) Single Shot Multibox Detector (SSD)
31 | * [Lesson 10](http://course.fast.ai/lessons/lesson10.html) NLP Classification and Translation
32 | * [Lesson 11](http://course.fast.ai/lessons/lesson11.html) Neural Translation
33 | * [Lesson 12](http://course.fast.ai/lessons/lesson12.html) Generative Adversarial Networks (GANs)
34 | * [Lesson 13](http://course.fast.ai/lessons/lesson13.html) Image Enhancement
35 | * [Lesson 14](http://course.fast.ai/lessons/lesson14.html) Super Resolution; Image Segmentation with UNET
36 |
37 |
38 | ### Deep Learning Lesson Timelines
39 | * http://forums.fast.ai/t/part-1-v2-complete-collection-of-video-timelines/11183
40 |
41 | ---
42 |
43 | ### [Deep Learning 1: My Lesson Notes](courses/dl1-v2/)
44 | * Lesson 1
45 | - [Lesson 1a: Course Intro](courses/dl1-v2/lesson_1a_course_intro.md)
46 | - [Lesson 1b: CNN and resnet Architecture](courses/dl1-v2/lesson_1b_cnn_tools.md)
47 | * [Lesson 2: resnet34, resnext50](courses/dl1-v2/lesson_2_resnet34_resnext50.md) CNNs
48 | * [Lesson 3: CNN Image Intro](courses/dl1-v2/lesson_3_x.md) Overfitting
49 | * [Lesson 4: Embeddings](courses/dl1-v2/lesson_4_x.md) Embeddings
50 | * [Lesson 5: ](courses/dl1-v2/lesson_5_x.md) NLP
51 | * [Lesson 6: ](courses/dl1-v2/lesson_6_x.md) RNNs
52 | * [Lesson 7: ](courses/dl1-v2/lesson_7_x.md) CNN Architecture
53 |
54 | ---
55 | ### [Deep Learning 2: My Lesson Notes](courses/dl2-v2/)
56 | * [Lesson 8](courses/dl2-v2/lesson_08.md) Object Detection
57 | * [Lesson 9](courses/dl2-v2/lesson_09.md) Multi-object Detection
58 | * Lesson 10 NLP Classification and Translation
59 | - [Lesson 10_1](courses/dl2-v2/lesson_10_1.md)
60 | - [Lesson 10_2](courses/dl2-v2/lesson_10_2.md)
61 | * Lesson 11 Neural Translation
62 | - [Lesson 11_1](courses/dl2-v2/lesson_11_1.md)
63 | - [Lesson 11_2](courses/dl2-v2/lesson_11_2.md)
64 | * [Lesson 12] ()
65 | * [Lesson 13] ()
66 | * [Lesson 14] ()
67 |
68 | ---
69 |
70 | ## Platforms for Using fastai (GPU required) v2
71 | [Summary of Cloud GPU Vendors (with billing)](https://github.com/binga/cloud-gpus)
72 | * [Paperspace setup](tools/paperspace.md)
73 | * [AWS AMI GPU Setup](tools/aws_ami_gpu_setup.md)
74 | - [How to setup fastai in an Amazon AWS region without fastai AMI like in Europe](https://medium.com/@pierre_guillou/guide-install-fastai-in-any-aws-region-8f4fe29132e5)
75 | * [Crestle](tools/crestle_run.md)
76 | * [Google Cloud GPU Setup for fastai](https://medium.com/google-cloud/set-up-google-cloud-gpu-for-fast-ai-45a77fa0cb48)
77 | * [Set up personal deep learning box (home computer)](tools/setup_personal_dl_box.md)
78 | * [Microsoft Azure](https://medium.com/@manikantayadunanda/setting-up-deeplearning-machine-and-fast-ai-on-azure-a22eb6bd6429)
79 | * [Running fast.ai notebooks with Amazon SageMaker](https://aws.amazon.com/blogs/machine-learning/running-fast-ai-notebooks-with-amazon-sagemaker/)
80 | * Docker
81 | - [Paperspace Docker Container](https://hub.docker.com/r/paperspace/fastai/)
82 | - [Fastai and Docker](https://nji-syd.github.io/2018/03/26/up-and-running-with-fast-ai-and-docker/)
83 | * [manual: bash script for setup](http://files.fast.ai/setup/paperspace)
84 | - the CUDA drivers
85 | - Anaconda (special Python distribution)
86 | - Python libraries
87 | - fastai library
88 | - courses
89 | - data
90 | * Other
91 | - [FloydHub](https://www.floydhub.com)
92 | - https://github.com/YuelongGuo/floydhub.fast.ai
93 | - [Google Colaboratory](https://colab.research.google.com/notebook#fileId=/v2/external/notebooks/welcome.ipynb)
94 | - [Salamander](http://forums.fast.ai/t/setup-on-salamander-cheaper-easier-than-aws/25427)
95 |
96 |
97 |
--------------------------------------------------------------------------------
/fastai_dl_course_v3.md:
--------------------------------------------------------------------------------
1 | # [Fastai](http://www.fast.ai) Deep Learning Course: Version 3
2 |
3 | ## Part 1: Dates of Course
4 | - [Application Announcement](http://forums.fast.ai/t/fast-ai-live-the-new-version-of-the-international-fellowship/22825): CLOSED
5 | - Deep Learning Version 3 (Oct 2018 to Dec 2018)
6 | - Class is at the following time:
7 | - **6:30pm to 9:00pm PST** (Pacific Standard Time)
8 | - 9:30pm to midnight EST
9 | - Class is on the following days:
10 | - Lecture 1: Mon Oct 22
11 | - Lecture 2: Tue Oct 30
12 | - Lecture 3: Thu Nov 8
13 | - Lecture 4: Tue Nov 13
14 | - Lecture 5: Mon Nov 19
15 | - Lecture 6: Tue Nov 27
16 | - Lecture 7: Wed Dec 12
17 |
18 | ## Forums (Discourse)
19 | * [part1-v3](http://forums.fast.ai/c/part1-v3)
20 | * [part1-v3-adv](https://forums.fast.ai/c/part1-v3/part1-v3-adv)
21 |
22 | ---
23 | ## Coursework (fastai website)
24 | * Course release announcement: https://www.fast.ai/2019/01/24/course-v3/
25 | * Videos: [Part 1 v3](https://course.fast.ai/videos) (released 24-Jan-2019)
26 | * Course docs: https://course.fast.ai
27 |
28 | ---
29 |
30 | ### [Deep Learning: My Lesson Notes](courses/v3-dl1/)
31 | * [Lesson 1](courses/v3-dl1/lesson_1_lecture.md) Image Recognition
32 | * [Lesson 2](courses/v3-dl1/lesson_2_1.md)
33 | * [Lesson 3](courses/v3-dl1/lesson_3_1.md)
34 | * [Lesson 4](courses/v3-dl1/lesson_4_1.md)
35 | * [Lesson 5](courses/v3-dl1/lesson_5_1.md)
36 | * [Lesson 6](courses/v3-dl1/lesson_6_1.md)
37 | * [Lesson 7](courses/v3-dl1/lesson_7_1.md)
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/fastai_ml_course.md:
--------------------------------------------------------------------------------
1 | # [Fastai](http://www.fast.ai) Machine Learning [(ML)](http://www.fast.ai/2018/09/26/ml-launch/) Course
2 |
3 | ## Dates of Course
4 | - Machine Learning (Fall 2017)
5 |
6 | ## Forums
7 | - [ML Forum](http://forums.fast.ai/t/another-treat-early-access-to-intro-to-machine-learning-videos/6826)
8 | - [ML video timelines](http://forums.fast.ai/t/another-treat-early-access-to-intro-to-machine-learning-videos/6826/321?u=ericpb)
9 |
10 | ## [Intro to Machine Learning: My Lesson Notes](courses/ml1/)
11 | * [Lesson 1: Random Forests Part 1](courses/ml1/lesson_01.md)
12 | * [Lesson 2: Random Forests Part 2](courses/ml1/lesson_02.md)
13 | * [Lesson 3: Preprocessing Data](courses/ml1/lesson_03.md)
14 | * [Lesson 4: RF Hyperparameters & Feature Importance](courses/ml1/lesson_04.md)
15 | * [Lesson 5](courses/ml1/lesson_05.md) *in progress*
16 | * [Lesson 6](courses/ml1/)
17 | * [Lesson 7](courses/ml1/)
18 | * [Lesson 8](courses/ml1/)
19 | * [Lesson 9](courses/ml1/)
20 | * [Lesson 10](courses/ml1/)
21 | * [Lesson 11](courses/ml1/)
22 | * [Lesson 12](courses/ml1/)
23 |
--------------------------------------------------------------------------------
/googlefc30e18b4a9edaa2.html:
--------------------------------------------------------------------------------
1 | google-site-verification: googlefc30e18b4a9edaa2.html
--------------------------------------------------------------------------------
/helpful_linux_commands.md:
--------------------------------------------------------------------------------
1 | # Helpful Linux Commands
2 |
3 |
4 | ### get list of Jupyter Notebook sessions
5 | ```bash
6 | jupyter notebook list
7 | ```
8 |
9 | ### list CPU and memory usage
10 | ```bash
11 | htop
12 | ```
13 |
14 | ### see GPU usage
15 | ```bash
16 | nvidia-smi
17 | ```
18 | ```bash
19 | nvidia-smi dmon
20 | ```
21 | ```bash
22 | watch -n 1 nvidia-smi
23 | ```
24 |
25 | ### list number of lines in a file
26 | `wc -l file.csv`
27 |
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/images/chrome_curlwget.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/chrome_curlwget.png
--------------------------------------------------------------------------------
/images/dl_libraries.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/dl_libraries.png
--------------------------------------------------------------------------------
/images/image_downloader.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/image_downloader.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson08_lr_find.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson08_lr_find.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_bbox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_bbox.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_dl_box.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_dl_box.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_embeddings.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_embeddings.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_learning.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_learning2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_learning2.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_lr_find2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_lr_find2.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_matplotlib.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_matplotlib.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_md.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_md.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_motivation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_motivation.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_nb_pascal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_nb_pascal.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_obj_det.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_obj_det.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_opps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_opps.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_paper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_paper.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_part1_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_part1_2.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_part2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_part2.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_stage1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_stage1.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_step1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_step1.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_transfer_learning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_transfer_learning.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_visualize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_visualize.png
--------------------------------------------------------------------------------
/images/lesson_08/lesson8_x.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_08/lesson8_x.png
--------------------------------------------------------------------------------
/images/lesson_09/.keep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/images/lesson_09/lesson9_archit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_09/lesson9_archit.png
--------------------------------------------------------------------------------
/images/lesson_09/lesson9_bbox.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_09/lesson9_bbox.png
--------------------------------------------------------------------------------
/images/lesson_09/lesson9_data_loader.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_09/lesson9_data_loader.png
--------------------------------------------------------------------------------
/images/lesson_09/lesson9_know_these1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_09/lesson9_know_these1.png
--------------------------------------------------------------------------------
/images/lesson_09/lesson9_know_these2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_09/lesson9_know_these2.png
--------------------------------------------------------------------------------
/images/lesson_11/.keep:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_charloop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_charloop.png
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_nt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_nt.png
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_rnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_rnn.png
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_rnn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_rnn2.png
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_rnn_stacked.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_rnn_stacked.png
--------------------------------------------------------------------------------
/images/lesson_11/lesson_11_rnn_stacked2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/lesson_11/lesson_11_rnn_stacked2.png
--------------------------------------------------------------------------------
/images/ncm_gephi.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/ncm_gephi.jpg
--------------------------------------------------------------------------------
/images/paperspace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/paperspace.png
--------------------------------------------------------------------------------
/images/paperspace_fastai.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/paperspace_fastai.png
--------------------------------------------------------------------------------
/images/paperspace_jupyter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/paperspace_jupyter.png
--------------------------------------------------------------------------------
/images/pretrained_networks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/pretrained_networks.png
--------------------------------------------------------------------------------
/images/softmax.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/softmax.png
--------------------------------------------------------------------------------
/images/tmux_start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/tmux_start.png
--------------------------------------------------------------------------------
/images/tmux_summary.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/tmux_summary.png
--------------------------------------------------------------------------------
/images/triple_backticks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/reshamas/fastai_deeplearn_part1/21e30ad3a6ec379edfb0feee3920bb170701fe47/images/triple_backticks.png
--------------------------------------------------------------------------------
/notes/competitions.md:
--------------------------------------------------------------------------------
1 | # Competitions
2 |
3 | * [Kaggle](https://www.kaggle.com/competitions)
4 | * [crowdAI](https://www.crowdai.org)
5 | * [Space Apps Challenges](https://2017.spaceappschallenge.org/challenges/)
6 | * [Datahacks](https://datahack.analyticsvidhya.com/contest/practice-problem-age-detection/)
7 | * [Congressional Data Competition](https://www.challenge.gov/list/)
8 | * [GECCO Competitions](http://gecco-2018.sigevo.org/index.html/tiki-index.php?page=Competitions)
9 | * [KD Nuggets listing](https://www.kdnuggets.com/competitions/)
10 | * [International Data Analytics Olympiad](http://idao.world/)
11 | * [SpaceNet Competition: Road Detection and Routing](https://www.iqt.org/cosmiq-works-radiant-solutions-and-nvidia-announce-third-spacenettm-competition-road-detection-and-routing-challenge/#new_tab)
12 |
13 | ## Conference Competitions / Tasks
14 | * [NIPS Competition](https://nips.cc/Conferences/2018/CallForCompetitions)
15 | * [American Statistical Association Data Expo](http://community.amstat.org/stat-computing/data-expo/data-expo-2018)
16 | * [SemEval-2017 Task 9 (NLP)](http://alt.qcri.org/semeval2017/task9/)
17 | * [2016 Shared Task: Challenges in NLP for Clinical Data](https://www.i2b2.org/NLP/RDoCforPsychiatry/PreviousChallenges.php)
18 | * [Noisy User Generated Task (WNUT) Shared Tasks](http://noisy-text.github.io/2017/)
19 | * [SemEval-2017 Task 1 (Semantic Textual Similarity)](http://alt.qcri.org/semeval2017/task1/)
20 |
21 |
22 | ## Students
23 | * [Adobe Analytics Challenge](http://adobeanalyticschallenge.com/) (for university students)
24 | * [ASEAN Data Mining Competition](https://www.youthop.com/competitions/asean-date-science-competition-2018), Asia
25 | * [Data Mining Cup - International Student Competition](https://www.data-mining-cup.com/)
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/notes/deep_learning_libraries.md:
--------------------------------------------------------------------------------
1 | # Deep Learning Libraries
2 |
3 | * TensorFlow (Google)
4 | * Keras (Google); an open-source neural network library written in Python, capable of running on top of TensorFlow, CNTK, Theano, MXNet, or Deeplearning4j
5 | * Caffe (Berkeley)
6 | * Theano (Pascal Lamblin and Yoshua Bengio), library retired in Fall 2017
7 | * PyTorch (Facebook)
8 | * Sonnet (Google)
9 | * MXNet (Amazon)
10 | * Torch (Lua-based)
11 | * Microsoft Cognitive Toolkit (CNTK)
12 | * DLIB (C++ library)
13 | * Caffe2 (Facebook)
14 | * Chainer (Preferred Networks, Japan)
15 | * PaddlePaddle (Baidu, China)
16 | * DeepLearning4J (Skymind in SF); Java, can use with Apache Hadoop and Apache Spark
17 | * Lasagne (lightweight library used to construct and train networks in Theano)
18 |
19 | And more:
20 | * fastai (USF)
21 | * Pyro (Uber)
22 |
23 |
24 | 
25 |
26 |
27 | ---
28 |
29 | ## References
30 | - [Full Ranking List](https://github.com/thedataincubator/data-science-blogs/blob/master/output/DL_libraries_final_Rankings.csv)
31 | - [Ranking Popular Deep Learning Libraries for Data Science](https://blog.thedataincubator.com/2017/10/ranking-popular-deep-learning-libraries-for-data-science/) Oct 2017
32 |
--------------------------------------------------------------------------------
/notes/imagenet.md:
--------------------------------------------------------------------------------
1 | # [ImageNet](http://www.image-net.org)
2 |
3 | First step is to use a pre-trained model.
4 |
5 | ### Pre-trained Model:
6 | - Someone has already downloaded millions of images from the internet
7 | - And built a deep learning model that has learned to recognize the contents of those images
8 | - Nearly always, these pre-trained models are trained on the ImageNet dataset
9 | - ImageNet hosts the most respected annual computer vision competition (past winners include Google and Microsoft)
10 | - 32,000+ categories
11 | - The folks who create these pre-trained networks basically download a large subset of images from ImageNet
12 |
13 | #### Shortcomings of ImageNet Dataset
14 | ImageNet is carefully curated so that each photo has one main item in it
15 |
16 | ### Using ImageNet
17 | - For us, this is a suitable dataset
18 | - Each year, the winners make their source code / weights available
19 |
20 |
21 | ## Architectures: Winners of ImageNet
22 | - **SENet**, 2017 (Squeeze-and-Excitation Networks)
23 | - **ResNet**, 2015 (Microsoft)
24 | - **GoogLeNet**, 2014 (Google), Inception module
25 | - **VGG Net**, 2014 (Oxford Univ group)
26 | - Last of the really powerful simple architectures
27 | - VGG’s simpler approach is not much less accurate than others
28 | - For teaching purposes, it is close to state of the art AND easy to understand
29 | - Excellent for problems that differ from ImageNet (like satellite imagery vs simple photos)
30 | - **ZF Net**, 2013 (Matthew Zeiler and Rob Fergus from NYU)
31 | - **AlexNet**, 2012 (University of Toronto)
32 |
33 | ## Pre-trained Models
34 | - source: https://pytorch.org/docs/stable/torchvision/models.html
35 |
36 | 
37 |
38 |
39 | ## Reference
40 | [The 9 Deep Learning Papers You Need To Know About (Understanding CNNs Part 3)](https://adeshpande3.github.io/adeshpande3.github.io/The-9-Deep-Learning-Papers-You-Need-To-Know-About.html)
41 |
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/notes/loss_functions.md:
--------------------------------------------------------------------------------
1 | # Loss Functions
2 |
3 | ## Cross-entropy Loss
4 |
5 | https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html
6 |
8 | Cross-entropy loss, or log loss, measures the performance of a classification model whose output is a probability value between 0 and 1. Cross-entropy loss increases as the predicted probability diverges from the actual label. So predicting a probability of .012 when the actual observation label is 1 would be bad and result in a high loss value. A perfect model would have a log loss of 0.
9 |
10 | The graph on the source page (cross-entropy loss vs predicted probability) shows the range of possible loss values given a true observation (isDog = 1). As the predicted probability approaches 1, log loss slowly decreases. As the predicted probability decreases, however, the log loss increases rapidly. Log loss penalizes both types of errors, but especially those predictions that are confident and wrong!
13 |
14 | Cross-entropy and log loss are slightly different depending on context, but in machine learning when calculating error rates between 0 and 1 they resolve to the same thing.
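The definition above is easy to sketch in a few lines of plain Python (a minimal illustration with a hypothetical `binary_cross_entropy` helper, not any library's implementation):

```python
import math

def binary_cross_entropy(y_true, y_pred, eps=1e-15):
    """Average log loss over a batch of probability predictions."""
    total = 0.0
    for t, p in zip(y_true, y_pred):
        p = min(max(p, eps), 1 - eps)  # clip to avoid log(0)
        total += -(t * math.log(p) + (1 - t) * math.log(1 - p))
    return total / len(y_true)

# A confident, correct prediction gives a small loss;
# predicting 0.012 when the label is 1 (the example above) gives a large one.
print(binary_cross_entropy([1], [0.9]))    # ≈ 0.105
print(binary_cross_entropy([1], [0.012]))  # ≈ 4.42
```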
15 |
16 |
--------------------------------------------------------------------------------
/notes/nlp_data.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | [The wikitext long term dependency language modeling dataset](https://www.salesforce.com/products/einstein/ai-research/the-wikitext-dependency-language-modeling-dataset/)
5 |
--------------------------------------------------------------------------------
/notes/nlp_terms.md:
--------------------------------------------------------------------------------
1 | # NLP Terms
2 | ```
3 | POS = part of speech
4 | NP-chunking = noun phrase chunking
5 |
6 | DT = determiner
7 | JJ = adjective
8 | NN = noun
9 | VBD = verb, past tense
10 | ```
11 |
12 | ### BLEU (bilingual evaluation understudy)
13 | is an algorithm for evaluating the quality of text which has been machine-translated from one natural language to another. Quality is considered to be the correspondence between a machine's output and that of a human: "the closer a machine translation is to a professional human translation, the better it is" – this is the central idea behind BLEU. BLEU was one of the first metrics to achieve a high correlation with human judgements of quality, and remains one of the most popular automated and inexpensive metrics.
14 |
15 | ### Word Embedding
16 | Word Embedding turns text into numbers.
17 |
18 | #### Types of Word Embedding
19 | 1. Bag of Words - each word in the vocabulary gets its own column in the matrix, which results in sparse matrices
20 | 2. GloVe - counts of co-occurrences
21 | 3. Word2Vec
22 | - These models are shallow, two-layer neural networks that are trained to reconstruct linguistic contexts of words. Word2vec takes as its input a large corpus of text and produces a vector space, typically of several hundred dimensions, with each unique word in the corpus being assigned a corresponding vector in the space. Word vectors are positioned in the vector space such that words that share common contexts in the corpus are located in close proximity to one another in the space.
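A toy bag-of-words example (using a made-up two-document corpus) shows why these matrices come out sparse:

```python
from collections import Counter

docs = ["the cat sat", "the dog sat on the mat"]
vocab = sorted({word for doc in docs for word in doc.split()})

def bag_of_words(doc):
    counts = Counter(doc.split())
    return [counts[word] for word in vocab]  # mostly zeros -> sparse

print(vocab)                        # ['cat', 'dog', 'mat', 'on', 'sat', 'the']
print(bag_of_words("the cat sat"))  # [1, 0, 0, 0, 1, 1]
```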
23 |
24 | ### IE (Information Extraction)
25 | IE turns the unstructured information embedded in texts into structured data.
26 |
27 |
28 | ### IOB (Inside, Outside, Beginning)
29 | ```
30 | The most widespread file representation uses IOB tags:
31 | IOB = Inside-Outside-Beginning
32 | B = beginning (marks beginning of chunk)
33 | I = inside (all subsequent parts of chunk)
34 | O = outside
35 | ```
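A made-up tagged sentence makes the scheme concrete (the PER/LOC entity types are illustrative, not part of the IOB definition):

```python
# "Mr. Smith" is a person chunk, "New York" a location chunk.
tokens = ["Mr.", "Smith", "visited", "New", "York"]
tags = ["B-PER", "I-PER", "O", "B-LOC", "I-LOC"]  # B begins a chunk, I continues it, O is outside

for token, tag in zip(tokens, tags):
    print(token, tag)
```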
36 |
37 | ### Named Entity
38 | anything that can be referred to with a proper name
39 |
40 |
41 | ### NER (Named Entity Recognition)
42 | task of detecting and classifying all the proper names mentioned in a text
43 | * Generic NER: finds names of people, places and organizations that are mentioned in ordinary news texts
44 | * practical applications: built to detect everything from names of genes and proteins, to names of college courses
45 |
46 | ### Reference Resolution (Coreference)
47 | occurs when two or more expressions in a text refer to the same person or thing; they have the same referent, e.g. Bill said he would come; the proper noun Bill and the pronoun he refer to the same person, namely to Bill
48 |
49 | ### Relation Detection and Classification
50 | find and classify semantic relations among the entities discovered in a given text
51 |
52 | ### Event Detection and Classification
53 | find and classify the events in which the entities are participating
54 |
55 |
56 | ### GloVe
57 | GloVe is an unsupervised learning algorithm for obtaining vector representations for words. Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and the resulting representations showcase interesting linear substructures of the word vector space.
58 |
59 |
60 |
61 | ### Temporal Expression Detection
62 | * tells us that our sample text contains the temporal expressions *Friday* and *Thursday*
63 | * includes date expressions such as days of the week, months, holidays, as well as relative expressions including phrases like *two days from now* or *next year*.
64 | * includes time: noon, 3pm, etc.
65 |
66 | ### Temporal Analysis
67 | our problem is to map temporal expressions onto specific calendar dates or times of day and then to use those times to situate events in time.
68 |
--------------------------------------------------------------------------------
/resources.md:
--------------------------------------------------------------------------------
1 | # Resources
2 |
3 | ## Lessons
4 | * [Lesson 1 Notes](http://forums.fast.ai/t/deeplearning-lec1notes/7089) Tim Lee [(Tim's GitHub repo)](https://github.com/timdavidlee/learning-deep/tree/master/deeplearning1)
5 | * [Lesson 2: Case Study - A world class image classifier for dogs and cats (err.., anything)](https://medium.com/@apiltamang/case-study-a-world-class-image-classifier-for-dogs-and-cats-err-anything-9cf39ee4690e) Apil Tamang
6 | * [Lesson 2 Notes](http://forums.fast.ai/t/deeplearning-lecnotes2/7515/2) Tim Lee
7 | * [Lesson 3 Notes](http://forums.fast.ai/t/deeplearning-lecnotes3/7866) Tim Lee
8 | * [Lesson 4 Notes](http://forums.fast.ai/t/deeplearning-lec4notes/8146) Tim Lee
9 |
10 | ## Blog Sites by Author
11 | - [Anand Saha](http://teleported.in/)
12 | - [Apil Tamang](https://medium.com/@apiltamang)
13 |
14 |
15 | ## Blogs Written by (or recommended by) fastai Fellows
16 |
17 | ### Resnet
18 | * [Decoding the ResNet architecture](http://teleported.in/posts/decoding-resnet-architecture/) Anand Saha
19 | * [Yet Another ResNet Tutorial (or not)](https://medium.com/@apiltamang/yet-another-resnet-tutorial-or-not-f6dd9515fcd7) Apil Tamang
20 | * [An Overview of ResNet and its Variants](https://towardsdatascience.com/an-overview-of-resnet-and-its-variants-5281e2f56035) Vincent Fung
21 |
22 | ### Structured Deep Learning
23 | * [Structured Deep Learning](https://towardsdatascience.com/structured-deep-learning-b8ca4138b848) Kerem Turgutlu (Masters' student at USF)
24 |
25 | ### NLP
26 | * [Fine-tuned Language Models for Text Classification](https://arxiv.org/abs/1801.06146) by Jeremy Howard and Sebastian Ruder
27 |
28 | ### PyTorch
29 | * [Transfer Learning using PyTorch — Part 2](https://towardsdatascience.com/transfer-learning-using-pytorch-part-2-9c5b18e15551) Vishnu Subramanian (April 2017)
30 | * [A practitioner's guide to PyTorch](https://medium.com/@radekosmulski/a-practitioners-guide-to-pytorch-1d0f6a238040) by Radek
31 |
32 | ### Learning Rate
33 | * [Improving the way we work with learning rate](https://techburst.io/improving-the-way-we-work-with-learning-rate-5e99554f163b) Vitaly Bushaev
34 | * [Visualizing Learning rate vs Batch size (Neural Nets basics using Fast.ai tools)](https://miguel-data-sc.github.io/2017-11-05-first/) Miguel (Nov 2017)
35 | * [Estimating an Optimal Learning Rate For a Deep Neural Network](https://medium.com/@surmenok/estimating-optimal-learning-rate-for-a-deep-neural-network-ce32f2556ce0) Pavel Surmenok
36 | * [Cyclical Learning Rate Technique](http://teleported.in/posts/cyclic-learning-rate/) Anand Saha
37 | * [Transfer Learning using differential learning rates](https://towardsdatascience.com/transfer-learning-using-differential-learning-rates-638455797f00) Manikanta Yadunanda
38 |
39 |
40 | ### CNN
41 | * [Convolutional Neural Network in 5 minutes](https://medium.com/@init_27/convolutional-neural-network-in-5-minutes-8f867eb9ca39) Sanyam Bhutani
42 | * [CS231n Convolutional Neural Networks for Visual Recognition](http://cs231n.github.io/convolutional-networks/)
43 |
44 | ### Kaggle
45 | * [FastAI Kaggle Starter Kit](https://www.kaggle.com/timolee/fastai-kaggle-starter-kit-lb-0-33) Tim Lee
46 |
47 | ### Jupyter Notebook
48 |
49 | * [Debugging Jupyter notebooks](https://davidhamann.de/2017/04/22/debugging-jupyter-notebooks/)
50 |
51 | ### and More
52 |
53 | * [Do smoother areas of the error surface lead to better generalization? (An experiment inspired by the first lecture of the fast.ai MOOC)](https://medium.com/@radekosmulski/do-smoother-areas-of-the-error-surface-lead-to-better-generalization-b5f93b9edf5b) Radek
54 | * [Contributing to fast.ai](https://medium.com/@wgilliam/86f2c05d72aa) Wayde Gilliam
55 | * [Getting Computers To See Better Than Humans](https://medium.com/@ArjunRajkumar/getting-computers-to-see-better-than-humans-346d96634f73) Arjun Rajkumar
56 | * [Fun with small image data-sets](https://medium.com/@nikhil.b.k_13958/fun-with-small-image-data-sets-8c83d95d0159) Nikhil B
57 | * [Fun with small image data-sets (Part 2)](https://medium.com/@nikhil.b.k_13958/fun-with-small-image-data-sets-part-2-54d683ca8c96) Nikhil B
58 | * [Structured Deep Learning](https://medium.com/@keremturgutlu/structured-deep-learning-b8ca4138b848) Kerem Turgutlu
59 | * [Exploring Stochastic Gradient Descent with Restarts (SGDR)](https://medium.com/38th-street-studios/exploring-stochastic-gradient-descent-with-restarts-sgdr-fa206c38a74e) Mark Hoffman
60 | * [How do We Train Neural Networks?](https://towardsdatascience.com/how-do-we-train-neural-networks-edd985562b73) Vitaly Bushaev
61 |
62 | ### Reference Blogs
63 |
64 | * [Understanding LSTMs](http://colah.github.io/posts/2015-08-Understanding-LSTMs/) Christopher Olah
65 | * [Recurrent Neural Network Tutorial, Part 4 – Implementing a GRU/LSTM RNN with Python and Theano](http://www.wildml.com/2015/10/recurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano/) Denny Britz
66 |
67 | ## Research Publications
68 |
69 | * [A systematic study of the class imbalance problem in convolutional neural networks](https://arxiv.org/pdf/1710.05381.pdf)
70 | * [What’s your ML Test Score? A rubric for ML production systems](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45742.pdf) (NIPS 2016)
73 | * [ADAM: A Method for Stochastic Optimization](https://arxiv.org/pdf/1412.6980.pdf) (ICLR 2015)
74 | * [A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch size, momentum, and weight decay](https://arxiv.org/abs/1803.09820) Leslie Smith, March 2018
75 | * [Cyclical Learning Rates for Training Neural Networks](https://arxiv.org/abs/1506.01186) (WACV 2017) Leslie Smith
76 | * [Fixing Weight Decay Regularization in Adam](https://arxiv.org/abs/1711.05101) Ilya Loshchilov, Frank Hutter (Submitted on 14 Nov 2017)
77 | * [Learning Distributed Representations of Concepts](http://www.cs.toronto.edu/~hinton/absps/families.pdf) Geoffrey Hinton, 1986
78 | * [Using the Output Embedding to Improve Language Models](https://arxiv.org/abs/1608.05859)
79 |
80 | ### Key Research Papers
81 | * [A disciplined approach to neural network hyper-parameters: Part 1 -- learning rate, batch size, momentum, and weight decay](https://arxiv.org/abs/1803.09820), Leslie N. Smith, 2018
82 | * [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) Kaiming He, ILSVRC 2015 classification task winner
83 | * [Visualizing and Understanding Convolutional Networks](http://www.matthewzeiler.com/wp-content/uploads/2017/07/arxive2013.pdf) Zeiler & Fergus, 2013
84 |
85 |
86 | ## Videos
87 |
88 | * [The wonderful and terrifying implications of computers that can learn](https://www.ted.com/talks/jeremy_howard_the_wonderful_and_terrifying_implications_of_computers_that_can_learn) (Ted Talk by Jeremy Howard 2014)
89 | * [A Visual and Intuitive Understanding of Deep Learning](https://www.youtube.com/embed/Oqm9vsf_hvU?autoplay=1&feature=oembed&wmode=opaque) Otavio Good of Google, AI Conf SF Sep 2017
90 | * [Ian Goodfellow - Numerical Computation for Deep Learning - AI With The Best Oct 14-15, 2017](https://www.youtube.com/watch?v=XlYD8jn1ayE&t=5m40s)
91 | * [Ali Rahimi's talk at NIPS (NIPS 2017 Test-of-Time Award presentation)](https://www.youtube.com/watch?v=Qi1Yry33TQE)
92 |
93 |
94 |
--------------------------------------------------------------------------------
/takeaways.md:
--------------------------------------------------------------------------------
1 | # Takeaways / Tips
2 |
3 | ## Modeling
4 | 1. When training a model, we can "ignore" or not worry as much about **overfitting** as long as the validation error is decreasing.
5 |
6 |
7 | 2. **Image Sizes** are generally 224x224 or 299x299, which are the sizes that ImageNet models are generally trained at. You get the best results if you use the same size as the original training size. Since people don't tend to mention what size was used originally, you can try both with something like dogs v cats and see which works better. More recent models seem to generally use 299.
8 |
9 | 3. **Rare Cases** You can replicate the rare classes to make them more balanced. Never throw away data!
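For illustration, a minimal pure-Python sketch of balancing by replication (the class names and counts here are made up): duplicate random samples of the rare class until it matches the size of the largest class.

```python
import random

# hypothetical labelled dataset with a rare class ("lynx")
samples = [("cat", i) for i in range(100)] + [("lynx", i) for i in range(10)]

# group samples by class label
by_class = {}
for label, x in samples:
    by_class.setdefault(label, []).append((label, x))

target = max(len(v) for v in by_class.values())  # size of the largest class

# replicate (sample with replacement) each smaller class up to the target size
balanced = []
for items in by_class.values():
    extra = [random.choice(items) for _ in range(target - len(items))]
    balanced.extend(items + extra)

counts = {}
for label, _ in balanced:
    counts[label] = counts.get(label, 0) + 1
print(counts)  # {'cat': 100, 'lynx': 100}
```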
10 |
11 | ### Reducing Overfitting
12 | * data augmentation
13 | * pretrained network
14 | * gradually increasing image size
15 | * differential learning rates
16 | * SGDR
17 | * dropouts
18 | * higher resolution images
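As a toy illustration of the first technique, data augmentation manufactures extra training examples from the ones you already have, e.g. a horizontal flip (a pure-Python sketch with a made-up 2x3 "image"):

```python
# a toy 2x3 "image" as nested lists of pixel values
img = [[1, 2, 3],
       [4, 5, 6]]

flipped = [row[::-1] for row in img]  # horizontal flip: reverse each row
augmented = [img, flipped]            # training set now has the original + a flipped copy

print(flipped)  # [[3, 2, 1], [6, 5, 4]]
```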
19 |
20 | # Best Practices
21 |
22 | 1. When opening a notebook in the fastai library, make a copy with the prefix **tmp**. "tmp" files are included in the fastai repo's [.gitignore](https://github.com/fastai/fastai/blob/master/.gitignore)
23 |
24 |
--------------------------------------------------------------------------------
/tips_faq_beginners.md:
--------------------------------------------------------------------------------
1 | # Fastai FAQs for Beginners
2 |
3 | ## Q1: How to ask for help for fastai
4 | - http://forums.fast.ai/t/how-to-ask-for-help/10421
5 |
6 | - Make sure you enclose your code in triple back ticks. Example:
7 |
8 | >use this code - notice the three backticks enclosing the code block:
9 |
10 | 
11 |
12 |
13 | >to render this:
14 |
15 | ```bash
16 | ~/.conda/envs/tf-gpu/lib/python3.6/multiprocessing/popen_fork.py in __init__(self, process_obj)
17 | 18 sys.stderr.flush()
18 | 19 self.returncode = None
19 | ---> 20 self._launch(process_obj)
20 | 21
21 | 22 def duplicate_for_child(self, fd):
22 |
23 | ~/.conda/envs/tf-gpu/lib/python3.6/multiprocessing/popen_fork.py in _launch(self, process_obj)
24 | 65 code = 1
25 | 66 parent_r, child_w = os.pipe()
26 | ---> 67 self.pid = os.fork()
27 | 68 if self.pid == 0:
28 | 69 try:
29 |
30 | OSError: [Errno 12] Cannot allocate memory
31 | ```
32 |
33 |
34 | ---
35 | ## Q2: Where can I put _my_ Jupyter Notebook?
36 |
37 | :red_circle: **NOTE:** Do NOT put your Jupyter Notebook under the `/data/` directory! Here's [the link](http://forums.fast.ai/t/how-to-remove-ipynb-checkpoint/8532/2) for why.
38 |
39 | ### Option 1 (default): under `/courses`
40 | The default location is under the `dl1` folder, wherever you've cloned the repo on your GPU machine.
41 | >my example
42 | ```bash
43 | (fastai) paperspace@psgyqmt1m:~$ ls
44 | anaconda3 data downloads fastai
45 | ```
46 | - Paperspace: `/home/paperspace/fastai/courses/dl1`
47 | - AWS: `/home/ubuntu/fastai/courses/dl1`
48 |
49 | ### Option 2: where you want
50 | If you change the default **location of your notebook**, you'll need to update your `.bashrc` file. Add in the path to where you've cloned the fastai GitHub repo:
51 | - for me, my notebooks are in a "projects" directory: `~/projects`
52 | - my `fastai` repo is cloned at the root level, so it is here: `~/fastai`
53 |
54 | in the file `.bashrc` add this path:
55 | ```
56 | export PYTHONPATH=$PYTHONPATH:~/fastai
57 | ```
58 | **Reminder:** don't forget to run (or `source`) your `.bashrc` file:
59 | 1. add path where fastai repo is to `.bashrc`
60 | 2. save and exit
61 | 3. source it: `source ~/.bashrc`
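To sanity-check that the path took effect, you can inspect `sys.path` from Python (directories listed in `PYTHONPATH` are added to `sys.path` at interpreter startup; `~/fastai` is just the example clone location from above):

```python
import os
import sys

# the hypothetical clone location used in the example above
repo = os.path.expanduser("~/fastai")

# PYTHONPATH entries appear on sys.path, so the repo should show up here
on_path = any(os.path.expanduser(p) == repo for p in sys.path)
print("fastai repo on sys.path:", on_path)
```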
62 |
63 | ### Option 3: using `pip install`
64 | Note that if you installed via `pip install`, you don't need to specify the path (as in Option 2) or put your notebook in the courses folder (as in Option 1).
65 | However, fastai is still being actively updated, so there can be a delay before the latest version is available directly via pip.
66 | You can try installing straight from GitHub:
67 | `pip install https://github.com/fastai/fastai/archive/master.zip`
68 |
69 | ---
70 | ## Q3: What does my directory structure look like?
71 | >my path
72 | ```bash
73 | PATH = "/home/ubuntu/data/dogscats/"
74 | ```
75 |
76 | >looking at my directory structure
77 | ```bash
78 | !tree {PATH} -d
79 | ```
80 | ```bash
81 | /home/ubuntu/data/dogscats/
82 | ├── models
83 | ├── sample
84 | │ ├── models
85 | │ ├── tmp
86 | │ ├── train
87 | │ │ ├── cats
88 | │ │ └── dogs
89 | │ └── valid
90 | │ ├── cats
91 | │ └── dogs
92 | ├── test
93 | ├── train
94 | │ ├── cats
95 | │ └── dogs
96 | └── valid
97 | ├── cats
98 | └── dogs
99 | ```
100 | ### Notes on directories
101 | * `models` directory: created automatically
102 | * `sample` directory: you create this with a small sub-sample, for testing code
103 | * `test` directory: put any test data there if you have it
104 | * `train`/`test` directory: you create these and separate the data using your own data sample
105 | * `tmp` directory: if you have this, it was automatically created after running models
106 | * fastai / keras code automatically picks up the **label** of your categories based on your folders. Hence, in this example, the two labels are: dogs, cats
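A small sketch of how labels can be inferred from folder names (it builds a throwaway `train/` tree in a temp directory, using the cats/dogs names from the example above):

```python
import os
import tempfile

# build a throwaway train/ directory with one sub-folder per class
root = tempfile.mkdtemp()
for label in ("cats", "dogs"):
    os.makedirs(os.path.join(root, "train", label))

# the class labels are simply the sub-directory names under train/
labels = sorted(os.listdir(os.path.join(root, "train")))
print(labels)  # ['cats', 'dogs']
```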
107 |
108 | ### Notes on image file names
109 | * image file names are not important; you can name them whatever you want
110 |
111 |
112 | ### Getting file counts
113 | >looking at file counts
114 | ```bash
115 | # print number of files in each folder
116 |
117 | print("training data: cats")
118 | !ls -l {PATH}train/cats | grep ^[^dt] | wc -l
119 |
120 | print("training data: dogs")
121 | !ls -l {PATH}train/dogs | grep ^[^dt] | wc -l
122 |
123 | print("validation data: cats")
124 | !ls -l {PATH}valid/cats | grep ^[^dt] | wc -l
125 |
126 | print("validation data: dogs")
127 | !ls -l {PATH}valid/dogs | grep ^[^dt] | wc -l
128 |
129 | print("test data")
130 | !ls -l {PATH}test1 | grep ^[^dt] | wc -l
131 | ```
132 | >my output
133 | ```bash
134 | training data: cats
135 | 11501
136 | training data: dogs
137 | 11501
138 | validation data: cats
139 | 1001
140 | validation data: dogs
141 | 1001
142 | test data
143 | 12501
144 | ```
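If you prefer Python to shell, roughly the same counts can be taken with the standard-library `pathlib`. This sketch builds a throwaway directory tree so it is self-contained; in practice you would point it at your own `PATH`:

```python
import pathlib
import tempfile

# throwaway stand-in for {PATH}train/cats etc., with a few dummy files
path = pathlib.Path(tempfile.mkdtemp())
for sub, n in [("train/cats", 3), ("train/dogs", 3), ("valid/cats", 2)]:
    d = path / sub
    d.mkdir(parents=True)
    for i in range(n):
        (d / f"{i}.jpg").touch()

# count the files in each class folder, like the `ls | wc -l` calls above
counts = {str(d.relative_to(path)): sum(1 for f in d.iterdir() if f.is_file())
          for d in sorted(path.glob("*/*"))}
print(counts)
```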
145 | ---
146 | ## Q4: What is a good train/validation/test split?
147 | - can do `80/20` (train/validation)
148 | - if you have or are creating a 'test' split, use for (train/validation/test):
149 | - can do `80/15/5`
150 | - can do `70/20/10`
151 | - can do `60/20/20`
152 |
153 | **Note:** Depending on who the instructor is, they use various naming conventions:
154 | - train/test and then **validation** for holdout data
155 | - train/validation and then **test** for holdout data
156 |
157 | It's important to understand that:
158 | - in the case of train/test, the test set is used to test for **generalization**
159 | - the **holdout data** is a second test set
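A minimal sketch of an 80/15/5 split over a list of filenames (the filenames are made up; shuffle first so the split is random):

```python
import random

files = [f"img_{i}.jpg" for i in range(1000)]  # hypothetical file list
random.shuffle(files)                          # randomize before splitting

n = len(files)
n_train = int(0.80 * n)
n_valid = int(0.15 * n)

train = files[:n_train]
valid = files[n_train:n_train + n_valid]
test  = files[n_train + n_valid:]              # remaining 5%

print(len(train), len(valid), len(test))  # 800 150 50
```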
160 |
161 | ---
162 | ## Q5: How do I copy files or data from my local computer to a cloud machine (Paperspace, AWS, etc)?
163 |
164 | [Instructions on using `scp` command to transfer files from platforms](https://github.com/reshamas/fastai_deeplearn_part1/blob/master/tools/copy_files_local_to_cloud.md)
165 |
166 | ---
167 | ## Q6: Where do I put my sample images?
168 | [testing sample images after the model has been created](http://forums.fast.ai/t/wiki-lesson-1/9398/282)
169 |
--------------------------------------------------------------------------------
/tips_prereqs.md:
--------------------------------------------------------------------------------
1 | # Things to Know Before Running Fastai Library
2 |
3 |
4 | ## Q1: What is train/valid/test?
5 |
6 |
7 | ## Q2: How do I divide up train/valid/test?
8 |
9 |
--------------------------------------------------------------------------------
/tips_troubleshooting.md:
--------------------------------------------------------------------------------
1 | # Solving Errors
2 |
3 | ## Latest version of fastai library
4 | Do a `git pull` of the [fastai library](https://github.com/fastai/fastai). Updates may sort out some errors.
5 | ```bash
6 | git pull
7 | ```
8 | ## Update Anaconda packages
9 | ```bash
10 | conda env update
11 | conda update --all
12 | ```
13 |
14 | ## Delete `tmp` directory and rerun
15 |
16 | ## CUDA out of memory error
17 | - interrupt kernel
18 | - reduce batch size
19 | - **RESTART kernel**!
20 |
21 | ## TTA (Test Time Augmentation)
22 | - [forum post](http://forums.fast.ai/t/lesson-2-dog-breeds-error-on-call-of-accuracy-log-preds-y/11965)
23 | - "TTA used to return the average of the augmentations as a prediction. Now it returns the set so you can do with them as you please."
24 |
25 | #### Error with this code
26 | ```python
27 | log_preds,y = learn.TTA()
28 | probs = np.exp(log_preds)
29 | accuracy(log_preds,y), metrics.log_loss(y, probs)
30 | ```
31 | #### Adjust with this code
32 | ```python
33 | log_preds,y = learn.TTA()
34 | preds = np.mean(np.exp(log_preds),0)
35 | ```
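To see what `np.mean(np.exp(log_preds), 0)` computes, here is a pure-Python sketch with made-up numbers: exponentiate the log-probabilities from each augmented pass, then average across the passes:

```python
import math

# hypothetical log-probabilities for one sample, from 3 augmented passes,
# over 2 classes (rows = TTA passes)
log_preds = [[math.log(0.6), math.log(0.4)],
             [math.log(0.8), math.log(0.2)],
             [math.log(0.7), math.log(0.3)]]

# exp undoes the log, recovering probabilities per pass
probs = [[math.exp(lp) for lp in row] for row in log_preds]

# average over the passes (axis 0), as np.mean(..., 0) does
preds = [sum(col) / len(col) for col in zip(*probs)]

print([round(p, 2) for p in preds])  # [0.7, 0.3]
```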
36 |
37 | ---
38 | ## Empty graph with learning rate finder
39 | - try increasing the batch size
40 |
41 | ---
42 |
43 | # Debugging
44 | Note from Jeremy:
45 | Immediately after you get the error, type `%debug` in a cell to enter the debugger. Then use the standard python debugger commands to follow your code to see what’s happening.
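`%debug` is IPython's post-mortem magic; the plain-Python equivalent is `pdb.post_mortem()`. A sketch (the debugger call is commented out so the snippet runs non-interactively):

```python
import pdb
import sys
import traceback

def buggy(x):
    return 1 / x  # raises ZeroDivisionError when x == 0

try:
    buggy(0)
except ZeroDivisionError:
    exc_type, exc_value, tb = sys.exc_info()
    traceback.print_exception(exc_type, exc_value, tb)
    # pdb.post_mortem(tb)  # uncomment to step through the failed frame
```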
46 |
--------------------------------------------------------------------------------
/tools/README.md:
--------------------------------------------------------------------------------
1 | # Tools for Deep Learning
2 |
3 |
4 | [Create an image dataset from scratch](http://forums.fast.ai/t/create-an-image-dataset-from-scratch/9992)
5 |
--------------------------------------------------------------------------------
/tools/check_links.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Objective: run a script to check an *.md file to see that all links are valid
4 |
5 | # EXAMPLE of how to run file:
6 | """
7 | ▶ pwd
8 | /Users/reshamashaikh/ds/my_repos/fastai_deeplearn_part1/tools
9 |
10 | my_repos/fastai_deeplearn_part1/tools
11 | ▶ python check_links.py -v /Users/reshamashaikh/ds/my_repos/fastai_deeplearn_part1/README.md
12 | VALID http://www.fast.ai
13 | VALID http://forums.fast.ai/c/part1-v2
14 | VALID http://forums.fast.ai/c/part1v2-beg
15 | VALID https://github.com/fastai/fastai
16 | VALID tools/aws_ami_gpu_setup.md
17 | VALID tools/tmux.md
18 | VALID resources.md
19 |
20 | my_repos/fastai_deeplearn_part1/tools
21 | ▶ python check_links.py -v /Users/reshamashaikh/ds/my_repos/fastai_deeplearn_part1/tools/tmux.md
22 | VALID #section-a
23 | VALID #section-b
24 | VALID #section-c
25 | VALID #section-d
26 | VALID #section-e
27 | VALID https://hackernoon.com/a-gentle-introduction-to-tmux-8d784c404340
28 | VALID https://alekshnayder.com
29 | VALID http://console.aws.amazon.com/
30 |
31 | """
32 |
33 | # Running Python 3
34 |
35 | __author__ = 'taylanbil'
36 |
37 |
38 | import os
39 | import markdown
40 | from argparse import ArgumentParser
41 |
42 | from bs4 import BeautifulSoup
43 |
44 |
45 | class LinkChecker(object):
46 |
47 | def __init__(self, mdfilename, verbose=False):
48 | """
49 | input: mdfilename has to be the full path!!!
50 | """
51 | self.mdfilename = mdfilename
52 | self.path = os.path.abspath(os.path.dirname(mdfilename))
53 | self.soup = self.get_soup()
54 | self.verbose = verbose
55 |
56 | def validate_link(self, link):
57 | if link.startswith('http'):
58 | return True
59 | elif link.startswith('#'):
60 | return bool(self.soup.find_all('a', {'name': link[1:]}))
61 | elif link.startswith('/'):
62 | return os.path.exists(os.path.join(self.path, link[1:]))
63 | else:
64 | return os.path.exists(os.path.join(self.path, link))
65 |
66 | def get_soup(self):
67 | with open(self.mdfilename, 'r') as f:
68 | md = markdown.markdown(f.read())
69 | soup = BeautifulSoup(md, "lxml")
70 | return soup
71 |
72 | def get_links(self):
73 | for link in self.soup.find_all('a', href=True):
74 | yield link['href']
75 |
76 | def process_link(self, link):
77 | isvalid = 'VALID' if self.validate_link(link) else 'INVALID'
78 | if self.verbose or isvalid == 'INVALID':
79 | print('{isvalid}\t{link}'.format(isvalid=isvalid, link=link))
80 |
81 | def main(self):
82 | for link in self.get_links():
83 | self.process_link(link)
84 |
85 |
86 | def get_namespace():
87 | parser = ArgumentParser()
88 | parser.add_argument(
89 | 'mdfilename', help='''full path to the .md file you would like
90 | to check links in''')
91 | parser.add_argument(
92 | '-v', '--verbose', action='store_true',
93 | help='''verbose flag. if specified, prints all links with
94 | results. Otherwise, prints invalid links only''')
95 | return parser.parse_args()
96 |
97 |
98 | if __name__ == '__main__':
99 | ns = get_namespace()
100 | LC = LinkChecker(ns.mdfilename, verbose=ns.verbose)
101 | LC.main()
102 |
103 | # # a test here
104 | # mdfile = '/Users/reshamashaikh/ds/my_repos/fastai_deeplearn_part1/README.md'
105 | # LC = LinkChecker(mdfile)
106 | # LC.main()
107 |
108 |
--------------------------------------------------------------------------------
/tools/copy_files_local_to_cloud.md:
--------------------------------------------------------------------------------
1 | # Copy Files from Local Computer to Cloud Computer
2 | - copy files from local computer to AWS, Paperspace, Google Cloud, etc
3 | - copy files from cloud computer to local
4 | - copy files from local computer to remote machine
5 |
6 | ## Reference
7 | [fastai Forum thread](http://forums.fast.ai/t/lesson-1-part-1-v2-custom-images/10154/16)
8 | - [Stack Overflow](https://stackoverflow.com/questions/4728752/scp-a-bunch-of-files-via-bash-script-there-must-be-a-better-way)
9 | - [Stack Exchange](https://unix.stackexchange.com/questions/232946/how-to-copy-all-files-from-a-directory-to-a-remote-directory-using-scp)
10 |
11 | ## Definition
12 | `scp` = secure copy
13 |
14 | ### General Syntax
15 | `scp -i "path to .pem file" "file to be copied from local machine" username@amazoninstance:'destination folder to copy file on remote machine'`
16 |
17 | ### Examples
18 | ```bash
19 | scp -r . ubuntu@107.22.140.44:~/data/camelhorse
20 | ```
21 |
22 | ```bash
23 | scp -i "path to .pem file" "file to be copied from local machine" username@amazoninstance:'destination folder to copy file on remote machine'
24 | ```
25 |
26 | ```bash
27 | scp -i .ssh/aws-key-fast-ai.pem \
28 |     ubuntu@ec2-35-165-244-148.us-west2.compute.amazonaws.com:~/nbs/Notebooks/Weights/Predictions/test_preds_rms.dat ~/test_preds_rms.dat
29 | ```
30 |
--------------------------------------------------------------------------------
/tools/create_keypair.md:
--------------------------------------------------------------------------------
1 | # Create a keypair
2 |
3 | ### Step 1: go to the appropriate directory in terminal
4 | * In your Terminal, go to `.ssh` folder under your home directory
5 | (Note: Windows users should have Ubuntu installed.)
6 | >my example
7 | `/Users/reshamashaikh/.ssh`
8 |
9 | **Note:** If you do not have the `.ssh` directory, you can create it (make sure you are in your home directory):
10 | `mkdir .ssh`
11 |
12 | ### Step 2: create `id_rsa` files if needed
13 | **Note:** these `id_rsa` files contain a special password for your computer to be able to log onto AWS.
14 |
15 | If you do not have these two files (`id_rsa` and `id_rsa.pub`), create them by typing:
16 | - `ssh-keygen`
17 | - Hit `Enter` 3 times
18 |
19 | >my example
20 | ```bash
21 | % pwd
22 | /Users/reshamashaikh/.ssh
23 | % ls
24 | % ssh-keygen
25 | Generating public/private rsa key pair.
26 | Enter file in which to save the key (/Users/reshamashaikh/.ssh/id_rsa):
27 | Enter passphrase (empty for no passphrase):
28 | Enter same passphrase again:
29 | Your identification has been saved in /Users/reshamashaikh/.ssh/id_rsa.
30 | Your public key has been saved in /Users/reshamashaikh/.ssh/id_rsa.pub.
31 | The key fingerprint is:
32 | SHA256:jmDJes1qOzDi8KynXLGQ098JMSRnbIyt0w7vSgEsr2E reshamashaikh@RESHAMAs-MacBook-Pro.local
33 | The key's randomart image is:
34 | +---[RSA 2048]----+
35 | | .=+ |
36 | |. .== |
37 | |.o +o |
38 | |..+= oo |
39 | |.E.+X. S |
40 | |+o=o=*oo. |
41 | |++.*o.+o. |
42 | |..*.oo |
43 | |o= o+o |
44 | +----[SHA256]-----+
45 | % ls
46 | total 16
47 | -rw------- 1 1675 Dec 17 12:20 id_rsa
48 | -rw-r--r-- 1 422 Dec 17 12:20 id_rsa.pub
49 | %
50 | ```
51 |
52 | ### Step 3: import key files to AWS
53 | (Note: Extra step for Windows users: you will need to copy these files to your hardrive from Ubuntu.)
54 | In AWS, go to **Key Pairs** in left menu and import `id_rsa.pub`. This step connects your local computer to AWS.
55 | Note for Mac Users: can also `cat id_rsa.pub` in terminal, copy and paste it into AWS for "key contents".
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------
/tools/crestle_run.md:
--------------------------------------------------------------------------------
1 | # Getting Crestle Working - for Newbies
2 | fastai.ai Part 1 v2
3 | Updated: 05-Nov-2017
4 |
5 | ### Why does my notebook have all these errors when I try running it in Crestle?
6 | Answer: the fastai repo in there has outdated materials
7 |
8 | ### What's the easiest way to fix it?
9 |
10 | a) log into [Crestle](https://www.crestle.com) and `Start Jupyter`
11 | b) Hit `New Terminal`
12 | c) `ls`
13 | d) `cd courses`
14 | e) `ls` (you'll see the fastai course there)
15 |
16 | f) `git pull` (update repo)
17 |
18 | OR, if you run into errors because you have added files to the repository, etc., this is a quick fix:
19 | g) `rm -rf fastai` (delete this old version)
20 | h) `git clone https://github.com/fastai/fastai.git` (clone, get updated course files)
21 |
22 | >my example
23 | ```bash
24 | nbuser@jupyter:~$ ls
25 | README.txt courses examples
26 | nbuser@jupyter:~$ cd courses
27 | nbuser@jupyter:~/courses$ ls
28 | fastai
29 | nbuser@jupyter:~/courses$ rm -rf fastai
30 | nbuser@jupyter:~/courses$ git clone https://github.com/fastai/fastai.git
31 | Cloning into 'fastai'...
32 | remote: Counting objects: 1055, done.
33 | remote: Compressing objects: 100% (19/19), done.
34 | remote: Total 1055 (delta 11), reused 17 (delta 9), pack-reused 1026
35 | Receiving objects: 100% (1055/1055), 64.37 MiB | 40.84 MiB/s, done.
36 | Resolving deltas: 100% (598/598), done.
37 | Checking connectivity... done.
38 | Checking out files: 100% (110/110), done.
39 | nbuser@jupyter:~/courses$
40 | ```
41 | And, now my [Lesson 1 notebook](https://s.users.crestle.com/u-fqnc8t2x12/notebooks/courses/fastai/courses/dl1/lesson1.ipynb) works! :boom:
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/tools/download_data_browser_curlwget.md:
--------------------------------------------------------------------------------
1 |
2 | # Browser extensions for getting data
3 |
4 | ## [Chrome Extension: CurlWget](https://chrome.google.com/webstore/detail/curlwget/jmocjfidanebdlinpbcdkcmgdifblncg?hl=en)
5 |
6 | Note: This is a second way to download the data from Kaggle. The first way is using `kaggle-cli`.
7 |
8 | ## Kaggle Competition
9 | [Planet: Understanding the Amazon from Space](https://www.kaggle.com/c/planet-understanding-the-amazon-from-space)
10 |
11 | The Planet data is here:
12 | https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/data
13 | * Start downloading the data, then cancel the download.
14 | * In Chrome browser, top right, click bright yellow icon and copy the text. Mine looks like this:
15 |
16 |
17 | Copy and paste the syntax in your terminal.
18 | >my example
19 | ```bash
20 | wget --header="Host: storage.googleapis.com" --header="User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8" --header="Accept-Language: en-US,en;q=0.8" "https://storage.googleapis.com/kaggle-competitions-data/kaggle/6322/test-jpg.tar.7z?GoogleAccessId=competitions-data@kaggle-161607.iam.gserviceaccount.com&Expires=1510937384&Signature=5%2Bq%2BWbix63zFgHiDlusQsWDmXpAmCZ43%2BNCyXV9v6m%2BaPjEHloBX%2FFX858hPSZohUXUs3kT9gbE5zEhQ%2FKjYD8ngPGgPwQYP3IOV4Tn3ku2P2%2FQ8vtE%2FFNUmcqs7rOqC8ZUAoX3TZ8OHSoh%2B1R3zYp0mY%2FjDbhJXPVVsZSsnEynbO0Rg9jsXFN0UH2QgKWhGoYou%2B1W2u6UvUsjgNfYnwgzCzeEjmjN1Fp2we7q18EYgbdv3Y%2BMpP%2BDQxz57%2B%2Bn9Cio%2Bn012qy5hDJec9%2F6PSZ2w%2Bvl0JuazRmaOP2K7L9MgH1zhAlO%2FQy37fC9r8XqOtLqMChYBYKXPHO0qSF6Dw%3D%3D" -O "test-jpg.tar.7z" -c
21 | ```
22 | All the cookies and headers needed for the download are saved in the command. This is also useful for downloading other items hidden behind a login.
23 |
24 | ## Data Location
25 | Option 1: `data` directory could be a sub-directory of where your Jupyter Notebook is located.
26 | Option 2: Use symlinks
27 |
28 | ## Firefox browser extension
29 | https://addons.mozilla.org/en-US/firefox/addon/cliget/
30 |
--------------------------------------------------------------------------------
/tools/download_data_curl.md:
--------------------------------------------------------------------------------
1 | # Download Dataset using `curl`
2 |
3 | ## Getting Data
4 |
5 | Sample data: https://www.kaggle.com/c/bluebook-for-bulldozers
6 | - in this example, we're using Firefox as a browser (you can use another browser)
7 | - go to the website where the data is
8 | - go to Developer section
9 | - method 1: open the JavaScript console via the Developer menu
10 | - method 2: press `ctrl` + `shift` + `i` to bring up the web developer tool
11 | - tab to Network
12 | - go to data row
13 | - right click, copy as "curl" (unix command that downloads data, like wget)
14 | - might want to delete "2.0" in url since it causes problems
15 | - `curl url_link -o bulldozers.zip` (`-o` sets the output file, so give it a suitable file name)
16 |
17 | ## Setting up the Data Directory
18 | - `mkdir bulldozers`
19 | - `mv bulldozers.zip bulldozers/`
20 | - `sudo apt install unzip` (or, on Mac: `brew install unzip`)
21 | - `cd bulldozers`
22 | - `unzip bulldozers.zip`
23 |
24 |
25 |
--------------------------------------------------------------------------------
/tools/download_data_kaggle_cli.md:
--------------------------------------------------------------------------------
1 | # Kaggle CLI
2 | (**CLI** = **C**ommand **L**ine **I**nterface)
3 |
4 | ## Resource
5 | [Kaggle CLI Wiki](http://wiki.fast.ai/index.php/Kaggle_CLI)
6 |
7 | ## Installation
8 | Check to see if `kaggle-cli` is installed:
9 | kaggle-cli --version
10 |
11 | Install `kaggle-cli`:
12 | pip install kaggle-cli
13 | or pip3 install kaggle-cli
14 |
15 | May need to **update package** if you run into errors:
16 | pip install kaggle-cli --upgrade
17 | or pip3 install kaggle-cli --upgrade
18 |
19 |
20 | ---
21 |
22 | ## [Kaggle Competition Datasets](https://www.kaggle.com/datasets)
23 | Note 1: You must have a Kaggle user ID and password. If you logged in to Kaggle using Facebook or LinkedIn, you'll have to reset your password, as a password is needed for command-line access to the data.
24 |
25 | Note 2: Pick a competition, and ensure you have **accepted the rules** of that competition. Otherwise, you will not be able to download the data using the CLI.
26 |
27 |
28 |
29 | ### Step 1: Identify the competition I will use
30 | https://www.kaggle.com/c/dogs-vs-cats
31 |
32 | **Note:** the competition name can be found in the url; here it is **dogs-vs-cats**
33 |
34 | ### Step 2: Accept competition rules
35 | https://www.kaggle.com/c/dogs-vs-cats/rules
36 |
37 | ### Step 3: Set up data directory
38 | ls
39 | mkdir data
40 | cd data
41 | >my example
42 | ```bash
43 | ubuntu@ip-10-0-0-13:~$ ls
44 | anaconda2 anaconda3 downloads git nbs temp
45 | ubuntu@ip-10-0-0-13:~$ mkdir data
46 | ubuntu@ip-10-0-0-13:~$ cd data
47 | ```
48 |
49 | ### Step 4a: Download data (try 1)
50 | Syntax:
51 | kg config -g -u 'username' -p 'password' -c 'competition'
52 | kg download
53 |
54 | Note: Here's an example of the warning message I received when I tried to download data before accepting the rules of the competition:
55 | >my example
56 | ```bash
57 | ubuntu@ip-10-0-0-13:~/data$ kg config -g -u 'reshamashaikh' -p 'xxx' -c dogs-vs-cats
58 | ubuntu@ip-10-0-0-13:~/data$ kg download
59 | Starting new HTTPS connection (1): www.kaggle.com
60 | downloading https://www.kaggle.com/c/dogs-vs-cats/download/sampleSubmission.csv
61 |
62 | sampleSubmission.csv N/A% | | ETA: --:--:-- 0.0 s/B
63 |
64 | Warning: download url for file sampleSubmission.csv resolves to an html document rather than a downloadable file.
65 | Is it possible you have not accepted the competition's rules on the kaggle website?
66 | ```
67 |
68 | ### Step 4b: Download data (try 2)
69 | Note 1: I have accepted the competition rules; I will try downloading again
70 | kg config -g -u 'username' -p 'password' -c 'competition'
71 | kg download
72 | >my example
73 | ```bash
74 | ubuntu@ip-10-0-0-13:~/data$ kg config -g -u 'reshamashaikh' -p 'xxx' -c dogs-vs-cats
75 | ubuntu@ip-10-0-0-13:~/data$ kg download
76 | Starting new HTTPS connection (1): www.kaggle.com
77 | downloading https://www.kaggle.com/c/dogs-vs-cats/download/sampleSubmission.csv
78 |
79 | Starting new HTTPS connection (1): storage.googleapis.com
80 | sampleSubmission.csv 100% |##################################################################################################################| Time: 0:00:00 320.2 KiB/s
81 |
82 | downloading https://www.kaggle.com/c/dogs-vs-cats/download/test1.zip
83 |
84 | test1.zip 100% |#############################################################################################################################| Time: 0:00:08 32.5 MiB/s
85 |
86 | downloading https://www.kaggle.com/c/dogs-vs-cats/download/train.zip
87 |
88 | train.zip 100% |#############################################################################################################################| Time: 0:00:17 31.4 MiB/s
89 | ```
90 |
91 | ### Download Kaggle Data (another way)
92 | Note: sometimes setting up the configuration results in an error the next time you try to download another competition. You may want to bypass configuration and directly include your user ID, password and competition name in one command line.
93 |
94 | ```bash
95 | kg download -u 'reshamashaikh' -p 'xxx' -c statoil-iceberg-classifier-challenge
96 | ```
97 |
98 | ### Step 5: Look at data that was downloaded
99 | ls -alt
100 | ```bash
101 | ubuntu@ip-10-0-0-13:~/data$ ls -alt
102 | total 833964
103 | -rw-rw-r-- 1 ubuntu ubuntu 569546721 Nov 4 18:24 train.zip
104 | drwxrwxr-x 2 ubuntu ubuntu 4096 Nov 4 18:24 .
105 | -rw-rw-r-- 1 ubuntu ubuntu 284321224 Nov 4 18:24 test1.zip
106 | -rw-rw-r-- 1 ubuntu ubuntu 88903 Nov 4 18:23 sampleSubmission.csv
107 | drwxr-xr-x 22 ubuntu ubuntu 4096 Nov 4 18:23 ..
108 | ubuntu@ip-10-0-0-13:~/data$
109 | ```
110 |
111 | ### Step 6: Unzip Files
112 | Note 1: You will need to install and use `unzip` to unzip files.
113 |
114 | For Windows users:
115 | 1. First, download Ubuntu from the Microsoft Store
116 |
117 | 2. Open PowerShell as Administrator and run:`Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux`
118 |
119 | 3. Once the download has completed, select "Launch". This will open a console window. Wait for the installation to complete, then you will be prompted to create your Linux user account.
120 |
121 | 4. Create your Linux username and password.
122 |
123 | 5. Go to Control Panel and turn on Developer Mode.
124 |
125 | 6. Run `bash` from command-prompt. After that you can follow same as Linux users guide.
126 |
127 |
128 | For Linux users:
129 | ```bash
130 | sudo apt install unzip
131 | unzip train.zip
132 | unzip -q test1.zip  # -q unzips quietly, suppressing the per-file output
133 | ```
134 |
135 | ```bash
136 | ubuntu@ip-10-0-0-13:~/nbs/data$ ls train/dogs/dog.1.jpg
137 | train/dogs/dog.1.jpg
138 | ubuntu@ip-10-0-0-13:~/nbs/data$ ls -l train/dogs/ | wc -l
139 | 12501
140 | ubuntu@ip-10-0-0-13:~/nbs/data$
141 |
142 |
143 | ubuntu@ip-10-0-0-13:~/nbs/data$ ls -l train/cats/ | wc -l
144 | 12501
145 | ubuntu@ip-10-0-0-13:~/nbs/data$
146 | ubuntu@ip-10-0-0-13:~/nbs/data$ ls test1 | wc -l
147 | 12500
148 | ubuntu@ip-10-0-0-13:~/nbs/data$
149 | ```
150 | Note: `ls -l | wc -l` also counts the `total` header line printed by `ls -l`, so 12501 corresponds to 12500 files.
150 |
151 | ---
152 | ## Kaggle - Submit Results
153 | ```bash
154 | kg submit <predictions-file> -u <username> -p <password> -c <competition> -m "<message>"
155 | ```
156 | > my example
157 | ```bash
158 | /home/ubuntu/data/iceberg/sub
159 | (fastai) ubuntu@ip-172-31-2-59:~/data/iceberg/sub$
160 | ```
161 | ```bash
162 | kg submit resnext50_sz150_zm13.csv -u 'reshamashaikh' -p 'xxx' -c statoil-iceberg-classifier-challenge
163 | ```
164 |
165 | ---
166 | ### Jeremy’s Setup
167 | It is good to copy 100 or so files to the sample directory; that is enough to check that the scripts are working.
168 |
169 | Advice 1: Split part of the TRAIN data into a VALIDATION set
170 | TASK: move 1000 images each of dogs / cats into valid
171 | ```bash
172 | > ls valid/cats/ | wc -l
173 | 1000
174 | > ls valid/dogs/ | wc -l
175 | 1000
176 | ```
177 |
178 | Advice 2: Do all of your work on sample data
179 | ```bash
180 | > ls sample/train
181 |
182 | > ls sample/valid
183 |
184 | > ls sample/train/cats | wc -l
185 | 8
186 | > ls sample/valid/cats | wc -l
187 | 4
188 | ```
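The two tasks above can be sketched as a shell script. This is a hypothetical sketch (not from the lecture): it first builds a toy `train/` layout with empty files so it runs anywhere; on the real Kaggle data, skip the setup loop and run only the `shuf`/`mv` and `head`/`cp` parts.

```bash
# --- Setup: toy train/ layout (skip this on the real data) ---
mkdir -p train/cats train/dogs
for i in $(seq 1 3000); do
  touch "train/cats/cat.$i.jpg" "train/dogs/dog.$i.jpg"
done

# --- Advice 1: move a random 1000 images per class into valid/ ---
mkdir -p valid/cats valid/dogs
for cls in cats dogs; do
  ls "train/$cls" | shuf -n 1000 | while read -r f; do
    mv "train/$cls/$f" "valid/$cls/"
  done
done

# --- Advice 2: copy a few images into sample/ for quick experiments ---
mkdir -p sample/train/cats sample/train/dogs sample/valid/cats sample/valid/dogs
for cls in cats dogs; do
  ls "train/$cls" | head -n 8 | xargs -I{} cp "train/$cls/{}" "sample/train/$cls/"
  ls "valid/$cls" | head -n 4 | xargs -I{} cp "valid/$cls/{}" "sample/valid/$cls/"
done

ls valid/cats | wc -l         # 1000
ls sample/train/cats | wc -l  # 8
```

Moving (rather than copying) into `valid/` matters: a validation image that also stays in `train/` would leak into training and inflate your accuracy.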
187 | ---
188 | ## Kaggle API
189 | Another option is to use the Kaggle API
190 | https://github.com/Kaggle/kaggle-api
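A minimal sketch of the same download/submit workflow using the official `kaggle` CLI (command names from the kaggle-api README). It assumes you have placed an API token in `~/.kaggle/kaggle.json` and accepted the competition rules on the website, so it is not runnable as-is without credentials.

```bash
pip install kaggle

# download the competition data
kaggle competitions download -c dogs-vs-cats

# submit a predictions file
kaggle competitions submit -c dogs-vs-cats -f submission.csv -m "my first submission"
```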
191 |
192 |
--------------------------------------------------------------------------------
/tools/getting_image_data.md:
--------------------------------------------------------------------------------
1 | # Getting Image Data
2 |
3 | ## Data Sources
4 | * [New York Public Library Digital Collections](https://digitalcollections.nypl.org) referred by [Enigma](https://www.enigma.com).
5 | * Google Images: https://images.google.com/
6 | * ImageNet: http://www.image-net.org
7 |
8 | ## Search for image of interest
9 | - Example: Search for images of horses
10 |
11 | ## Downloading Image Data using a Chrome Plug-in
12 | - This extension lets you bulk download images from a website.
13 | - Use the Image Downloader plugin, which downloads images:
14 |   https://chrome.google.com/webstore/detail/image-downloader/cnpniohnfphhjihaiiggeabnkjhpaldj
15 | - There are similar extensions for other browsers such as Firefox, Opera, etc.
16 |
21 | ---
22 | #### My Sample Data
23 | * Horses: https://digitalcollections.nypl.org/search/index?&keywords=horses&sort=score+desc#/?scroll=150
24 | * Camels: https://digitalcollections.nypl.org/search/index?utf8=%E2%9C%93&keywords=camels#/?scroll=180
25 |
--------------------------------------------------------------------------------
/tools/jupyter_notebook.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebook Commands & Shortcuts
2 |
3 | In [Kaggle 2017 data science survey](https://www.kaggle.com/surveys/2017) of 16K data scientists, Jupyter Notebook came up as 3rd most important self-reported tool for data science.
4 |
5 | ## Notebook Features
6 | * can add text, images, code - all in one place
7 | * can document what we're doing as we go along and code
8 | * can put pictures, videos, html tables, interactive widgets
9 | * great experimentation environment
10 |
11 | ## Help
12 | * `h` shows the list of shortcuts (press Esc first if you are editing a cell)
13 |
14 | ## Notebook Commands / Shortcuts
15 | * Shift + Enter: run cell
16 | * Shift + Tab, pressed once: tells you what parameters to pass
17 | * Shift + Tab, pressed 3 times: gives additional info about the method
18 |
19 | ### Select multiple cells
20 | * Esc, then Shift + :arrow_up: extends the selection to cells above
21 | * Esc, then Shift + :arrow_down: extends the selection to cells below
22 |
23 |
24 | ## Notebook Source Code Access
25 |
26 | ### to look at documentation for code (or a function)
27 | * `?` + function name
28 | * Example: `?ImageClassifierData.from_paths`
29 |
30 | ### to look at source code for a function
31 | * `??` + function name
32 | * Example: `??ImageClassifierData.from_paths`
33 |
34 | ### to find out where a particular function or class comes from
35 | * Type the function name, then Shift + Enter
36 | * Example input: `ImageClassifierData` + Shift + Enter
37 | * Example output: `fastai.dataset.ImageClassifierData`
38 | * Example input: `display` + Shift + Enter
39 | * Example output: ``
40 |
41 |
42 | ### to find out what parameters a function can take (also shows default parameter values)
43 | * Inside the function's parentheses, press Shift + Tab
44 | * `object`, then Tab shows all the options for that object or function
45 |
46 | ## Convert your notebooks to .md
47 | ```bash
48 | jupyter nbconvert --to