├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── code_summarization_transfer_learning
├── 1 - Preprocess Data.ipynb
├── 2 - Keras code summarization.ipynb
├── 3 - Train Language Model Using FastAI.ipynb
├── 4 - Train Model To Map Code Embeddings to Language Embeddings.ipynb
├── 5 - Build Search Index.ipynb
├── 6 - Eval metrics.ipynb
├── README.md
├── fastai
│ ├── .gitignore
│ ├── .travis.yml
│ ├── CODE-OF-CONDUCT.md
│ ├── LICENSE
│ ├── MANIFEST.in
│ ├── README.md
│ ├── courses
│ │ ├── dl1
│ │ │ ├── .gitignore
│ │ │ ├── adamw-sgdw-demo.ipynb
│ │ │ ├── cifar10-simplenet.ipynb
│ │ │ ├── cifar10.ipynb
│ │ │ ├── embedding_refactoring_unit_tests.ipynb
│ │ │ ├── excel
│ │ │ │ ├── collab_filter.xlsx
│ │ │ │ ├── conv-example.xlsx
│ │ │ │ ├── entropy_example.xlsx
│ │ │ │ ├── graddesc.xlsm
│ │ │ │ └── layers_example.xlsx
│ │ │ ├── fastai
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adaptive_softmax.py
│ │ │ │ ├── column_data.py
│ │ │ │ ├── conv_learner.py
│ │ │ │ ├── core.py
│ │ │ │ ├── dataloader.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── executors.py
│ │ │ │ ├── fp16.py
│ │ │ │ ├── images
│ │ │ │ │ └── industrial_fishing.png
│ │ │ │ ├── imports.py
│ │ │ │ ├── initializers.py
│ │ │ │ ├── io.py
│ │ │ │ ├── layer_optimizer.py
│ │ │ │ ├── layers.py
│ │ │ │ ├── learner.py
│ │ │ │ ├── lm_rnn.py
│ │ │ │ ├── losses.py
│ │ │ │ ├── lsuv_initializer.py
│ │ │ │ ├── metrics.py
│ │ │ │ ├── model.py
│ │ │ │ ├── models
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── cifar10
│ │ │ │ │ │ ├── main.sh
│ │ │ │ │ │ ├── main_dxy.py
│ │ │ │ │ │ ├── main_kuangliu.py
│ │ │ │ │ │ ├── preact_resnet.py
│ │ │ │ │ │ ├── resnext.py
│ │ │ │ │ │ ├── senet.py
│ │ │ │ │ │ ├── utils.py
│ │ │ │ │ │ ├── utils_kuangliu.py
│ │ │ │ │ │ └── wideresnet.py
│ │ │ │ │ ├── convert_torch.py
│ │ │ │ │ ├── darknet.py
│ │ │ │ │ ├── fa_resnet.py
│ │ │ │ │ ├── inceptionresnetv2.py
│ │ │ │ │ ├── inceptionv4.py
│ │ │ │ │ ├── nasnet.py
│ │ │ │ │ ├── resnet.py
│ │ │ │ │ ├── resnext_101_32x4d.py
│ │ │ │ │ ├── resnext_101_64x4d.py
│ │ │ │ │ ├── resnext_50_32x4d.py
│ │ │ │ │ ├── unet.py
│ │ │ │ │ ├── wideresnet.py
│ │ │ │ │ └── wrn_50_2f.py
│ │ │ │ ├── nlp.py
│ │ │ │ ├── plots.py
│ │ │ │ ├── rnn_reg.py
│ │ │ │ ├── rnn_train.py
│ │ │ │ ├── set_spawn.py
│ │ │ │ ├── sgdr.py
│ │ │ │ ├── structured.py
│ │ │ │ ├── swa.py
│ │ │ │ ├── text.py
│ │ │ │ ├── torch_imports.py
│ │ │ │ ├── transforms.py
│ │ │ │ ├── transforms_pil.py
│ │ │ │ └── utils.py
│ │ │ ├── fish.ipynb
│ │ │ ├── images
│ │ │ │ ├── pretrained.png
│ │ │ │ ├── sgdr.png
│ │ │ │ ├── zeiler1.png
│ │ │ │ ├── zeiler2.png
│ │ │ │ ├── zeiler3.png
│ │ │ │ └── zeiler4.png
│ │ │ ├── keras_lesson1.ipynb
│ │ │ ├── lang_model-arxiv.ipynb
│ │ │ ├── lang_model.ipynb
│ │ │ ├── lesson1-rxt50.ipynb
│ │ │ ├── lesson1-vgg.ipynb
│ │ │ ├── lesson1.ipynb
│ │ │ ├── lesson2-image_models.ipynb
│ │ │ ├── lesson3-rossman.ipynb
│ │ │ ├── lesson4-imdb.ipynb
│ │ │ ├── lesson5-movielens.ipynb
│ │ │ ├── lesson6-rnn.ipynb
│ │ │ ├── lesson6-sgd.ipynb
│ │ │ ├── lesson7-CAM.ipynb
│ │ │ ├── lesson7-cifar10.ipynb
│ │ │ ├── nasnet.ipynb
│ │ │ ├── nlp-arxiv.ipynb
│ │ │ ├── nlp.ipynb
│ │ │ ├── planet.py
│ │ │ ├── planet_cv.ipynb
│ │ │ ├── ppt
│ │ │ │ └── lesson6.pptx
│ │ │ ├── rossman_exp.py
│ │ │ ├── scripts
│ │ │ │ └── train_planet.py
│ │ │ ├── test_transforms.ipynb
│ │ │ └── xor.ipynb
│ │ ├── dl2
│ │ │ ├── .gitignore
│ │ │ ├── carvana-unet-lrg.ipynb
│ │ │ ├── carvana-unet.ipynb
│ │ │ ├── carvana.ipynb
│ │ │ ├── cgan
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ ├── data
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── aligned_dataset.py
│ │ │ │ │ ├── base_data_loader.py
│ │ │ │ │ ├── base_dataset.py
│ │ │ │ │ ├── custom_dataset_data_loader.py
│ │ │ │ │ ├── data_loader.py
│ │ │ │ │ ├── image_folder.py
│ │ │ │ │ ├── single_dataset.py
│ │ │ │ │ └── unaligned_dataset.py
│ │ │ │ ├── models
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_model.py
│ │ │ │ │ ├── cycle_gan_model.py
│ │ │ │ │ ├── models.py
│ │ │ │ │ ├── networks.py
│ │ │ │ │ ├── pix2pix_model.py
│ │ │ │ │ └── test_model.py
│ │ │ │ ├── options
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base_options.py
│ │ │ │ │ ├── test_options.py
│ │ │ │ │ └── train_options.py
│ │ │ │ ├── test.py
│ │ │ │ ├── train.py
│ │ │ │ └── util
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── get_data.py
│ │ │ │ │ ├── html.py
│ │ │ │ │ ├── image_pool.py
│ │ │ │ │ ├── util.py
│ │ │ │ │ └── visualizer.py
│ │ │ ├── cifar10-darknet.ipynb
│ │ │ ├── cifar10-dawn.ipynb
│ │ │ ├── cyclegan.ipynb
│ │ │ ├── devise.ipynb
│ │ │ ├── enhance.ipynb
│ │ │ ├── fastai
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ ├── adaptive_softmax.py
│ │ │ │ ├── column_data.py
│ │ │ │ ├── conv_learner.py
│ │ │ │ ├── core.py
│ │ │ │ ├── dataloader.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── executors.py
│ │ │ │ ├── fp16.py
│ │ │ │ ├── images
│ │ │ │ │ └── industrial_fishing.png
│ │ │ │ ├── imports.py
│ │ │ │ ├── initializers.py
│ │ │ │ ├── io.py
│ │ │ │ ├── layer_optimizer.py
│ │ │ │ ├── layers.py
│ │ │ │ ├── learner.py
│ │ │ │ ├── lm_rnn.py
│ │ │ │ ├── losses.py
│ │ │ │ ├── lsuv_initializer.py
│ │ │ │ ├── metrics.py
│ │ │ │ ├── model.py
│ │ │ │ ├── models
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── cifar10
│ │ │ │ │ │ ├── main.sh
│ │ │ │ │ │ ├── main_dxy.py
│ │ │ │ │ │ ├── main_kuangliu.py
│ │ │ │ │ │ ├── preact_resnet.py
│ │ │ │ │ │ ├── resnext.py
│ │ │ │ │ │ ├── senet.py
│ │ │ │ │ │ ├── utils.py
│ │ │ │ │ │ ├── utils_kuangliu.py
│ │ │ │ │ │ └── wideresnet.py
│ │ │ │ │ ├── convert_torch.py
│ │ │ │ │ ├── darknet.py
│ │ │ │ │ ├── fa_resnet.py
│ │ │ │ │ ├── inceptionresnetv2.py
│ │ │ │ │ ├── inceptionv4.py
│ │ │ │ │ ├── nasnet.py
│ │ │ │ │ ├── resnet.py
│ │ │ │ │ ├── resnext_101_32x4d.py
│ │ │ │ │ ├── resnext_101_64x4d.py
│ │ │ │ │ ├── resnext_50_32x4d.py
│ │ │ │ │ ├── unet.py
│ │ │ │ │ ├── wideresnet.py
│ │ │ │ │ └── wrn_50_2f.py
│ │ │ │ ├── nlp.py
│ │ │ │ ├── plots.py
│ │ │ │ ├── rnn_reg.py
│ │ │ │ ├── rnn_train.py
│ │ │ │ ├── set_spawn.py
│ │ │ │ ├── sgdr.py
│ │ │ │ ├── structured.py
│ │ │ │ ├── swa.py
│ │ │ │ ├── text.py
│ │ │ │ ├── torch_imports.py
│ │ │ │ ├── transforms.py
│ │ │ │ ├── transforms_pil.py
│ │ │ │ └── utils.py
│ │ │ ├── imdb.ipynb
│ │ │ ├── imdb_scripts
│ │ │ │ ├── README.md
│ │ │ │ ├── create_toks.py
│ │ │ │ ├── tok2id.py
│ │ │ │ ├── train_clas.py
│ │ │ │ ├── train_tri_lm.py
│ │ │ │ └── train_tri_wt.py
│ │ │ ├── lsun_scripts
│ │ │ │ ├── lsun-data.py
│ │ │ │ └── lsun-download.py
│ │ │ ├── pascal-multi.ipynb
│ │ │ ├── pascal.ipynb
│ │ │ ├── ppt
│ │ │ │ └── lesson8.pptx
│ │ │ ├── sampled_sm.py
│ │ │ ├── style-transfer-net.ipynb
│ │ │ ├── style-transfer.ipynb
│ │ │ ├── training_phase.ipynb
│ │ │ ├── translate.ipynb
│ │ │ ├── wgan.ipynb
│ │ │ └── xl
│ │ │ │ └── dl-examples.xlsx
│ │ └── ml1
│ │ │ ├── Ethics in Data Science.ipynb
│ │ │ ├── bulldozer_dl.ipynb
│ │ │ ├── bulldozer_linreg.ipynb
│ │ │ ├── excel
│ │ │ └── naivebayes.xlsx
│ │ │ ├── fastai
│ │ │ ├── .gitignore
│ │ │ ├── __init__.py
│ │ │ ├── adaptive_softmax.py
│ │ │ ├── column_data.py
│ │ │ ├── conv_learner.py
│ │ │ ├── core.py
│ │ │ ├── dataloader.py
│ │ │ ├── dataset.py
│ │ │ ├── executors.py
│ │ │ ├── fp16.py
│ │ │ ├── images
│ │ │ │ └── industrial_fishing.png
│ │ │ ├── imports.py
│ │ │ ├── initializers.py
│ │ │ ├── io.py
│ │ │ ├── layer_optimizer.py
│ │ │ ├── layers.py
│ │ │ ├── learner.py
│ │ │ ├── lm_rnn.py
│ │ │ ├── losses.py
│ │ │ ├── lsuv_initializer.py
│ │ │ ├── metrics.py
│ │ │ ├── model.py
│ │ │ ├── models
│ │ │ │ ├── .gitignore
│ │ │ │ ├── cifar10
│ │ │ │ │ ├── main.sh
│ │ │ │ │ ├── main_dxy.py
│ │ │ │ │ ├── main_kuangliu.py
│ │ │ │ │ ├── preact_resnet.py
│ │ │ │ │ ├── resnext.py
│ │ │ │ │ ├── senet.py
│ │ │ │ │ ├── utils.py
│ │ │ │ │ ├── utils_kuangliu.py
│ │ │ │ │ └── wideresnet.py
│ │ │ │ ├── convert_torch.py
│ │ │ │ ├── darknet.py
│ │ │ │ ├── fa_resnet.py
│ │ │ │ ├── inceptionresnetv2.py
│ │ │ │ ├── inceptionv4.py
│ │ │ │ ├── nasnet.py
│ │ │ │ ├── resnet.py
│ │ │ │ ├── resnext_101_32x4d.py
│ │ │ │ ├── resnext_101_64x4d.py
│ │ │ │ ├── resnext_50_32x4d.py
│ │ │ │ ├── unet.py
│ │ │ │ ├── wideresnet.py
│ │ │ │ └── wrn_50_2f.py
│ │ │ ├── nlp.py
│ │ │ ├── plots.py
│ │ │ ├── rnn_reg.py
│ │ │ ├── rnn_train.py
│ │ │ ├── set_spawn.py
│ │ │ ├── sgdr.py
│ │ │ ├── structured.py
│ │ │ ├── swa.py
│ │ │ ├── text.py
│ │ │ ├── torch_imports.py
│ │ │ ├── transforms.py
│ │ │ ├── transforms_pil.py
│ │ │ └── utils.py
│ │ │ ├── images
│ │ │ ├── bulldozers_data.png
│ │ │ ├── bulldozers_data2.png
│ │ │ ├── digit.gif
│ │ │ ├── ethics_recidivism.jpg
│ │ │ ├── mnist.png
│ │ │ ├── overfitting2.png
│ │ │ ├── sgd2.gif
│ │ │ ├── what_is_pytorch.png
│ │ │ ├── zeiler1.png
│ │ │ ├── zeiler2.png
│ │ │ ├── zeiler3.png
│ │ │ └── zeiler4.png
│ │ │ ├── lesson1-rf.ipynb
│ │ │ ├── lesson2-rf_interpretation.ipynb
│ │ │ ├── lesson3-rf_foundations.ipynb
│ │ │ ├── lesson4-mnist_sgd.ipynb
│ │ │ ├── lesson5-nlp.ipynb
│ │ │ └── ppt
│ │ │ ├── 2017-12-ethics.pptx
│ │ │ └── ml_applications.pptx
│ ├── docs
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── abbr.md
│ │ ├── anatomy.adoc
│ │ ├── dataloader.adoc
│ │ ├── expand_adoc_templ.ipynb
│ │ ├── gen_ascii_docs.py
│ │ ├── md_expander.py
│ │ ├── module-decisions.md
│ │ ├── style.md
│ │ ├── templates.py
│ │ ├── testing.adoc
│ │ ├── transforms-tmpl.adoc
│ │ ├── transforms.adoc
│ │ └── transforms.html
│ ├── environment-cpu.yml
│ ├── environment-nopytorch.yml
│ ├── environment-old.yml
│ ├── environment.yml
│ ├── fastai
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── adaptive_softmax.py
│ │ ├── column_data.py
│ │ ├── conv_learner.py
│ │ ├── core.py
│ │ ├── dataloader.py
│ │ ├── dataset.py
│ │ ├── executors.py
│ │ ├── fp16.py
│ │ ├── images
│ │ │ └── industrial_fishing.png
│ │ ├── imports.py
│ │ ├── initializers.py
│ │ ├── io.py
│ │ ├── layer_optimizer.py
│ │ ├── layers.py
│ │ ├── learner.py
│ │ ├── lm_rnn.py
│ │ ├── losses.py
│ │ ├── lsuv_initializer.py
│ │ ├── metrics.py
│ │ ├── model.py
│ │ ├── models
│ │ │ ├── .gitignore
│ │ │ ├── cifar10
│ │ │ │ ├── main.sh
│ │ │ │ ├── main_dxy.py
│ │ │ │ ├── main_kuangliu.py
│ │ │ │ ├── preact_resnet.py
│ │ │ │ ├── resnext.py
│ │ │ │ ├── senet.py
│ │ │ │ ├── utils.py
│ │ │ │ ├── utils_kuangliu.py
│ │ │ │ └── wideresnet.py
│ │ │ ├── convert_torch.py
│ │ │ ├── darknet.py
│ │ │ ├── fa_resnet.py
│ │ │ ├── inceptionresnetv2.py
│ │ │ ├── inceptionv4.py
│ │ │ ├── nasnet.py
│ │ │ ├── resnet.py
│ │ │ ├── resnext_101_32x4d.py
│ │ │ ├── resnext_101_64x4d.py
│ │ │ ├── resnext_50_32x4d.py
│ │ │ ├── unet.py
│ │ │ ├── wideresnet.py
│ │ │ └── wrn_50_2f.py
│ │ ├── nlp.py
│ │ ├── plots.py
│ │ ├── rnn_reg.py
│ │ ├── rnn_train.py
│ │ ├── set_spawn.py
│ │ ├── sgdr.py
│ │ ├── structured.py
│ │ ├── swa.py
│ │ ├── text.py
│ │ ├── torch_imports.py
│ │ ├── transforms.py
│ │ ├── transforms_pil.py
│ │ └── utils.py
│ ├── pytest.ini
│ ├── requirements.txt
│ ├── setup.cfg
│ ├── setup.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_core.py
│ │ ├── test_layer_optimizer.py
│ │ ├── test_lsuv_initializer.py
│ │ ├── test_samplers.py
│ │ └── test_transform.py
│ └── tutorials
│ │ ├── __init__.py
│ │ ├── fastai
│ │ ├── .gitignore
│ │ ├── __init__.py
│ │ ├── adaptive_softmax.py
│ │ ├── column_data.py
│ │ ├── conv_learner.py
│ │ ├── core.py
│ │ ├── dataloader.py
│ │ ├── dataset.py
│ │ ├── executors.py
│ │ ├── fp16.py
│ │ ├── images
│ │ │ └── industrial_fishing.png
│ │ ├── imports.py
│ │ ├── initializers.py
│ │ ├── io.py
│ │ ├── layer_optimizer.py
│ │ ├── layers.py
│ │ ├── learner.py
│ │ ├── lm_rnn.py
│ │ ├── losses.py
│ │ ├── lsuv_initializer.py
│ │ ├── metrics.py
│ │ ├── model.py
│ │ ├── models
│ │ │ ├── .gitignore
│ │ │ ├── cifar10
│ │ │ │ ├── main.sh
│ │ │ │ ├── main_dxy.py
│ │ │ │ ├── main_kuangliu.py
│ │ │ │ ├── preact_resnet.py
│ │ │ │ ├── resnext.py
│ │ │ │ ├── senet.py
│ │ │ │ ├── utils.py
│ │ │ │ ├── utils_kuangliu.py
│ │ │ │ └── wideresnet.py
│ │ │ ├── convert_torch.py
│ │ │ ├── darknet.py
│ │ │ ├── fa_resnet.py
│ │ │ ├── inceptionresnetv2.py
│ │ │ ├── inceptionv4.py
│ │ │ ├── nasnet.py
│ │ │ ├── resnet.py
│ │ │ ├── resnext_101_32x4d.py
│ │ │ ├── resnext_101_64x4d.py
│ │ │ ├── resnext_50_32x4d.py
│ │ │ ├── unet.py
│ │ │ ├── wideresnet.py
│ │ │ └── wrn_50_2f.py
│ │ ├── nlp.py
│ │ ├── plots.py
│ │ ├── rnn_reg.py
│ │ ├── rnn_train.py
│ │ ├── set_spawn.py
│ │ ├── sgdr.py
│ │ ├── structured.py
│ │ ├── swa.py
│ │ ├── text.py
│ │ ├── torch_imports.py
│ │ ├── transforms.py
│ │ ├── transforms_pil.py
│ │ └── utils.py
│ │ ├── images
│ │ ├── cifar10.png
│ │ ├── demba_combustion_engine.png
│ │ ├── digit.gif
│ │ ├── fashion-mnist.png
│ │ ├── markov_health.jpg
│ │ ├── mnist.png
│ │ ├── normal.jpg
│ │ ├── overfitting.png
│ │ ├── overfitting2.png
│ │ ├── sgd2.gif
│ │ ├── shop.png
│ │ ├── what_is_pytorch.png
│ │ ├── zeiler1.png
│ │ ├── zeiler2.png
│ │ ├── zeiler3.png
│ │ └── zeiler4.png
│ │ ├── kmeans.py
│ │ ├── linalg_pytorch.ipynb
│ │ └── meanshift.ipynb
├── feature_extractor.py
├── general_utils.py
├── lang_model_utils.py
├── seq2seq_utils.py
└── visitor.py
├── pytorch_model
├── README.md
├── codesearcher.py
├── configs.py
├── data.py
├── java
│ ├── test.apiseq.h5
│ ├── test.desc.h5
│ ├── test.methname.h5
│ ├── test.rawcode.txt
│ ├── test.tokens.h5
│ ├── train.apiseq.h5
│ ├── train.desc.h5
│ ├── train.methname.h5
│ ├── train.tokens.h5
│ ├── use.apiseq.h5
│ ├── use.codevecs.normalized.h5
│ ├── use.methname.h5
│ ├── use.rawcode.txt
│ ├── use.tokens.h5
│ ├── vocab.apiseq.pkl
│ ├── vocab.desc.pkl
│ ├── vocab.methname.pkl
│ └── vocab.tokens.pkl
├── models.py
├── python
│ ├── small.rawcode.txt
│ ├── small.test.apiseq.npy
│ ├── small.test.desc.npy
│ ├── small.test.methname.npy
│ ├── small.test.tokens.npy
│ ├── test.apiseq.npy
│ ├── test.desc.npy
│ ├── test.methname.npy
│ ├── test.tokens.npy
│ ├── train.apiseq.npy
│ ├── train.desc.npy
│ ├── train.methname.npy
│ ├── train.tokens.npy
│ ├── vocab.apiseq.pkl
│ ├── vocab.desc.pkl
│ ├── vocab.methname.pkl
│ └── vocab.tokens.pkl
├── requirements.txt
└── utils.py
└── screenshot.png
/.gitattributes:
--------------------------------------------------------------------------------
1 | pytorch_model/java/* filter=lfs diff=lfs merge=lfs -text
2 | pytorch_model/python/* filter=lfs diff=lfs merge=lfs -text
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Chintan Shah
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Semantic Code Search
2 | Code for the paper: [Deep Semantic Code Search](https://drive.google.com/file/d/1FFTmqfuz3ghLGomzGIARA6p0I1jpVnQ3/view?usp=drivesdk)
3 |
4 | Deep Semantic Code Search learns a joint embedding space for code and natural-language description vectors and then uses it for a code search application (a toy sketch of the search step is in the appendix at the end of this README).
5 |
6 | These experiments have two parts:
7 | 
8 | 1. The first uses the approach suggested in [1]; we train that architecture on our own Python dataset.
9 | 2. The second expands on the first using the methodology suggested in [2], and achieves reasonably good results.
10 |
11 | We can observe that some semantic information is captured in the results:
12 |
13 | 
14 |
15 |
16 | ### Instructions on reproducing our results
17 |
18 | Implementation of [1] is within [Joint Training Model](pytorch_model) and [2] is within [Code Summarization Transfer Learning](code_summarization_transfer_learning)
19 |
20 | ### Dataset
21 |
22 | For [1], our dataset is provided within [Joint Training Model](pytorch_model).
23 | For [2], the full dataset is available on [Google Cloud Platform](http://storage.googleapis.com/deep-code-search-models/).
24 |
25 | For instructions on accessing public data on GCP, see https://cloud.google.com/storage/docs/access-public-data
26 |
27 | ### References:
28 |
29 | [1] https://guxd.github.io/papers/deepcs.pdf
30 |
31 | [2] https://towardsdatascience.com/semantic-code-search-3cd6d244a39c
32 |
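33 | ### Appendix: toy search sketch
34 | 
35 | Once code snippets and natural-language queries live in one embedding space, search reduces to a nearest-neighbour lookup over normalized vectors. The sketch below is purely illustrative (made-up shapes and names, not this repository's actual API):
36 | 
37 | ```python
38 | import numpy as np
39 | 
40 | def search(query_vec, code_vecs, k=5):
41 |     """Return indices of the k code vectors most similar to the query."""
42 |     q = query_vec / np.linalg.norm(query_vec)
43 |     c = code_vecs / np.linalg.norm(code_vecs, axis=1, keepdims=True)
44 |     return np.argsort(-(c @ q))[:k]  # cosine similarity = dot product of unit vectors
45 | ```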
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/README.md:
--------------------------------------------------------------------------------
1 | ## Code summarization using transfer learning
2 |
3 |
4 | ### How to run?
5 |
6 | These notebooks should be run sequentially, using the Docker containers listed below.
7 |
8 | 1. The first notebook fetches and creates the dataset.
9 | 2. The second notebook vectorizes the code sequence and description sequence and trains 3 seq2seq models:
10 | * Seq2Seq model from function tokens -> docstring
11 | * Seq2Seq model from api seq -> docstring
12 | * Seq2Seq model from method name -> docstring
13 | 3. This notebook trains an AWD LSTM language model for docstrings using FastAI's implementation.
14 | 4. This notebook trains the final joint embedder from code vectors to docstring vectors (a toy sketch of this idea is in the appendix below).
15 | 5. In this notebook, we build a search engine that uses the trained networks to output query results.
16 | 6. This notebook evaluates the model.
17 |
18 | To run these notebooks (1 - 6), we highly recommend using these Docker containers:
19 |
20 | #### Docker Containers
21 |
22 | - [hamelsmu/ml-gpu](https://hub.docker.com/r/hamelsmu/ml-gpu/): Use this container for any *gpu* bound parts.
23 |
24 | - [hamelsmu/ml-cpu](https://hub.docker.com/r/hamelsmu/ml-cpu/): Use this container for any *cpu* bound parts.
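25 | 
26 | #### Appendix: toy sketch of notebook 4's idea
27 | 
28 | Notebook 4 learns a map from code embeddings into the docstring (language) embedding space, so that code and natural-language queries become directly comparable. The sketch below is purely illustrative (random stand-in vectors, made-up shapes, not the notebook's actual code):
29 | 
30 | ```python
31 | import numpy as np
32 | 
33 | rng = np.random.default_rng(0)
34 | code_vecs = rng.normal(size=(1000, 512))   # stand-in for seq2seq code embeddings
35 | desc_vecs = rng.normal(size=(1000, 400))   # stand-in for AWD LSTM docstring embeddings
36 | 
37 | # least-squares linear map W from code space into language space
38 | W, *_ = np.linalg.lstsq(code_vecs, desc_vecs, rcond=None)
39 | mapped_code_vecs = code_vecs @ W           # now comparable to docstring vectors
40 | ```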
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | .pypirc
3 | ~*
4 | tmp*
5 | sample_data/
6 | tags
7 | data
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # AsciiDoc documentation
76 | docs/fastai/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # dotenv
94 | .env
95 |
96 | # virtualenv
97 | .venv
98 | venv/
99 | ENV/
100 |
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 |
105 | # Rope project settings
106 | .ropeproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 |
114 | .vscode
115 | *.swp
116 |
117 | # osx generated files
118 | .DS_Store
119 | .DS_Store?
120 | .Trashes
121 | ehthumbs.db
122 | Thumbs.db
123 | .idea
124 |
125 | # pytest
126 | .pytest_cache
127 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | # We don't actually use the Travis Python, but this keeps it organized.
4 | # - "2.7"
5 | # - "3.5"
6 | - "3.6"
7 | install:
8 | - sudo apt-get update
9 | # We do this conditionally because it saves us some downloading if the
10 | # version is the same.
11 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
12 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
13 | else
14 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
15 | fi
16 | - bash miniconda.sh -b -p $HOME/miniconda
17 | - export PATH="$HOME/miniconda/bin:$PATH"
18 | - hash -r
19 | - conda config --set always_yes yes --set changeps1 no
20 | - conda update -q conda
21 | # Useful for debugging any issues with conda
22 | - conda info -a
23 | - conda env update -f environment-cpu.yml
24 | - source activate fastai-cpu
25 | - pip install -U pytest
26 | # For some reason the pip section is not supported by conda env update; for the time being I've pasted it here:
27 | - pip install opencv-python
28 | - pip install graphviz
29 | - pip install "torchvision>=0.1.9"
30 | - pip install opencv-python
31 | - pip install isoweek
32 | - pip install pandas_summary
33 | - pip install torchtext
34 | - pip install graphviz
35 | - pip install sklearn_pandas
36 | - pip install feather-format
37 | - pip install jupyter_contrib_nbextensions
38 | - pip install plotnine
39 | - pip install awscli
40 | - pip install kaggle-cli
41 | - pip install ipywidgets
42 | - pip install jupyter_contrib_nbextensions
43 | - pip install git+https://github.com/SauceCat/PDPbox.git
44 |
45 | # these libs are required by OpenCV on Ubuntu 16.04
46 | - sudo apt install -y libsm6 libxext6 libxrender-dev libgl1-mesa-glx
47 |
48 |
49 | script:
50 | #- python -m unittest
51 | - python -m pytest tests
52 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include *.txt
3 | include *.yml
4 | include Makefile
5 | recursive-include fastai/models *
6 | recursive-include fastai/images *
7 |
8 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/README.md:
--------------------------------------------------------------------------------
1 | # fast.ai
2 | The fast.ai deep learning library, lessons, and tutorials.
3 |
4 | Copyright 2017 onwards, Jeremy Howard. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. A copy of the License is provided in the LICENSE file in this repository.
5 |
6 | ## Current Status
7 | This is an alpha version.
8 |
9 | Most of the library is quite well tested since many students have used it to complete the [Practical Deep Learning for Coders](http://course.fast.ai) course. However it hasn't been widely used yet outside of the course, so you may find some missing features or rough edges.
10 |
11 | If you're interested in using the library in your own projects, we're happy to help support any bug fixes or feature additions you need—please use [http://forums.fast.ai](http://forums.fast.ai) to discuss.
12 |
13 | ## To install
14 |
15 | ### Prerequisites
16 | * [Anaconda](https://conda.io/docs/user-guide/install/index.html#), manages Python environment and dependencies
17 |
18 | ### Normal installation
19 | 1. Download project: `git clone https://github.com/fastai/fastai.git`
20 | 1. Move into root folder: `cd fastai`
21 | 1. Set up Python environment: `conda env update`
22 | 1. Activate Python environment: `conda activate fastai`
23 | - If this fails, use instead: `source activate fastai`
24 |
25 | ### Install as pip package
26 | You can also install this library in the local environment using `pip`
27 |
28 | `pip install fastai`
29 |
30 | However this is not currently the recommended approach, since the library is being updated much more frequently than the pip release, fewer people are using and testing the pip version, and pip needs to compile many libraries from scratch (which can be slow).
31 |
32 | ### CPU only environment
33 | Use this if you do not have an NVidia GPU. Note you are encouraged to use Paperspace to access a GPU in the cloud by following this [guide](https://github.com/reshamas/fastai_deeplearn_part1/blob/master/tools/paperspace.md).
34 |
35 | `conda env update -f environment-cpu.yml`
36 |
37 | ## To update
38 | To update everything at any time:
39 |
40 | 1. Update code: `git pull`
41 | 1. Update dependencies: `conda env update`
42 |
43 | ## To test
44 | Before submitting a pull request, run the unit tests:
45 |
46 | 1. Activate Python environment: `conda activate fastai`
47 | - If this fails, use instead: `source activate fastai`
48 | 1. Run tests: `python -m pytest tests`
49 |
50 | ### To run specific test file
51 | 1. Activate Python environment: `conda activate fastai`
52 | - If this fails, use instead: `source activate fastai`
53 | 1. `python -m pytest tests/[file_name.py]`
54 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/.gitignore:
--------------------------------------------------------------------------------
1 | *-Copy?.ipynb
2 | *-Copy??.ipynb
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/excel/collab_filter.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/collab_filter.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/excel/conv-example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/conv-example.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/excel/entropy_example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/entropy_example.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/excel/graddesc.xlsm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/graddesc.xlsm
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/excel/layers_example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/layers_example.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 | """
10 | Collects iterables lazily, rather than immediately.
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |         Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 | """
14 | if timeout is not None: end_time = timeout + time.time()
15 | if prefetch is None: prefetch = self._max_workers
16 | if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 | argsiter = zip(*iterables)
18 | fs = collections.deque(self.submit(fn, *args) for args in
19 | itertools.islice(argsiter, self._max_workers + prefetch))
20 |
21 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
22 | def result_iterator():
23 | nonlocal argsiter
24 | try:
25 | while fs:
26 | res = fs[0].result() if timeout is None else fs[0].result(
27 | end_time - time.time())
28 | # Got a result, future needn't be cancelled
29 | del fs[0]
30 | # Dispatch next task before yielding to keep pipeline full
31 | if argsiter:
32 | try:
33 | args = next(argsiter)
34 | except StopIteration:
35 | argsiter = None
36 | else:
37 | fs.append(self.submit(fn, *args))
38 | yield res
39 | finally:
40 | for future in fs: future.cancel()
41 |
42 | return result_iterator()
43 |
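44 | 
45 | # Illustrative usage sketch (not part of the original file). Unlike the stock
46 | # Executor.map, this lazy version keeps only max_workers + prefetch tasks in
47 | # flight, so it can consume very large or even infinite iterables:
48 | if __name__ == '__main__':
49 |     with LazyThreadPoolExecutor(max_workers=4) as pool:
50 |         squares = pool.map(pow, itertools.count(), itertools.repeat(2), prefetch=4)
51 |         print(list(itertools.islice(squares, 5)))  # [0, 1, 4, 9, 16]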
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 | def __init__(self, module):
7 | super(FP16, self).__init__()
8 | self.module = batchnorm_to_fp32(module.half())
9 |
10 | def forward(self, input):
11 | return self.module(input.half())
12 |
13 | def load_state_dict(self, *inputs, **kwargs):
14 | self.module.load_state_dict(*inputs, **kwargs)
15 |
16 | def state_dict(self, *inputs, **kwargs):
17 | return self.module.state_dict(*inputs, **kwargs)
18 |
19 |
20 | def batchnorm_to_fp32(module):
21 | '''
22 |     Convert BatchNorm layers to have parameters in single precision.
23 | Find all layers and convert them back to float. This can't
24 | be done with built in .apply as that function will apply
25 | fn to all modules, parameters, and buffers. Thus we wouldn't
26 | be able to guard the float conversion based on the module type.
27 | '''
28 | if isinstance(module, nn.modules.batchnorm._BatchNorm):
29 | module.float()
30 | for child in module.children():
31 | batchnorm_to_fp32(child)
32 | return module
33 |
34 |
35 | def copy_model_to_fp32(m, optim):
36 | """ Creates a fp32 copy of model parameters and sets optimizer parameters
37 | """
38 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in
39 | m.parameters()]
40 | optim_groups = [group['params'] for group in optim.param_groups]
41 | iter_fp32_params = iter(fp32_params)
42 | for group_params in optim_groups:
43 | for i in range(len(group_params)):
44 | fp32_param = next(iter_fp32_params)
45 | fp32_param.requires_grad = group_params[i].requires_grad
46 | group_params[i] = fp32_param
47 | return fp32_params
48 |
49 |
50 | def copy_fp32_to_model(m, fp32_params):
51 | m_params = list(m.parameters())
52 | for fp32_param, m_param in zip(fp32_params, m_params):
53 | m_param.data.copy_(fp32_param.data)
54 |
55 |
56 | def update_fp32_grads(fp32_params, m):
57 | m_params = list(m.parameters())
58 | for fp32_param, m_param in zip(fp32_params, m_params):
59 | if fp32_param.grad is None:
60 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
61 | fp32_param.grad.data.copy_(m_param.grad.data)
62 |
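63 | # Illustrative training-step sketch (not part of the original file), showing how the
64 | # helpers above typically compose for mixed-precision training:
65 | #   model = FP16(model)                              # fp16 weights and activations
66 | #   fp32_params = copy_model_to_fp32(model, optim)   # optimizer steps on an fp32 master copy
67 | #   loss = crit(model(x), y)
68 | #   (loss * loss_scale).backward()                   # scale up to avoid fp16 gradient underflow
69 | #   update_fp32_grads(fp32_params, model)            # copy fp16 grads into the fp32 master grads
70 | #   for p in fp32_params: p.grad.data.div_(loss_scale)
71 | #   optim.step()
72 | #   copy_fp32_to_model(model, fp32_params)           # write updated fp32 weights back as fp16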
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/imports.py:
--------------------------------------------------------------------------------
1 | from IPython.lib.deepreload import reload as dreload
2 | import PIL, os, numpy as np, threading, json, bcolz, scipy
3 | import pandas as pd, pickle, string, sys, re, time, shutil, copy
4 | import seaborn as sns, matplotlib
5 | from abc import abstractmethod
6 | from functools import partial
7 | from pandas_summary import DataFrameSummary
8 | from IPython.lib.display import FileLink
9 | from sklearn import metrics, ensemble, preprocessing
10 | from operator import itemgetter, attrgetter
11 |
12 | from matplotlib import pyplot as plt, rcParams, animation
13 |
14 | matplotlib.rc('animation', html='html5')
15 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
16 |
17 | from ipykernel.kernelapp import IPKernelApp
18 |
19 |
20 | def in_notebook(): return IPKernelApp.initialized()
21 |
22 |
23 | def in_ipynb():
24 | try:
25 | cls = get_ipython().__class__.__name__
26 | return cls == 'ZMQInteractiveShell'
27 | except NameError:
28 | return False
29 |
30 |
31 | import tqdm as tq
32 |
33 |
34 | def clear_tqdm():
35 | inst = getattr(tq.tqdm, '_instances', None)
36 | if not inst: return
37 | try:
38 | for i in range(len(inst)): inst.pop().close()
39 | except Exception:
40 | pass
41 |
42 |
43 | if in_notebook():
44 | def tqdm(*args, **kwargs):
45 | clear_tqdm()
46 | return tq.tqdm(*args, file=sys.stdout, **kwargs)
47 |
48 |
49 | def trange(*args, **kwargs):
50 | clear_tqdm()
51 | return tq.trange(*args, file=sys.stdout, **kwargs)
52 | else:
53 | from tqdm import tqdm, trange
54 |
55 | tnrange = trange
56 | tqdm_notebook = tqdm
57 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | 
3 | 
4 | def cond_init(m, init_fn):
5 |     if not isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
6 |         if hasattr(m, 'weight'): init_fn(m.weight)
7 |         if hasattr(m, 'bias'): m.bias.data.fill_(0.)
8 | 
9 | 
10 | def apply_init(m, init_fn):
11 |     m.apply(lambda x: cond_init(x, init_fn))
12 | 
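13 | # Illustrative usage (not part of the original file):
14 | #   apply_init(model, nn.init.kaiming_normal_)   # He-init every non-BatchNorm layer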
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/io.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlretrieve
2 |
3 | from tqdm import tqdm
4 |
5 | from .torch_imports import *
6 |
7 |
8 | class TqdmUpTo(tqdm):
9 | def update_to(self, b=1, bsize=1, tsize=None):
10 | if tsize is not None: self.total = tsize
11 | self.update(b * bsize - self.n)
12 |
13 |
14 | def get_data(url, filename):
15 | if not os.path.exists(filename):
16 |
17 | dirname = os.path.dirname(filename)
18 | if not os.path.exists(dirname):
19 | os.makedirs(dirname)
20 |
21 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
22 | urlretrieve(url, filename, reporthook=t.update_to)
23 |
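24 | # Illustrative usage (not part of the original file); the URL and path are placeholders:
25 | #   get_data('https://example.com/mnist.pkl.gz', 'data/mnist.pkl.gz')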
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | 
5 | class AdaptiveConcatPool2d(nn.Module):
6 |     def __init__(self, sz=None):
7 |         super().__init__()
8 |         sz = sz or (1, 1)
9 |         self.ap = nn.AdaptiveAvgPool2d(sz)
10 |         self.mp = nn.AdaptiveMaxPool2d(sz)
11 | 
12 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
13 | 
14 | 
15 | class Lambda(nn.Module):
16 |     def __init__(self, f): super().__init__(); self.f = f
17 | 
18 |     def forward(self, x): return self.f(x)
19 | 
20 | 
21 | class Flatten(nn.Module):
22 |     def __init__(self): super().__init__()
23 | 
24 |     def forward(self, x): return x.view(x.size(0), -1)
25 | 
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | 
3 | 
4 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
5 |     y_pred = (y_pred.float() > threshold).float()
6 |     y_true = y_true.float()
7 |     tp = (y_pred * y_true).sum(dim=1)
8 |     precision = tp / (y_pred.sum(dim=1) + eps)
9 |     recall = tp / (y_true.sum(dim=1) + eps)
10 |     return torch.mean(
11 |         precision * recall / (precision * (beta ** 2) + recall + eps) * (1 + beta ** 2))
12 | 
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | from .torch_imports import *
3 |
4 | def accuracy_np(preds, targs):
5 | preds = np.argmax(preds, 1)
6 | return (preds == targs).mean()
7 |
8 |
9 | def accuracy(preds, targs):
10 | preds = torch.max(preds, dim=1)[1]
11 | return (preds == targs).float().mean()
12 |
13 |
14 | def accuracy_thresh(thresh):
15 | return lambda preds, targs: accuracy_multi(preds, targs, thresh)
16 |
17 |
18 | def accuracy_multi(preds, targs, thresh):
19 | return ((preds > thresh).float() == targs).float().mean()
20 |
21 |
22 | def accuracy_multi_np(preds, targs, thresh):
23 | return ((preds > thresh) == targs).mean()
24 |
25 |
26 | def recall(preds, targs, thresh=0.5):
27 | pred_pos = preds > thresh
28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 | return tpos.sum() / targs.sum()
30 |
31 |
32 | def precision(preds, targs, thresh=0.5):
33 | pred_pos = preds > thresh
34 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
35 | return tpos.sum() / pred_pos.sum()
36 |
37 |
38 | def fbeta(preds, targs, beta, thresh=0.5):
39 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
40 | This is the micro averaged version where the true positives, false negatives and
41 | false positives are calculated globally (as opposed to on a per label basis).
42 |
43 |     beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision and
44 | beta > 1 favors recall.
45 | """
46 | assert beta > 0, 'beta needs to be greater than 0'
47 | beta2 = beta ** 2
48 | rec = recall(preds, targs, thresh)
49 | prec = precision(preds, targs, thresh)
50 | return (1 + beta2) * prec * rec / (beta2 * prec + rec)
51 |
52 |
53 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
54 |
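55 | # Worked example (not part of the original file): with thresh=0.5,
56 | #   preds = [[0.9, 0.1], [0.8, 0.7]], targs = [[1, 0], [0, 1]]
57 | # yields 3 predicted positives, of which 2 are true, and 2 actual positives:
58 | # micro precision = 2/3, recall = 2/2 = 1, so f1 = 2*P*R/(P+R) = 0.8.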
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from .layers import *
4 | 
5 |
6 |
7 | class ConvBN(nn.Module):
8 | "convolutional layer then batchnorm"
9 |
10 | def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
11 | super().__init__()
12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride,
13 | padding=padding, bias=False)
14 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
15 | self.relu = nn.LeakyReLU(0.1, inplace=True)
16 |
17 | def forward(self, x): return self.relu(self.bn(self.conv(x)))
18 |
19 |
20 | class DarknetBlock(nn.Module):
21 | def __init__(self, ch_in):
22 | super().__init__()
23 | ch_hid = ch_in // 2
24 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
25 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
26 |
27 | def forward(self, x): return self.conv2(self.conv1(x)) + x
28 |
29 |
30 | class Darknet(nn.Module):
31 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
32 |
33 | def make_group_layer(self, ch_in, num_blocks, stride=1):
34 | layers = [ConvBN(ch_in, ch_in * 2, stride=stride)]
35 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in * 2))
36 | return layers
37 |
38 | def __init__(self, num_blocks, num_classes=1000, start_nf=32):
39 | super().__init__()
40 | nf = start_nf
41 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
42 | for i, nb in enumerate(num_blocks):
43 | layers += self.make_group_layer(nf, nb, stride=(1 if i == 1 else 2))
44 | nf *= 2
45 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
46 | self.layers = nn.Sequential(*layers)
47 |
48 | def forward(self, x):
49 | return self.layers(x)
50 |
51 |
52 | def darknet_53(num_classes=1000): return Darknet([1, 2, 8, 8, 4], num_classes)
53 |
54 |
55 | def darknet_small(num_classes=1000): return Darknet([1, 2, 4, 8, 4], num_classes)
56 |
57 |
58 | def darknet_mini(num_classes=1000): return Darknet([1, 2, 4, 4, 2], num_classes, start_nf=24)
59 |
60 |
61 | def darknet_mini2(num_classes=1000): return Darknet([1, 2, 8, 8, 4], num_classes, start_nf=16)
62 |
63 |
64 | def darknet_mini3(num_classes=1000): return Darknet([1, 2, 4, 4], num_classes)
65 |
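66 | # Illustrative usage (not part of the original file):
67 | #   import torch
68 | #   model = darknet_53(num_classes=10)
69 | #   logits = model(torch.randn(2, 3, 256, 256))   # -> shape (2, 10)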
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h)
30 | x = np.random.randint(w)
31 |
32 |             y1 = int(np.clip(y - self.length // 2, 0, h))
33 |             y2 = int(np.clip(y + self.length // 2, 0, h))
34 |             x1 = int(np.clip(x - self.length // 2, 0, w))
35 |             x2 = int(np.clip(x + self.length // 2, 0, w))
36 |
37 | mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
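45 | # Illustrative usage (not part of the original file): Cutout expects a (C, H, W)
46 | # tensor, so apply it after ToTensor() in a torchvision pipeline, e.g.:
47 | #   import torchvision.transforms as T
48 | #   tfms = T.Compose([T.ToTensor(), Cutout(n_holes=1, length=16)])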
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/pretrained.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/pretrained.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/sgdr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/sgdr.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler1.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler3.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler4.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/planet.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from fastai.dataset import *
4 | from fastai.imports import *
5 | from fastai.transforms import *
6 | from sklearn.metrics import fbeta_score
7 |
8 |
9 | def f2(preds, targs, start=0.17, end=0.24, step=0.01):
10 | with warnings.catch_warnings():
11 | warnings.simplefilter("ignore")
12 | return max([fbeta_score(targs, (preds>th), 2, average='samples')
13 | for th in np.arange(start,end,step)])
14 |
15 | def opt_th(preds, targs, start=0.17, end=0.24, step=0.01):
16 | ths = np.arange(start,end,step)
17 | idx = np.argmax([fbeta_score(targs, (preds>th), 2, average='samples')
18 | for th in ths])
19 | return ths[idx]
20 |
21 | def get_data(path, tfms,bs, n, cv_idx):
22 | val_idxs = get_cv_idxs(n, cv_idx)
23 | return ImageClassifierData.from_csv(path, 'train-jpg', f'{path}train_v2.csv', bs, tfms,
24 | suffix='.jpg', val_idxs=val_idxs, test_name='test-jpg')
25 |
26 | def get_data_zoom(f_model, path, sz, bs, n, cv_idx):
27 | tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.05)
28 | return get_data(path, tfms, bs, n, cv_idx)
29 |
30 | def get_data_pad(f_model, path, sz, bs, n, cv_idx):
31 | transforms_pt = [RandomRotateZoom(9, 0.18, 0.1), RandomLighting(0.05, 0.1), RandomDihedral()]
32 | tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_pt, pad=sz//12)
33 | return get_data(path, tfms, bs, n, cv_idx)
34 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/ppt/lesson6.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/ppt/lesson6.pptx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/scripts/train_planet.py:
--------------------------------------------------------------------------------
1 | from dataset_pt import *
2 | from fast_gen import *
3 | from learner import *
4 | from planet import *
5 | from pt_models import *
6 | from sgdr_pt import *
7 |
8 | bs=64; f_model = resnet34
9 | path = "/data/jhoward/fast/planet/"
10 | cv_idx = int(sys.argv[1])
11 | torch.cuda.set_device(cv_idx % 4)
12 | if cv_idx==1: torch.cuda.set_device(2)
13 | n=len(list(open(f'{path}train_v2.csv')))-1
14 |
15 | def train_sz(sz, load=None, save_name=None, suf=None):
16 | print(f'\n***** {sz} *****')
17 | #data=get_data_pad(f_model, path, sz, bs, n, cv_idx)
18 | data=get_data_zoom(f_model, path, sz, bs, n, cv_idx)
19 | learn = Learner.pretrained_convnet(f_model, data, metrics=[f2])
20 | if load: learn.load(f'{load}_{cv_idx}{suf}')
21 | print('--- FC')
22 | learn.fit(0.3, 2, cycle_len=1)
23 | print('--- Gradual')
24 | for i in range(6,3,-1):
25 | learn.freeze_to(i)
26 | learn.fit(0.1*(i-3), 1, cycle_len=1)
27 | learn.unfreeze()
28 | print('--- All')
29 | learn.fit(0.2, 15, cycle_len=3, cycle_save_name=f'{save_name}{suf}')
30 | learn.save(f'{sz}_{cv_idx}{suf}')
31 |
32 | suf='_zoom'
33 | train_sz(64, suf=suf)
34 | train_sz(128, load=64, suf=suf)
35 | train_sz(244, load=128, save_name=f'170809_{cv_idx}', suf=suf)
36 |
37 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/.gitignore:
--------------------------------------------------------------------------------
1 | checkpoints/
2 | *-Copy?.ipynb
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/.gitignore:
--------------------------------------------------------------------------------
1 | !data
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/aligned_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 |
4 | import torch
5 | import torchvision.transforms as transforms
6 | from PIL import Image
7 |
8 | from .base_dataset import BaseDataset
9 | from .image_folder import make_dataset
10 |
11 |
12 | class AlignedDataset(BaseDataset):
13 | def initialize(self, opt):
14 | self.opt = opt
15 | self.root = opt.dataroot
16 | self.dir_AB = os.path.join(opt.dataroot, opt.phase)
17 | self.AB_paths = sorted(make_dataset(self.dir_AB))
18 | assert(opt.resize_or_crop == 'resize_and_crop')
19 |
20 | def __getitem__(self, index):
21 | AB_path = self.AB_paths[index]
22 | AB = Image.open(AB_path).convert('RGB')
23 | w, h = AB.size
24 |         w2 = int(w / 2)  # A and B are stored side by side in one image; split at the midpoint
25 | A = AB.crop((0, 0, w2, h)).resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC)
26 | B = AB.crop((w2, 0, w, h)).resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC)
27 | A = transforms.ToTensor()(A)
28 | B = transforms.ToTensor()(B)
29 | w_offset = random.randint(0, max(0, self.opt.loadSize - self.opt.fineSize - 1))
30 | h_offset = random.randint(0, max(0, self.opt.loadSize - self.opt.fineSize - 1))
31 |
32 | A = A[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize]
33 | B = B[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize]
34 |
35 | A = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(A)
36 | B = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(B)
37 |
38 | if self.opt.which_direction == 'BtoA':
39 | input_nc = self.opt.output_nc
40 | output_nc = self.opt.input_nc
41 | else:
42 | input_nc = self.opt.input_nc
43 | output_nc = self.opt.output_nc
44 |
45 | if (not self.opt.no_flip) and random.random() < 0.5:
46 | idx = [i for i in range(A.size(2) - 1, -1, -1)]
47 | idx = torch.LongTensor(idx)
48 | A = A.index_select(2, idx)
49 | B = B.index_select(2, idx)
50 |
51 | if input_nc == 1: # RGB to gray
52 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114
53 | A = tmp.unsqueeze(0)
54 |
55 | if output_nc == 1: # RGB to gray
56 | tmp = B[0, ...] * 0.299 + B[1, ...] * 0.587 + B[2, ...] * 0.114
57 | B = tmp.unsqueeze(0)
58 |
59 | return {'A': A, 'B': B,
60 | 'A_paths': AB_path, 'B_paths': AB_path}
61 |
62 | def __len__(self):
63 | return len(self.AB_paths)
64 |
65 | def name(self):
66 | return 'AlignedDataset'
67 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/base_data_loader.py:
--------------------------------------------------------------------------------
1 | class BaseDataLoader():
2 | def __init__(self): pass
3 |     def load_data(self): return None
4 | def initialize(self, opt): self.opt = opt
5 |
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/base_dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | import torchvision.transforms as transforms
3 | from PIL import Image
4 |
5 |
6 | class BaseDataset(data.Dataset):
7 | def __init__(self):
8 | super(BaseDataset, self).__init__()
9 |
10 | def name(self):
11 | return 'BaseDataset'
12 |
13 | def initialize(self, opt):
14 | pass
15 |
16 |
17 | def get_transform(opt):
18 | transform_list = []
19 | if opt.resize_or_crop == 'resize_and_crop':
20 | osize = [opt.loadSize, opt.loadSize]
21 | transform_list.append(transforms.Scale(osize, Image.BICUBIC))
22 | transform_list.append(transforms.RandomCrop(opt.fineSize))
23 | elif opt.resize_or_crop == 'crop':
24 | transform_list.append(transforms.RandomCrop(opt.fineSize))
25 | elif opt.resize_or_crop == 'scale_width':
26 | transform_list.append(transforms.Lambda(
27 | lambda img: __scale_width(img, opt.fineSize)))
28 | elif opt.resize_or_crop == 'scale_width_and_crop':
29 | transform_list.append(transforms.Lambda(
30 | lambda img: __scale_width(img, opt.loadSize)))
31 | transform_list.append(transforms.RandomCrop(opt.fineSize))
32 |
33 | if opt.isTrain and not opt.no_flip:
34 | transform_list.append(transforms.RandomHorizontalFlip())
35 |
36 | transform_list += [transforms.ToTensor(),
37 | transforms.Normalize((0.5, 0.5, 0.5),
38 | (0.5, 0.5, 0.5))]
39 | return transforms.Compose(transform_list)
40 |
41 |
42 | def __scale_width(img, target_width):
43 | ow, oh = img.size
44 | if (ow == target_width):
45 | return img
46 | w = target_width
47 | h = int(target_width * oh / ow)
48 | return img.resize((w, h), Image.BICUBIC)
49 |
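Note: get_transform assembles the whole preprocessing pipeline from opt.resize_or_crop. A hedged usage sketch; the Namespace fields below mirror the options the training scripts parse (transforms.Scale matches the torchvision release this code targets; it was later renamed Resize):

    from argparse import Namespace

    opt = Namespace(resize_or_crop='resize_and_crop', loadSize=286,
                    fineSize=256, isTrain=True, no_flip=False)
    tfm = get_transform(opt)
    # pipeline: Scale(286, BICUBIC) -> RandomCrop(256) -> RandomHorizontalFlip
    #           -> ToTensor -> Normalize into [-1, 1]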
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 |
3 | from .base_data_loader import BaseDataLoader
4 |
5 |
6 | def CreateDataset(opt):
7 | dataset = None
8 | if opt.dataset_mode == 'aligned':
9 | from .aligned_dataset import AlignedDataset
10 | dataset = AlignedDataset()
11 | elif opt.dataset_mode == 'unaligned':
12 | from .unaligned_dataset import UnalignedDataset
13 | dataset = UnalignedDataset()
14 | elif opt.dataset_mode == 'single':
15 | from .single_dataset import SingleDataset
16 | dataset = SingleDataset()
17 | else:
18 | raise ValueError("Dataset [%s] not recognized." % opt.dataset_mode)
19 |
20 | print("dataset [%s] was created" % (dataset.name()))
21 | dataset.initialize(opt)
22 | return dataset
23 |
24 |
25 | class CustomDatasetDataLoader(BaseDataLoader):
26 | def initialize(self, opt):
27 | BaseDataLoader.initialize(self, opt)
28 | self.dataset = CreateDataset(opt)
29 | self.dataloader = torch.utils.data.DataLoader(
30 | self.dataset, batch_size=opt.batchSize,
31 | shuffle=not opt.serial_batches, num_workers=int(opt.nThreads))
32 |
33 | def __iter__(self):
34 | for i, data in enumerate(self.dataloader):
35 | if i >= self.opt.max_dataset_size: break
36 | yield data
37 |
38 | def name(self): return 'CustomDatasetDataLoader'
39 | def load_data(self): return self
40 | def __len__(self): return min(len(self.dataset), self.opt.max_dataset_size)
41 |
42 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/data_loader.py:
--------------------------------------------------------------------------------
1 | from .custom_dataset_data_loader import CustomDatasetDataLoader  # was 'from ..data....', which breaks when data/ is imported as a top-level package (see test.py/train.py)
2 |
3 | def CreateDataLoader(opt):
4 | data_loader = CustomDatasetDataLoader()
5 | print(data_loader.name())
6 | data_loader.initialize(opt)
7 | return data_loader
8 |
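Note: CreateDataLoader is the single entry point the train/test scripts use; dataset choice and batching are driven entirely by fields on opt. A sketch with a hand-built opt covering exactly the fields CustomDatasetDataLoader and SingleDataset read (the dataroot is hypothetical and must exist on disk):

    from argparse import Namespace

    opt = Namespace(dataset_mode='single', dataroot='./datasets/facades/test',
                    resize_or_crop='resize_and_crop', loadSize=286, fineSize=256,
                    isTrain=False, no_flip=True, which_direction='AtoB',
                    input_nc=3, output_nc=3, batchSize=1, serial_batches=True,
                    nThreads=1, max_dataset_size=float('inf'))
    loader = CreateDataLoader(opt)
    for batch in loader.load_data():   # dicts like {'A': tensor, 'A_paths': [...]}
        print(batch['A'].shape)        # torch.Size([1, 3, 256, 256])
        break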
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/image_folder.py:
--------------------------------------------------------------------------------
1 | ###############################################################################
2 | # Code from
3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
4 | # Modified the original code so that it loads images from the current
5 | # directory as well as from subdirectories
6 | ###############################################################################
7 |
8 | import os
9 | import os.path
10 |
11 | import torch.utils.data as data
12 | from PIL import Image
13 |
14 | IMG_EXTENSIONS = [
15 | '.jpg', '.JPG', '.jpeg', '.JPEG',
16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
17 | ]
18 |
19 |
20 | def is_image_file(filename):
21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
22 |
23 |
24 | def make_dataset(dir):
25 | images = []
26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir
27 |
28 | for root, _, fnames in sorted(os.walk(dir)):
29 | for fname in fnames:
30 | if is_image_file(fname):
31 | path = os.path.join(root, fname)
32 | images.append(path)
33 |
34 | return images
35 |
36 |
37 | def default_loader(path):
38 | return Image.open(path).convert('RGB')
39 |
40 |
41 | class ImageFolder(data.Dataset):
42 |
43 | def __init__(self, root, transform=None, return_paths=False,
44 | loader=default_loader):
45 | imgs = make_dataset(root)
46 | if len(imgs) == 0:
47 | raise(RuntimeError("Found 0 images in: " + root + "\n"
48 | "Supported image extensions are: " +
49 | ",".join(IMG_EXTENSIONS)))
50 |
51 | self.root = root
52 | self.imgs = imgs
53 | self.transform = transform
54 | self.return_paths = return_paths
55 | self.loader = loader
56 |
57 | def __getitem__(self, index):
58 | path = self.imgs[index]
59 | img = self.loader(path)
60 | if self.transform is not None:
61 | img = self.transform(img)
62 | if self.return_paths:
63 | return img, path
64 | else:
65 | return img
66 |
67 | def __len__(self):
68 | return len(self.imgs)
69 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/single_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 |
3 | from PIL import Image
4 |
5 | from .base_dataset import BaseDataset, get_transform
6 | from .image_folder import make_dataset
7 |
8 |
9 | class SingleDataset(BaseDataset):
10 | def initialize(self, opt):
11 | self.opt = opt
12 | self.root = opt.dataroot
13 | self.dir_A = os.path.join(opt.dataroot)
14 |
15 | self.A_paths = make_dataset(self.dir_A)
16 |
17 | self.A_paths = sorted(self.A_paths)
18 |
19 | self.transform = get_transform(opt)
20 |
21 | def __getitem__(self, index):
22 | A_path = self.A_paths[index]
23 | A_img = Image.open(A_path).convert('RGB')
24 | A = self.transform(A_img)
25 | if self.opt.which_direction == 'BtoA':
26 | input_nc = self.opt.output_nc
27 | else:
28 | input_nc = self.opt.input_nc
29 |
30 | if input_nc == 1: # RGB to gray
31 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114
32 | A = tmp.unsqueeze(0)
33 |
34 | return {'A': A, 'A_paths': A_path}
35 |
36 | def __len__(self):
37 | return len(self.A_paths)
38 |
39 | def name(self):
40 | return 'SingleImageDataset'
41 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/unaligned_dataset.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | import random
3 |
4 | from PIL import Image
5 |
6 | from .base_dataset import BaseDataset, get_transform
7 | from .image_folder import make_dataset
8 |
9 |
10 | class UnalignedDataset(BaseDataset):
11 | def initialize(self, opt):
12 | self.opt = opt
13 | self.root = opt.dataroot
14 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')
15 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')
16 |
17 | self.A_paths = make_dataset(self.dir_A)
18 | self.B_paths = make_dataset(self.dir_B)
19 |
20 | self.A_paths = sorted(self.A_paths)
21 | self.B_paths = sorted(self.B_paths)
22 | self.A_size = len(self.A_paths)
23 | self.B_size = len(self.B_paths)
24 | self.transform = get_transform(opt)
25 |
26 | def __getitem__(self, index):
27 | A_path = self.A_paths[index % self.A_size]
28 | if self.opt.serial_batches:
29 | index_B = index % self.B_size
30 | else:
31 | index_B = random.randint(0, self.B_size - 1)
32 | B_path = self.B_paths[index_B]
33 | # print('(A, B) = (%d, %d)' % (index_A, index_B))
34 | A_img = Image.open(A_path).convert('RGB')
35 | B_img = Image.open(B_path).convert('RGB')
36 |
37 | A = self.transform(A_img)
38 | B = self.transform(B_img)
39 | if self.opt.which_direction == 'BtoA':
40 | input_nc = self.opt.output_nc
41 | output_nc = self.opt.input_nc
42 | else:
43 | input_nc = self.opt.input_nc
44 | output_nc = self.opt.output_nc
45 |
46 | if input_nc == 1: # RGB to gray
47 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114
48 | A = tmp.unsqueeze(0)
49 |
50 | if output_nc == 1: # RGB to gray
51 | tmp = B[0, ...] * 0.299 + B[1, ...] * 0.587 + B[2, ...] * 0.114
52 | B = tmp.unsqueeze(0)
53 |
54 | return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path}
55 |
56 | def __len__(self): return max(self.A_size, self.B_size)
57 |
58 | def name(self): return 'UnalignedDataset'
59 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/base_model.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 |
5 |
6 | class BaseModel():
7 | def name(self): return 'BaseModel'
8 |
9 | def initialize(self, opt):
10 | self.opt = opt
11 | self.gpu_ids = opt.gpu_ids
12 | self.isTrain = opt.isTrain
13 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor
14 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name)
15 |
16 | def set_input(self, input): self.input = input
17 | def forward(self): pass
18 | def test(self): pass
19 | def get_image_paths(self): pass
20 | def optimize_parameters(self): pass
21 | def get_current_visuals(self): return self.input
22 | def get_current_errors(self): return {}
23 | def save(self, label): pass
24 |
25 | # helper saving function that can be used by subclasses
26 | def save_network(self, network, network_label, epoch_label, gpu_ids):
27 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
28 | save_path = os.path.join(self.save_dir, save_filename)
29 | torch.save(network.cpu().state_dict(), save_path)
30 | if len(gpu_ids) and torch.cuda.is_available(): network.cuda(gpu_ids[0])
31 |
32 | # helper loading function that can be used by subclasses
33 | def load_network(self, network, network_label, epoch_label):
34 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
35 | save_path = os.path.join(self.save_dir, save_filename)
36 | network.load_state_dict(torch.load(save_path))
37 |
38 | # update learning rate (called once every epoch)
39 | def update_learning_rate(self):
40 | for scheduler in self.schedulers: scheduler.step()
41 | lr = self.optimizers[0].param_groups[0]['lr']
42 | print('learning rate = %.7f' % lr)
43 |
44 |
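Note: BaseModel is an abstract checkpointing/bookkeeping shell; concrete models register their networks and delegate persistence to save_network/load_network. A minimal hypothetical subclass sketch:

    import torch.nn as nn

    class ToyModel(BaseModel):
        def initialize(self, opt):
            BaseModel.initialize(self, opt)
            self.netG = nn.Linear(4, 4)     # stands in for a real generator

        def save(self, label):              # writes <save_dir>/<label>_net_G.pth
            self.save_network(self.netG, 'G', label, self.gpu_ids)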
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/models.py:
--------------------------------------------------------------------------------
1 | def create_model(opt):
2 | model = None
3 | print(opt.model)
4 | if opt.model == 'cycle_gan':
5 | assert(opt.dataset_mode == 'unaligned')
6 | from .cycle_gan_model import CycleGANModel
7 | model = CycleGANModel()
8 | elif opt.model == 'pix2pix':
9 | assert(opt.dataset_mode == 'aligned')
10 | from .pix2pix_model import Pix2PixModel
11 | model = Pix2PixModel()
12 | elif opt.model == 'test':
13 | assert(opt.dataset_mode == 'single')
14 | from .test_model import TestModel
15 | model = TestModel()
16 | else:
17 | raise ValueError("Model [%s] not recognized." % opt.model)
18 | model.initialize(opt)
19 | print("model [%s] was created" % (model.name()))
20 | return model
21 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/test_model.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | from torch.autograd import Variable
4 |
5 | from . import networks
6 | from .base_model import BaseModel
7 | import util.util as util  # was missing: get_current_visuals below calls util.tensor2im (assumes cgan/ on sys.path, as in test.py)
8 |
9 | class TestModel(BaseModel):
10 | def name(self):
11 | return 'TestModel'
12 |
13 | def initialize(self, opt):
14 | assert(not opt.isTrain)
15 | BaseModel.initialize(self, opt)
16 | self.netG = networks.define_G(opt.input_nc, opt.output_nc,
17 | opt.ngf, opt.which_model_netG,
18 | opt.norm, not opt.no_dropout,
19 | opt.init_type,
20 | self.gpu_ids)
21 | which_epoch = opt.which_epoch
22 | self.load_network(self.netG, 'G', which_epoch)
23 |
24 | print('---------- Networks initialized -------------')
25 | networks.print_network(self.netG)
26 | print('-----------------------------------------------')
27 |
28 | def set_input(self, input):
29 | # we need to use single_dataset mode
30 | input_A = input['A']
31 | if len(self.gpu_ids) > 0:
32 |             input_A = input_A.cuda(self.gpu_ids[0], non_blocking=True)  # 'async' became a reserved word in Python 3.7
33 | self.input_A = input_A
34 | self.image_paths = input['A_paths']
35 |
36 | def test(self):
37 | self.real_A = Variable(self.input_A)
38 | self.fake_B = self.netG(self.real_A)
39 |
40 | # get image paths
41 | def get_image_paths(self):
42 | return self.image_paths
43 |
44 | def get_current_visuals(self):
45 | real_A = util.tensor2im(self.real_A.data)
46 | fake_B = util.tensor2im(self.fake_B.data)
47 | return OrderedDict([('real_A', real_A), ('fake_B', fake_B)])
48 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/test_options.py:
--------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 |
3 |
4 | class TestOptions(BaseOptions):
5 | def initialize(self):
6 | BaseOptions.initialize(self)
7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model')
12 | self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run')
13 | self.isTrain = False
14 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/test.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from data.data_loader import CreateDataLoader
4 | from models.models import create_model
5 | from options.test_options import TestOptions
6 | from util import html
7 | from util.visualizer import Visualizer
8 |
9 | opt = TestOptions().parse()
10 | opt.nThreads = 1 # test code only supports nThreads = 1
11 | opt.batchSize = 1 # test code only supports batchSize = 1
12 | opt.serial_batches = True # no shuffle
13 | opt.no_flip = True # no flip
14 |
15 | data_loader = CreateDataLoader(opt)
16 | dataset = data_loader.load_data()
17 | model = create_model(opt)
18 | visualizer = Visualizer(opt)
19 | # create website
20 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch))
21 | webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.which_epoch))
22 | # test
23 | for i, data in enumerate(dataset):
24 | if i >= opt.how_many: break
25 | model.set_input(data)
26 | model.test()
27 | visuals = model.get_current_visuals()
28 | img_path = model.get_image_paths()
29 | print('%04d: process image... %s' % (i, img_path))
30 | visualizer.save_images(webpage, visuals, img_path, aspect_ratio=opt.aspect_ratio)
31 |
32 | webpage.save()
33 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/train.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | from data.data_loader import CreateDataLoader
4 | from models.models import create_model
5 | from options.train_options import TrainOptions
6 | from util.visualizer import Visualizer
7 |
8 | opt = TrainOptions().parse()
9 | data_loader = CreateDataLoader(opt)
10 | dataset = data_loader.load_data()
11 | dataset_size = len(data_loader)
12 | print('#training images = %d' % dataset_size)
13 |
14 | model = create_model(opt)
15 | visualizer = Visualizer(opt)
16 | total_steps = 0
17 |
18 | for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1):
19 | epoch_start_time = time.time()
20 | iter_data_time = time.time()
21 | epoch_iter = 0
22 |
23 | for i, data in enumerate(dataset):
24 | iter_start_time = time.time()
25 | if total_steps % opt.print_freq == 0:
26 | t_data = iter_start_time - iter_data_time
27 | visualizer.reset()
28 | total_steps += opt.batchSize
29 | epoch_iter += opt.batchSize
30 | model.set_input(data)
31 | model.optimize_parameters()
32 |
33 | if total_steps % opt.display_freq == 0:
34 | save_result = total_steps % opt.update_html_freq == 0
35 | visualizer.display_current_results(model.get_current_visuals(), epoch, save_result)
36 |
37 | if total_steps % opt.print_freq == 0:
38 | errors = model.get_current_errors()
39 | t = (time.time() - iter_start_time) / opt.batchSize
40 | visualizer.print_current_errors(epoch, epoch_iter, errors, t, t_data)
41 | if opt.display_id > 0:
42 | visualizer.plot_current_errors(epoch, float(epoch_iter) / dataset_size, opt, errors)
43 |
44 | if total_steps % opt.save_latest_freq == 0:
45 | print('saving the latest model (epoch %d, total_steps %d)' %
46 | (epoch, total_steps))
47 | model.save('latest')
48 |
49 | iter_data_time = time.time()
50 | if epoch % opt.save_epoch_freq == 0:
51 | print('saving the model at the end of epoch %d, iters %d' %
52 | (epoch, total_steps))
53 | model.save('latest')
54 | model.save(epoch)
55 |
56 | print('End of epoch %d / %d \t Time Taken: %d sec' %
57 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))
58 | model.update_learning_rate()
59 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/html.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import dominate
4 | from dominate.tags import *
5 |
6 |
7 | class HTML:
8 | def __init__(self, web_dir, title, refresh=0):
9 | self.title = title
10 | self.web_dir = web_dir
11 | self.img_dir = os.path.join(self.web_dir, 'images')
12 | if not os.path.exists(self.web_dir):
13 | os.makedirs(self.web_dir)
14 | if not os.path.exists(self.img_dir):
15 | os.makedirs(self.img_dir)
16 | # print(self.img_dir)
17 |
18 | self.doc = dominate.document(title=title)
19 | if refresh > 0:
20 | with self.doc.head:
21 | meta(http_equiv="refresh", content=str(refresh))
22 |
23 | def get_image_dir(self):
24 | return self.img_dir
25 |
26 | def add_header(self, str):
27 | with self.doc:
28 | h3(str)
29 |
30 | def add_table(self, border=1):
31 | self.t = table(border=border, style="table-layout: fixed;")
32 | self.doc.add(self.t)
33 |
34 | def add_images(self, ims, txts, links, width=400):
35 | self.add_table()
36 | with self.t:
37 | with tr():
38 | for im, txt, link in zip(ims, txts, links):
39 | with td(style="word-wrap: break-word;", halign="center", valign="top"):
40 | with p():
41 | with a(href=os.path.join('images', link)):
42 | img(style="width:%dpx" % width, src=os.path.join('images', im))
43 | br()
44 | p(txt)
45 |
46 | def save(self):
47 | html_file = '%s/index.html' % self.web_dir
48 | f = open(html_file, 'wt')
49 | f.write(self.doc.render())
50 | f.close()
51 |
52 |
53 | if __name__ == '__main__':
54 | html = HTML('web/', 'test_html')
55 | html.add_header('hello world')
56 |
57 | ims = []
58 | txts = []
59 | links = []
60 | for n in range(4):
61 | ims.append('image_%d.png' % n)
62 | txts.append('text_%d' % n)
63 | links.append('image_%d.png' % n)
64 | html.add_images(ims, txts, links)
65 | html.save()
66 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/image_pool.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import torch
4 | from torch.autograd import Variable
5 |
6 |
7 | class ImagePool():
8 | def __init__(self, pool_size):
9 | self.pool_size = pool_size
10 | if self.pool_size > 0:
11 | self.num_imgs = 0
12 | self.images = []
13 |
14 | def query(self, images):
15 | if self.pool_size == 0:
16 | return Variable(images)
17 | return_images = []
18 | for image in images:
19 | image = torch.unsqueeze(image, 0)
20 | if self.num_imgs < self.pool_size:
21 | self.num_imgs = self.num_imgs + 1
22 | self.images.append(image)
23 | return_images.append(image)
24 | else:
25 | p = random.uniform(0, 1)
26 | if p > 0.5:
27 | random_id = random.randint(0, self.pool_size - 1)
28 | tmp = self.images[random_id].clone()
29 | self.images[random_id] = image
30 | return_images.append(tmp)
31 | else:
32 | return_images.append(image)
33 | return_images = Variable(torch.cat(return_images, 0))
34 | return return_images
35 |
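Note: ImagePool is the generated-image history buffer used when training GAN discriminators (as in the CycleGAN paper): once the pool is full, each incoming fake has a 50% chance of being swapped for a randomly chosen older one, so the discriminator also sees stale generator output. A minimal sketch of how a training step might query it:

    import torch

    pool = ImagePool(pool_size=50)
    fake_B = torch.randn(4, 3, 256, 256)   # stands in for generator output
    fake_for_D = pool.query(fake_B)        # mix of fresh and buffered fakes
    # the discriminator is then run on fake_for_D rather than on fake_B directly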
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/util.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 |
5 | import numpy as np
6 | import torch
7 | from PIL import Image
8 |
9 |
10 | # Converts a Tensor into a Numpy array
11 | # |imtype|: the desired type of the converted numpy array
12 | def tensor2im(image_tensor, imtype=np.uint8):
13 | image_numpy = image_tensor[0].cpu().float().numpy()
14 | if image_numpy.shape[0] == 1:
15 | image_numpy = np.tile(image_numpy, (3, 1, 1))
16 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0
17 | return image_numpy.astype(imtype)
18 |
19 |
20 | def diagnose_network(net, name='network'):
21 | mean = 0.0
22 | count = 0
23 | for param in net.parameters():
24 | if param.grad is not None:
25 | mean += torch.mean(torch.abs(param.grad.data))
26 | count += 1
27 | if count > 0:
28 | mean = mean / count
29 | print(name)
30 | print(mean)
31 |
32 |
33 | def save_image(image_numpy, image_path):
34 | image_pil = Image.fromarray(image_numpy)
35 | image_pil.save(image_path)
36 |
37 |
38 | def print_numpy(x, val=True, shp=False):
39 | x = x.astype(np.float64)
40 | if shp:
41 | print('shape,', x.shape)
42 | if val:
43 | x = x.flatten()
44 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
45 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
46 |
47 |
48 | def mkdirs(paths):
49 | if isinstance(paths, list) and not isinstance(paths, str):
50 | for path in paths:
51 | mkdir(path)
52 | else:
53 | mkdir(paths)
54 |
55 |
56 | def mkdir(path):
57 | if not os.path.exists(path):
58 | os.makedirs(path)
59 |
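Note: tensor2im assumes its input batch is normalized into [-1, 1] (as the cgan datasets produce) and maps the first element back to a uint8 HWC array; save_image writes it out. A small sketch (output path hypothetical):

    import torch

    batch = torch.rand(1, 3, 8, 8) * 2 - 1   # fake batch in [-1, 1]
    arr = tensor2im(batch)                   # (8, 8, 3) uint8 ndarray
    save_image(arr, '/tmp/example.png')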
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 | """
10 | Collects iterables lazily, rather than immediately.
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |     Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 | """
14 | if timeout is not None: end_time = timeout + time.time()
15 | if prefetch is None: prefetch = self._max_workers
16 | if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 | argsiter = zip(*iterables)
18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch))
19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator():
21 | nonlocal argsiter
22 | try:
23 | while fs:
24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time())
25 | # Got a result, future needn't be cancelled
26 | del fs[0]
27 | # Dispatch next task before yielding to keep pipeline full
28 | if argsiter:
29 | try:
30 | args = next(argsiter)
31 | except StopIteration:
32 | argsiter = None
33 | else:
34 | fs.append(self.submit(fn, *args))
35 | yield res
36 | finally:
37 | for future in fs: future.cancel()
38 | return result_iterator()
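Note: unlike the stock Executor.map, which submits a task for every element of its input before yielding anything, this version keeps at most max_workers + prefetch tasks in flight and cancels the remainder if the consumer stops early. A sketch:

    import time

    def slow_double(x):
        time.sleep(0.01)
        return 2 * x

    with LazyThreadPoolExecutor(max_workers=4) as ex:
        for y in ex.map(slow_double, range(10**6), prefetch=8):
            if y >= 20:
                break   # pending futures are cancelled by the finally block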
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 | def __init__(self, module):
7 | super(FP16, self).__init__()
8 | self.module = batchnorm_to_fp32(module.half())
9 |
10 | def forward(self, input):
11 | return self.module(input.half())
12 |
13 | def load_state_dict(self, *inputs, **kwargs):
14 | self.module.load_state_dict(*inputs, **kwargs)
15 |
16 | def state_dict(self, *inputs, **kwargs):
17 | return self.module.state_dict(*inputs, **kwargs)
18 |
19 | def batchnorm_to_fp32(module):
20 | '''
21 |     Converts BatchNorm layers to have parameters in single precision.
22 | Find all layers and convert them back to float. This can't
23 | be done with built in .apply as that function will apply
24 | fn to all modules, parameters, and buffers. Thus we wouldn't
25 | be able to guard the float conversion based on the module type.
26 | '''
27 | if isinstance(module, nn.modules.batchnorm._BatchNorm):
28 | module.float()
29 | for child in module.children():
30 | batchnorm_to_fp32(child)
31 | return module
32 |
33 | def copy_model_to_fp32(m, optim):
34 | """ Creates a fp32 copy of model parameters and sets optimizer parameters
35 | """
36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()]
37 | optim_groups = [group['params'] for group in optim.param_groups]
38 | iter_fp32_params = iter(fp32_params)
39 | for group_params in optim_groups:
40 | for i in range(len(group_params)):
41 | fp32_param = next(iter_fp32_params)
42 | fp32_param.requires_grad = group_params[i].requires_grad
43 | group_params[i] = fp32_param
44 | return fp32_params
45 |
46 | def copy_fp32_to_model(m, fp32_params):
47 | m_params = list(m.parameters())
48 | for fp32_param, m_param in zip(fp32_params, m_params):
49 | m_param.data.copy_(fp32_param.data)
50 |
51 | def update_fp32_grads(fp32_params, m):
52 | m_params = list(m.parameters())
53 | for fp32_param, m_param in zip(fp32_params, m_params):
54 | if fp32_param.grad is None:
55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
56 | fp32_param.grad.data.copy_(m_param.grad.data)
57 |
58 |
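Note: together these helpers implement the fp16 "master weights" recipe: the model runs in half precision while the optimizer steps fp32 copies of its parameters. A hedged sketch of one step (a CUDA device is required, since copy_model_to_fp32 builds torch.cuda.FloatTensor copies; loss scaling is omitted):

    import torch
    import torch.nn as nn

    model = FP16(nn.Linear(10, 2)).cuda()
    opt = torch.optim.SGD(model.parameters(), lr=0.1)
    fp32_params = copy_model_to_fp32(model, opt)  # optimizer now owns fp32 copies

    loss = model(torch.randn(4, 10).cuda()).float().sum()
    loss.backward()
    update_fp32_grads(fp32_params, model)   # copy half grads onto the fp32 params
    opt.step()
    copy_fp32_to_model(model, fp32_params)  # write stepped weights back as fp16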
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/imports.py:
--------------------------------------------------------------------------------
1 | from IPython.lib.deepreload import reload as dreload
2 | import os, numpy as np, math, json, bcolz, scipy, cv2
3 | import pandas as pd, pickle, string, sys
4 | import matplotlib
5 | import contextlib
6 | from abc import abstractmethod
7 | from functools import partial
8 | from isoweek import Week
9 | from pandas_summary import DataFrameSummary
10 | from sklearn import metrics, ensemble, preprocessing
11 | from pathlib import Path
12 | from distutils.version import LooseVersion
13 |
14 | from matplotlib import pyplot as plt, rcParams, animation
15 |
16 | matplotlib.rc('animation', html='html5')
17 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
18 |
19 | from ipykernel.kernelapp import IPKernelApp
20 | def in_notebook(): return IPKernelApp.initialized()
21 |
22 | def in_ipynb():
23 | try:
24 | cls = get_ipython().__class__.__name__
25 | return cls == 'ZMQInteractiveShell'
26 | except NameError:
27 | return False
28 |
29 | import tqdm as tq
30 | from tqdm import tqdm_notebook, tnrange
31 |
32 | def clear_tqdm():
33 | inst = getattr(tq.tqdm, '_instances', None)
34 | if not inst: return
35 | try:
36 | for i in range(len(inst)): inst.pop().close()
37 | except Exception:
38 | pass
39 |
40 | if in_notebook():
41 | def tqdm(*args, **kwargs):
42 | clear_tqdm()
43 | return tq.tqdm(*args, file=sys.stdout, **kwargs)
44 | def trange(*args, **kwargs):
45 | clear_tqdm()
46 | return tq.trange(*args, file=sys.stdout, **kwargs)
47 | else:
48 | from tqdm import tqdm, trange
49 | tnrange=trange
50 | tqdm_notebook=tqdm
51 |
52 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn  # was missing: nn is used below
2 |
3 | def cond_init(m, init_fn):
4 |     if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)):
5 |         if hasattr(m, 'weight'): init_fn(m.weight)
6 |         if hasattr(m, 'bias'): m.bias.data.fill_(0.)
7 |
8 | def apply_init(m, init_fn):
9 |     m.apply(lambda x: cond_init(x, init_fn))
10 |
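Note: apply_init walks a network and initializes every layer except BatchNorm, whose affine parameters should stay at their 1/0 defaults. Sketch (nn comes from the module's own import):

    net = nn.Sequential(nn.Linear(10, 10), nn.BatchNorm1d(10), nn.Linear(10, 2))
    apply_init(net, nn.init.kaiming_normal_)   # Linears initialized, BatchNorm untouched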
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/io.py:
--------------------------------------------------------------------------------
1 | import os  # was missing: os.path is used in get_data below
2 | from urllib.request import urlretrieve
3 | from tqdm import tqdm
4 |
5 |
6 | class TqdmUpTo(tqdm):
7 | def update_to(self, b=1, bsize=1, tsize=None):
8 | if tsize is not None: self.total = tsize
9 | self.update(b * bsize - self.n)
10 |
11 | def get_data(url, filename):
12 | if not os.path.exists(filename):
13 |
14 | dirname = os.path.dirname(filename)
15 | if not os.path.exists(dirname):
16 | os.makedirs(dirname)
17 |
18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
19 | urlretrieve(url, filename, reporthook=t.update_to)
20 |
21 |
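Note: TqdmUpTo adapts urlretrieve's reporthook signature (block count, block size, total size) to tqdm's update interface, so get_data shows a progress bar and skips files that already exist. Usage sketch (URL and path hypothetical):

    get_data('http://example.com/mnist_sample.tgz', 'data/mnist_sample.tgz')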
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/layer_optimizer.py:
--------------------------------------------------------------------------------
1 | from .core import *
2 |
3 | def opt_params(parm, lr, wd):
4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd}
5 |
6 | class LayerOptimizer():
7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None):
8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups]
9 | if not isinstance(lrs, Iterable): lrs=[lrs]
10 | if len(lrs)==1: lrs=lrs*len(layer_groups)
11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds]
13 | if len(wds)==1: wds=wds*len(layer_groups)
14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds
15 | self.opt = opt_fn(self.opt_params())
16 |
17 | def opt_params(self):
18 | assert(len(self.layer_groups) == len(self.lrs))
19 | assert(len(self.layer_groups) == len(self.wds))
20 | params = list(zip(self.layer_groups,self.lrs,self.wds))
21 | return [opt_params(*p) for p in params]
22 |
23 | @property
24 | def lr(self): return self.lrs[-1]
25 |
26 | @property
27 | def mom(self):
28 | if 'betas' in self.opt.param_groups[0]:
29 | return self.opt.param_groups[0]['betas'][0]
30 | else:
31 | return self.opt.param_groups[0]['momentum']
32 |
33 | def set_lrs(self, lrs):
34 | if not isinstance(lrs, Iterable): lrs=[lrs]
35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups)
36 | set_lrs(self.opt, lrs)
37 | self.lrs=lrs
38 |
39 | def set_wds(self, wds):
40 | if not isinstance(wds, Iterable): wds=[wds]
41 | if len(wds)==1: wds=wds*len(self.layer_groups)
42 | set_wds(self.opt, wds)
43 | self.wds=wds
44 |
45 | def set_mom(self,momentum):
46 | if 'betas' in self.opt.param_groups[0]:
47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1])
48 | else:
49 | for pg in self.opt.param_groups: pg['momentum'] = momentum
50 |
51 | def set_beta(self,beta):
52 | if 'betas' in self.opt.param_groups[0]:
53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta)
54 | elif 'alpha' in self.opt.param_groups[0]:
55 | for pg in self.opt.param_groups: pg['alpha'] = beta
56 |
57 | def set_opt_fn(self, opt_fn):
58 | if type(self.opt) != type(opt_fn(self.opt_params())):
59 | self.opt = opt_fn(self.opt_params())
60 |
61 | def zip_strict_(l, r):
62 | assert(len(l) == len(r))
63 | return zip(l, r)
64 |
65 | def set_lrs(opt, lrs):
66 | if not isinstance(lrs, Iterable): lrs=[lrs]
67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups)
68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr
69 |
70 | def set_wds(opt, wds):
71 | if not isinstance(wds, Iterable): wds=[wds]
72 | if len(wds)==1: wds=wds*len(opt.param_groups)
73 | assert(len(opt.param_groups) == len(wds))
74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd
75 |
76 |
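Note: LayerOptimizer is the mechanism behind fastai's discriminative learning rates: each layer group gets its own (lr, weight decay) pair, and scalars are broadcast across groups. A sketch, assuming core.chain_params accepts plain nn.Module groups and collects their trainable parameters:

    import torch.nn as nn
    import torch.optim as optim

    groups = [nn.Linear(10, 10), nn.Linear(10, 10), nn.Linear(10, 2)]
    lo = LayerOptimizer(optim.SGD, groups, lrs=[1e-4, 1e-3, 1e-2], wds=1e-5)
    print(lo.lr)       # 0.01, the last (head) group's learning rate
    lo.set_lrs(1e-3)   # a scalar is broadcast to every group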
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/layers.py:
--------------------------------------------------------------------------------
1 | from .imports import *        # these two imports were missing; nn and torch
2 | from .torch_imports import *  # below (and in the star-importing model files) need them
3 |
4 | class AdaptiveConcatPool2d(nn.Module):
5 |     def __init__(self, sz=None):
6 |         super().__init__()
7 |         sz = sz or (1,1)
8 |         self.ap = nn.AdaptiveAvgPool2d(sz)
9 |         self.mp = nn.AdaptiveMaxPool2d(sz)
10 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
11 |
12 | class Lambda(nn.Module):
13 |     def __init__(self, f): super().__init__(); self.f=f
14 |     def forward(self, x): return self.f(x)
15 |
16 | class Flatten(nn.Module):
17 |     def __init__(self): super().__init__()
18 |     def forward(self, x): return x.view(x.size(0), -1)
19 |
20 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch  # was missing: torch.mean is used below
2 |
3 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
4 |     y_pred = (y_pred.float() > threshold).float()
5 |     y_true = y_true.float()
6 |     tp = (y_pred * y_true).sum(dim=1)
7 |     precision = tp / (y_pred.sum(dim=1)+eps)
8 |     recall = tp / (y_true.sum(dim=1)+eps)
9 |     return torch.mean(
10 |         precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2))
11 |
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | from .torch_imports import *  # provides torch, used below (imports.py itself does not import torch)
3 |
4 | def accuracy_np(preds, targs):
5 | preds = np.argmax(preds, 1)
6 | return (preds==targs).mean()
7 |
8 | def accuracy(preds, targs):
9 | preds = torch.max(preds, dim=1)[1]
10 | return (preds==targs).float().mean()
11 |
12 | def accuracy_thresh(thresh):
13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh)
14 |
15 | def accuracy_multi(preds, targs, thresh):
16 | return ((preds>thresh).float()==targs).float().mean()
17 |
18 | def accuracy_multi_np(preds, targs, thresh):
19 | return ((preds>thresh)==targs).mean()
20 |
21 | def recall(preds, targs, thresh=0.5):
22 | pred_pos = preds > thresh
23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
24 | return tpos.sum()/targs.sum()
25 |
26 | def precision(preds, targs, thresh=0.5):
27 | pred_pos = preds > thresh
28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 | return tpos.sum()/pred_pos.sum()
30 |
31 | def fbeta(preds, targs, beta, thresh=0.5):
32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
33 | This is the micro averaged version where the true positives, false negatives and
34 | false positives are calculated globally (as opposed to on a per label basis).
35 |
36 |     beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision and
37 | beta > 1 favors recall.
38 | """
39 | assert beta > 0, 'beta needs to be greater than 0'
40 | beta2 = beta ** 2
41 | rec = recall(preds, targs, thresh)
42 | prec = precision(preds, targs, thresh)
43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec)
44 |
45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
46 |
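Note: a worked micro-averaged example for f1/fbeta at the default 0.5 threshold (2 samples x 3 labels), under the era-appropriate PyTorch these scripts target:

    import torch

    preds = torch.tensor([[0.9, 0.2, 0.8],
                          [0.1, 0.7, 0.4]])
    targs = torch.tensor([[1., 0., 1.],
                          [1., 1., 0.]])
    # tp = 3, predicted positives = 3, actual positives = 4
    # precision = 3/3 = 1.0, recall = 3/4 = 0.75
    # F1 = 2 * 1.0 * 0.75 / (1.0 + 0.75) = 0.857...
    print(f1(preds, targs))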
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/cifar10/wideresnet.py:
--------------------------------------------------------------------------------
1 | # Cifar10 Wideresnet for Dawn Submission
2 |
3 | from ...layers import *
4 |
5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False)
6 |
7 | def bn(ni, init_zero=False):
8 | m = nn.BatchNorm2d(ni)
9 | m.weight.data.fill_(0 if init_zero else 1)
10 | m.bias.data.zero_()
11 | return m
12 |
13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False):
14 | bn_initzero = bn(ni, init_zero=init_zero)
15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride))
16 |
17 | def noop(x): return x
18 |
19 | class BasicBlock(nn.Module):
20 | def __init__(self, ni, nf, stride, drop_p=0.0):
21 | super().__init__()
22 | self.bn = nn.BatchNorm2d(ni)
23 | self.conv1 = conv_2d(ni, nf, 3, stride)
24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1)
25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None
26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop
27 |
28 | def forward(self, x):
29 | x2 = F.relu(self.bn(x), inplace=True)
30 | r = self.shortcut(x2)
31 | x = self.conv1(x2)
32 | if self.drop: x = self.drop(x)
33 | x = self.conv2(x) * 0.2
34 | return x.add_(r)
35 |
36 |
37 | def _make_group(N, ni, nf, block, stride, drop_p):
38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)]
39 |
40 | class WideResNet(nn.Module):
41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16):
42 | super().__init__()
43 | n_channels = [start_nf]
44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k)
45 |
46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1
47 | for i in range(num_groups):
48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p)
49 |
50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1),
51 | Flatten(), nn.Linear(n_channels[3], num_classes)]
52 | self.features = nn.Sequential(*layers)
53 |
54 | def forward(self, x): return self.features(x)
55 |
56 |
57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.)
58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.)
59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.)
60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2)
61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.)
62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.)
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2)
64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2)
65 |
66 |
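Note: the constructors follow the usual WRN naming: with 3 groups the depth is 6N + 4, so wrn_22 is WRN-22 with widening factor k=6 on 10 CIFAR classes. A shape-check sketch:

    import torch

    model = wrn_22()
    out = model(torch.randn(2, 3, 32, 32))   # group strides 1,2,2 -> an 8x8 feature map
    print(out.shape)                         # torch.Size([2, 10])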
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from .layers import *
4 |
5 |
6 |
7 | class ConvBN(nn.Module):
8 | "convolutional layer then batchnorm"
9 |
10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0):
11 | super().__init__()
12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
14 | self.relu = nn.LeakyReLU(0.1, inplace=True)
15 |
16 | def forward(self, x): return self.relu(self.bn(self.conv(x)))
17 |
18 | class DarknetBlock(nn.Module):
19 | def __init__(self, ch_in):
20 | super().__init__()
21 | ch_hid = ch_in//2
22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, x): return self.conv2(self.conv1(x)) + x
26 |
27 | class Darknet(nn.Module):
28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
29 |
30 | def make_group_layer(self, ch_in, num_blocks, stride=1):
31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)]
32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2))
33 | return layers
34 |
35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32):
36 | super().__init__()
37 | nf = start_nf
38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
39 | for i,nb in enumerate(num_blocks):
40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2))
41 | nf *= 2
42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
43 | self.layers = nn.Sequential(*layers)
44 |
45 | def forward(self, x): return self.layers(x)
46 |
47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes)
48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes)
49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24)
50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16)
51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes)
52 |
53 |
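Note: darknet_53 reproduces the 53-layer ImageNet classifier from the YOLOv3 paper's table 1; the *_small/*_mini variants are smaller fastai experiments. Because of the adaptive pooling head, any reasonable input size works. Shape-check sketch:

    import torch

    model = darknet_53()
    out = model(torch.randn(1, 3, 256, 256))
    print(out.shape)   # torch.Size([1, 1000])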
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h)
30 | x = np.random.randint(w)
31 |
32 |             y1 = int(np.clip(y - self.length // 2, 0, h))  # int cast: floats cannot index arrays
33 |             y2 = int(np.clip(y + self.length // 2, 0, h))
34 |             x1 = int(np.clip(x - self.length // 2, 0, w))
35 |             x2 = int(np.clip(x + self.length // 2, 0, w))
36 |
37 |             mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
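Note: Cutout operates on tensors, so it slots into a torchvision pipeline after ToTensor. Sketch:

    import torchvision.transforms as T

    tfms = T.Compose([T.ToTensor(),
                      Cutout(n_holes=1, length=16)])   # one 16x16 hole per image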
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/create_toks.py:
--------------------------------------------------------------------------------
1 | import html
2 |
3 | import fire
4 | from fastai.text import *
5 |
6 | BOS = 'xbos' # beginning-of-sentence tag
7 | FLD = 'xfld' # data field tag
8 |
9 | re1 = re.compile(r' +')
10 |
11 |
12 | def fixup(x):
13 | x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace(
14 | 'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace(
15 |         '<br />', "\n").replace('\\"', '"').replace('<unk>','u_n').replace(' @.@ ','.').replace(
16 | ' @-@ ','-').replace('\\', ' \\ ')
17 | return re1.sub(' ', html.unescape(x))
18 |
19 |
20 | def get_texts(df, n_lbls):
21 | if len(df.columns) == 1:
22 | labels = []
23 | texts = f'\n{BOS} {FLD} 1 ' + df[0].astype(str)
24 | texts = texts.apply(fixup).values.astype(str)
25 | else:
26 | labels = df.iloc[:,range(n_lbls)].values.astype(np.int64)
27 | texts = f'\n{BOS} {FLD} 1 ' + df[n_lbls].astype(str)
28 | for i in range(n_lbls+1, len(df.columns)): texts += f' {FLD} {i-n_lbls} ' + df[i].astype(str)
29 | texts = texts.apply(fixup).values.astype(str)
30 |
31 | tok = Tokenizer().proc_all_mp(partition_by_cores(texts))
32 | return tok, list(labels)
33 |
34 |
35 | def get_all(df, n_lbls):
36 | tok, labels = [], []
37 | for i, r in enumerate(df):
38 | print(i)
39 | tok_, labels_ = get_texts(r, n_lbls)
40 |         tok += tok_
41 | labels += labels_
42 | return tok, labels
43 |
44 |
45 | def create_toks(prefix, pr_abbr, chunksize=24000, n_lbls=1):
46 | PATH = f'data/nlp_clas/{prefix}/'
47 |
48 | df_trn = pd.read_csv(f'{PATH}train.csv', header=None, chunksize=chunksize)
49 | df_val = pd.read_csv(f'{PATH}test.csv', header=None, chunksize=chunksize)
50 | print(prefix)
51 |
52 | os.makedirs(f'{PATH}tmp', exist_ok=True)
53 | tok_trn, trn_labels = get_all(df_trn, n_lbls)
54 | tok_val, val_labels = get_all(df_val, n_lbls)
55 |
56 | np.save(f'{PATH}tmp/tok_trn.npy', tok_trn)
57 | np.save(f'{PATH}tmp/tok_val.npy', tok_val)
58 | np.save(f'{PATH}tmp/lbl_trn.npy', trn_labels)
59 | np.save(f'{PATH}tmp/lbl_val.npy', val_labels)
60 |
61 | trn_joined = [' '.join(o) for o in tok_trn]
62 | mdl_fn = f'{PATH}tmp/{pr_abbr}_joined.txt'
63 | open(mdl_fn, 'w', encoding='utf-8').writelines(trn_joined)
64 |
65 |
66 | if __name__ == '__main__': fire.Fire(create_toks)
67 |
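Note: fire.Fire exposes create_toks as a CLI whose flags mirror the function signature; the script expects data/nlp_clas/<prefix>/train.csv and test.csv. Equivalent direct call (the 'imdb' prefix is hypothetical):

    create_toks('imdb', 'imdb', chunksize=24000, n_lbls=1)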
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/tok2id.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from fastai.text import *
3 |
4 |
5 | def tok2id(prefix, max_vocab=60000, min_freq=1):
6 | print(f'prefix {prefix} max_vocab {max_vocab} min_freq {min_freq}')
7 | PATH=f'data/nlp_clas/{prefix}/'
8 | trn_tok = np.load(f'{PATH}tmp/tok_trn.npy')
9 | val_tok = np.load(f'{PATH}tmp/tok_val.npy')
10 |
11 | freq = Counter(p for o in trn_tok for p in o)
12 | print(freq.most_common(25))
13 | itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq]
14 | itos.insert(0, '_pad_')
15 | itos.insert(0, '_unk_')
16 | stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)})
17 | print(len(itos))
18 |
19 | trn_lm = np.array([[stoi[o] for o in p] for p in trn_tok])
20 | val_lm = np.array([[stoi[o] for o in p] for p in val_tok])
21 |
22 | np.save(f'{PATH}tmp/trn_ids.npy', trn_lm)
23 | np.save(f'{PATH}tmp/val_ids.npy', val_lm)
24 | pickle.dump(itos, open(f'{PATH}tmp/itos.pkl', 'wb'))
25 |
26 | if __name__ == '__main__': fire.Fire(tok2id)
27 |
28 |
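Note: tok2id numericalizes the token arrays produced by create_toks; index 0 is '_unk_', index 1 is '_pad_', and the defaultdict sends out-of-vocabulary tokens to 0. Equivalent direct call (same hypothetical 'imdb' prefix):

    tok2id('imdb', max_vocab=60000, min_freq=1)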
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/train_tri_wt.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from fastai.learner import *
3 | from fastai.lm_rnn import *
4 | from fastai.rnn_reg import *
5 | from fastai.rnn_train import *
6 | from fastai.text import *
7 | from sampled_sm import *
8 |
9 |
10 | def train_lm(prefix, cuda_id, cl=1, bs=64, backwards=False, lr=3e-4, startat=0, sampled=True, preload=True):
11 | print(f'prefix {prefix}; cuda_id {cuda_id}; cl {cl}; bs {bs}; backwards {backwards} sampled {sampled} '
12 | f'lr {lr} startat {startat}')
13 | torch.cuda.set_device(cuda_id)
14 | PRE = 'bwd_' if backwards else 'fwd_'
15 | PRE2 = PRE
16 | PRE2 = 'bwd_'
17 | IDS = 'ids'
18 | NLPPATH=Path('data/nlp_clas')
19 | PATH=NLPPATH / prefix
20 | PATH2=NLPPATH / 'wikitext-103_2'
21 | bptt=70
22 | em_sz,nh,nl = 400,1150,3
23 | opt_fn = partial(optim.Adam, betas=(0.8, 0.99))
24 |
25 | if backwards:
26 | trn_lm = np.load(PATH / f'tmp/trn_{IDS}_bwd.npy')
27 | val_lm = np.load(PATH / f'tmp/val_{IDS}_bwd.npy')
28 | else:
29 | trn_lm = np.load(PATH / f'tmp/trn_{IDS}.npy')
30 | val_lm = np.load(PATH / f'tmp/val_{IDS}.npy')
31 | trn_lm = np.concatenate(trn_lm)
32 | val_lm = np.concatenate(val_lm)
33 |
34 | itos = pickle.load(open(PATH / 'tmp/itos.pkl', 'rb'))
35 | vs = len(itos)
36 |
37 | trn_dl = LanguageModelLoader(trn_lm, bs, bptt)
38 | val_dl = LanguageModelLoader(val_lm, bs//5 if sampled else bs, bptt)
39 | md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt)
40 |
41 | tprs = get_prs(trn_lm, vs)
42 | drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15])*0.5
43 | learner,crit = get_learner(drops, 15000, sampled, md, em_sz, nh, nl, opt_fn, tprs)
44 | wd=1e-7
45 | learner.metrics = [accuracy]
46 |
47 | if (startat<1) and preload:
48 | wgts = torch.load(PATH2 / f'models/{PRE2}lm_3.h5', map_location=lambda storage, loc: storage)
49 | ew = to_np(wgts['0.encoder.weight'])
50 | row_m = ew.mean(0)
51 |
52 | itos2 = pickle.load(open(PATH2 / 'tmp/itos.pkl', 'rb'))
53 | stoi2 = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos2)})
54 | nw = np.zeros((vs, em_sz), dtype=np.float32)
55 | for i,w in enumerate(itos):
56 | r = stoi2[w]
57 | nw[i] = ew[r] if r>=0 else row_m
58 |
59 | wgts['0.encoder.weight'] = T(nw)
60 | wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(nw))
61 | wgts['1.decoder.weight'] = T(np.copy(nw))
62 | learner.model.load_state_dict(wgts)
63 | elif startat==1: learner.load(f'{PRE}lm_4')
64 | learner.metrics = [accuracy]
65 |
66 | lrs = np.array([lr/6,lr/3,lr,lr])
67 | #lrs=lr
68 |
69 | learner.unfreeze()
70 | learner.fit(lrs, 1, wds=wd, use_clr=(32,10), cycle_len=cl)
71 | learner.save(f'{PRE}lm_4')
72 | learner.save_encoder(f'{PRE}lm_4_enc')
73 |
74 | if __name__ == '__main__': fire.Fire(train_lm)
75 |
76 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/lsun_scripts/lsun-data.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 | from os.path import exists, join
6 |
7 | import lmdb
8 | from tqdm import tqdm
9 |
10 | __author__ = 'Fisher Yu'
11 | __email__ = 'fy@cs.princeton.edu'
12 | __license__ = 'MIT'
13 | # (Minor edits by Jeremy Howard)
14 |
15 |
16 | def export_images(db_path, out_dir, flat=False):
17 | print('Exporting', db_path, 'to', out_dir)
18 | env = lmdb.open(db_path, map_size=1099511627776,
19 | max_readers=100, readonly=True)
20 | with env.begin(write=False) as txn:
21 | cursor = txn.cursor()
22 | for key, val in tqdm(cursor):
23 | key = key.decode()
24 | if not flat: image_out_dir = join(out_dir, '/'.join(key[:3]))
25 | else: image_out_dir = out_dir
26 | if not exists(image_out_dir): os.makedirs(image_out_dir)
27 | image_out_path = join(image_out_dir, key + '.jpg')
28 | with open(image_out_path, 'wb') as fp: fp.write(val)
29 |
30 |
31 | def main():
32 | parser = argparse.ArgumentParser()
33 | parser.add_argument('lmdb_path', nargs='+', type=str,
34 | help='The path to the lmdb database folder. '
35 | 'Support multiple database paths.')
36 | parser.add_argument('--out_dir', type=str, default='')
37 | parser.add_argument('--flat', action='store_true',
38 | help='If enabled, the images are imported into output '
39 | 'directory directly instead of hierarchical '
40 | 'directories.')
41 | args = parser.parse_args()
42 | lmdb_paths = args.lmdb_path
43 | for lmdb_path in lmdb_paths: export_images(lmdb_path, args.out_dir, args.flat)
44 |
45 |
46 | if __name__ == '__main__': main()
47 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/lsun_scripts/lsun-download.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from __future__ import print_function, division
5 |
6 | import argparse
7 | import json
8 | import subprocess
9 | from os.path import join
10 |
11 | from six.moves.urllib.request import urlopen
12 |
13 | __author__ = 'Fisher Yu'
14 | __email__ = 'fy@cs.princeton.edu'
15 | __license__ = 'MIT'
16 |
17 |
18 | def list_categories(tag):
19 | url = 'http://lsun.cs.princeton.edu/htbin/list.cgi?tag=' + tag
20 | f = urlopen(url)
21 | return json.loads(f.read())
22 |
23 |
24 | def download(out_dir, category, set_name, tag):
25 | url = 'http://lsun.cs.princeton.edu/htbin/download.cgi?tag={tag}' \
26 | '&category={category}&set={set_name}'.format(**locals())
27 | if set_name == 'test':
28 | out_name = 'test_lmdb.zip'
29 | else:
30 | out_name = '{category}_{set_name}_lmdb.zip'.format(**locals())
31 | out_path = join(out_dir, out_name)
32 | cmd = ['curl', url, '-o', out_path]
33 | print('Downloading', category, set_name, 'set')
34 | subprocess.call(cmd)
35 |
36 |
37 | def main():
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument('--tag', type=str, default='latest')
40 | parser.add_argument('-o', '--out_dir', default='')
41 | parser.add_argument('-c', '--category', default=None)
42 | args = parser.parse_args()
43 |
44 | categories = list_categories(args.tag)
45 | if args.category is None:
46 | print('Downloading', len(categories), 'categories')
47 | for category in categories:
48 | download(args.out_dir, category, 'train', args.tag)
49 | download(args.out_dir, category, 'val', args.tag)
50 | download(args.out_dir, '', 'test', args.tag)
51 | else:
52 | if args.category == 'test':
53 | download(args.out_dir, '', 'test', args.tag)
54 | elif args.category not in categories:
55 | print('Error:', args.category, "doesn't exist in",
56 | args.tag, 'LSUN release')
57 | else:
58 | download(args.out_dir, args.category, 'train', args.tag)
59 | download(args.out_dir, args.category, 'val', args.tag)
60 |
61 |
62 | if __name__ == '__main__':
63 | main()
64 |
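
A hedged usage sketch with the functions above: fetch a single category from the latest release. The category name and output directory are illustrative.

```python
categories = list_categories('latest')
if 'bedroom' in categories:
    download('.', 'bedroom', 'train', 'latest')  # writes ./bedroom_train_lmdb.zip via curl
    download('.', 'bedroom', 'val', 'latest')    # writes ./bedroom_val_lmdb.zip
```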
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/ppt/lesson8.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/ppt/lesson8.pptx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/xl/dl-examples.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/xl/dl-examples.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/excel/naivebayes.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/excel/naivebayes.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 | """
10 | Collects iterables lazily, rather than immediately.
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |         Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 | """
14 | if timeout is not None: end_time = timeout + time.time()
15 | if prefetch is None: prefetch = self._max_workers
16 | if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 | argsiter = zip(*iterables)
18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch))
19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator():
21 | nonlocal argsiter
22 | try:
23 | while fs:
24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time())
25 | # Got a result, future needn't be cancelled
26 | del fs[0]
27 | # Dispatch next task before yielding to keep pipeline full
28 | if argsiter:
29 | try:
30 | args = next(argsiter)
31 | except StopIteration:
32 | argsiter = None
33 | else:
34 | fs.append(self.submit(fn, *args))
35 | yield res
36 | finally:
37 | for future in fs: future.cancel()
38 | return result_iterator()
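
A hedged usage sketch of the executor above: because `map` is lazy, breaking out of the loop early means only about `max_workers + prefetch` tasks are ever submitted, not one per input.

```python
import time

def slow_double(x):
    time.sleep(0.01)          # stand-in for I/O-bound work
    return 2 * x

with LazyThreadPoolExecutor(max_workers=4) as ex:
    for y in ex.map(slow_double, range(10**6), prefetch=8):
        if y >= 20:
            break             # at most 4 + 8 tasks were in flight at any point
```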
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 | def __init__(self, module):
7 | super(FP16, self).__init__()
8 | self.module = batchnorm_to_fp32(module.half())
9 |
10 | def forward(self, input):
11 | return self.module(input.half())
12 |
13 | def load_state_dict(self, *inputs, **kwargs):
14 | self.module.load_state_dict(*inputs, **kwargs)
15 |
16 | def state_dict(self, *inputs, **kwargs):
17 | return self.module.state_dict(*inputs, **kwargs)
18 |
19 | def batchnorm_to_fp32(module):
20 | '''
21 |     Converts BatchNorm layers to have parameters in single precision.
22 |     Finds all such layers and converts them back to float. This can't
23 |     be done with the built-in .apply, as that function would apply
24 |     fn to all modules, parameters, and buffers, so we wouldn't be
25 |     able to guard the float conversion based on the module type.
26 | '''
27 | if isinstance(module, nn.modules.batchnorm._BatchNorm):
28 | module.float()
29 | for child in module.children():
30 | batchnorm_to_fp32(child)
31 | return module
32 |
33 | def copy_model_to_fp32(m, optim):
34 | """ Creates a fp32 copy of model parameters and sets optimizer parameters
35 | """
36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()]
37 | optim_groups = [group['params'] for group in optim.param_groups]
38 | iter_fp32_params = iter(fp32_params)
39 | for group_params in optim_groups:
40 | for i in range(len(group_params)):
41 | fp32_param = next(iter_fp32_params)
42 | fp32_param.requires_grad = group_params[i].requires_grad
43 | group_params[i] = fp32_param
44 | return fp32_params
45 |
46 | def copy_fp32_to_model(m, fp32_params):
47 | m_params = list(m.parameters())
48 | for fp32_param, m_param in zip(fp32_params, m_params):
49 | m_param.data.copy_(fp32_param.data)
50 |
51 | def update_fp32_grads(fp32_params, m):
52 | m_params = list(m.parameters())
53 | for fp32_param, m_param in zip(fp32_params, m_params):
54 | if fp32_param.grad is None:
55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
56 | fp32_param.grad.data.copy_(m_param.grad.data)
57 |
58 |
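
A hedged sketch of how these helpers compose into one mixed-precision training step. Here `my_model`, `optimizer`, `loss_fn`, and `loader` are assumptions, and a CUDA device is required since `copy_model_to_fp32` builds `torch.cuda.FloatTensor` master copies.

```python
model = FP16(my_model.cuda())                 # fp16 weights, fp32 batchnorm
fp32_params = copy_model_to_fp32(model, optimizer)

for x, y in loader:
    loss = loss_fn(model(x.cuda()), y.cuda())
    model.zero_grad()
    loss.backward()                           # gradients land on the fp16 model
    update_fp32_grads(fp32_params, model)     # copy them into the fp32 master params
    optimizer.step()                          # update the fp32 master weights
    copy_fp32_to_model(model, fp32_params)    # write the result back into the fp16 model
```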
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/imports.py:
--------------------------------------------------------------------------------
1 | import PIL, numpy as np, collections, bcolz, random, cv2
2 | import pandas as pd, pickle, string, sys, re, time, copy
3 | import matplotlib
4 | from pathlib import Path
5 |
6 | matplotlib.rc('animation', html='html5')
7 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
8 |
9 | from ipykernel.kernelapp import IPKernelApp
10 | def in_notebook(): return IPKernelApp.initialized()
11 |
12 | def in_ipynb():
13 | try:
14 | cls = get_ipython().__class__.__name__
15 | return cls == 'ZMQInteractiveShell'
16 | except NameError:
17 | return False
18 |
19 | import tqdm as tq
20 |
21 |
22 | def clear_tqdm():
23 | inst = getattr(tq.tqdm, '_instances', None)
24 | if not inst: return
25 | try:
26 | for i in range(len(inst)): inst.pop().close()
27 | except Exception:
28 | pass
29 |
30 | if in_notebook():
31 | def tqdm(*args, **kwargs):
32 | clear_tqdm()
33 | return tq.tqdm(*args, file=sys.stdout, **kwargs)
34 | def trange(*args, **kwargs):
35 | clear_tqdm()
36 | return tq.trange(*args, file=sys.stdout, **kwargs)
37 | else:
38 | from tqdm import tqdm, trange
39 | tnrange=trange
40 | tqdm_notebook=tqdm
41 |
42 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | def cond_init(m, init_fn):
4 |     if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)):
5 |         if hasattr(m, 'weight'): init_fn(m.weight)
6 |         if hasattr(m, 'bias'): m.bias.data.fill_(0.)
7 |
8 | def apply_init(m, init_fn):
9 |     m.apply(lambda x: cond_init(x, init_fn))
10 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/io.py:
--------------------------------------------------------------------------------
1 | import os
2 | from urllib.request import urlretrieve
3 | from tqdm import tqdm
4 |
5 |
6 | class TqdmUpTo(tqdm):
7 | def update_to(self, b=1, bsize=1, tsize=None):
8 | if tsize is not None: self.total = tsize
9 | self.update(b * bsize - self.n)
10 |
11 | def get_data(url, filename):
12 | if not os.path.exists(filename):
13 |
14 | dirname = os.path.dirname(filename)
15 | if not os.path.exists(dirname):
16 | os.makedirs(dirname)
17 |
18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
19 | urlretrieve(url, filename, reporthook=t.update_to)
20 |
21 |
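
A hedged usage example of `get_data` above (the URL and destination path are illustrative): it creates the destination directory if needed and shows a tqdm progress bar while downloading.

```python
get_data('https://example.com/datasets/train.csv', 'data/train.csv')
```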
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/layer_optimizer.py:
--------------------------------------------------------------------------------
1 | from .core import *
2 |
3 | def opt_params(parm, lr, wd):
4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd}
5 |
6 | class LayerOptimizer():
7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None):
8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups]
9 | if not isinstance(lrs, Iterable): lrs=[lrs]
10 | if len(lrs)==1: lrs=lrs*len(layer_groups)
11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds]
13 | if len(wds)==1: wds=wds*len(layer_groups)
14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds
15 | self.opt = opt_fn(self.opt_params())
16 |
17 | def opt_params(self):
18 | assert(len(self.layer_groups) == len(self.lrs))
19 | assert(len(self.layer_groups) == len(self.wds))
20 | params = list(zip(self.layer_groups,self.lrs,self.wds))
21 | return [opt_params(*p) for p in params]
22 |
23 | @property
24 | def lr(self): return self.lrs[-1]
25 |
26 | @property
27 | def mom(self):
28 | if 'betas' in self.opt.param_groups[0]:
29 | return self.opt.param_groups[0]['betas'][0]
30 | else:
31 | return self.opt.param_groups[0]['momentum']
32 |
33 | def set_lrs(self, lrs):
34 | if not isinstance(lrs, Iterable): lrs=[lrs]
35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups)
36 | set_lrs(self.opt, lrs)
37 | self.lrs=lrs
38 |
39 | def set_wds(self, wds):
40 | if not isinstance(wds, Iterable): wds=[wds]
41 | if len(wds)==1: wds=wds*len(self.layer_groups)
42 | set_wds(self.opt, wds)
43 | self.wds=wds
44 |
45 | def set_mom(self,momentum):
46 | if 'betas' in self.opt.param_groups[0]:
47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1])
48 | else:
49 | for pg in self.opt.param_groups: pg['momentum'] = momentum
50 |
51 | def set_beta(self,beta):
52 | if 'betas' in self.opt.param_groups[0]:
53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta)
54 | elif 'alpha' in self.opt.param_groups[0]:
55 | for pg in self.opt.param_groups: pg['alpha'] = beta
56 |
57 | def set_opt_fn(self, opt_fn):
58 | if type(self.opt) != type(opt_fn(self.opt_params())):
59 | self.opt = opt_fn(self.opt_params())
60 |
61 | def zip_strict_(l, r):
62 | assert(len(l) == len(r))
63 | return zip(l, r)
64 |
65 | def set_lrs(opt, lrs):
66 | if not isinstance(lrs, Iterable): lrs=[lrs]
67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups)
68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr
69 |
70 | def set_wds(opt, wds):
71 | if not isinstance(wds, Iterable): wds=[wds]
72 | if len(wds)==1: wds=wds*len(opt.param_groups)
73 | assert(len(opt.param_groups) == len(wds))
74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd
75 |
76 |
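
A hedged sketch of discriminative learning rates with `LayerOptimizer` above. It assumes the `chain_params` helper imported from `.core` handles plain `nn.Module` layer groups; the two groups here are illustrative.

```python
import torch.nn as nn
import torch.optim as optim

body = nn.Sequential(nn.Linear(10, 10), nn.ReLU())   # lower lr for early layers
head = nn.Linear(10, 2)                              # higher lr for the head
lo = LayerOptimizer(optim.SGD, [body, head], lrs=[1e-3, 1e-2], wds=1e-4)
lo.set_lrs([1e-4, 1e-3])                             # later: lower both rates group-wise
```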
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class AdaptiveConcatPool2d(nn.Module):
5 |     "Concatenates adaptive max-pooling and adaptive average-pooling along the channel dimension."
6 |     def __init__(self, sz=None):
7 |         super().__init__()
8 |         sz = sz or (1,1)
9 |         self.ap = nn.AdaptiveAvgPool2d(sz)
10 |         self.mp = nn.AdaptiveMaxPool2d(sz)
11 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
12 |
13 | class Lambda(nn.Module):
14 |     "Wraps an arbitrary function as a module."
15 |     def __init__(self, f): super().__init__(); self.f=f
16 |     def forward(self, x): return self.f(x)
17 |
18 | class Flatten(nn.Module):
19 |     "Flattens all dimensions after the batch dimension."
20 |     def __init__(self): super().__init__()
21 |     def forward(self, x): return x.view(x.size(0), -1)
22 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
4 |     "F-beta score of thresholded predictions per sample, averaged over the batch."
5 |     y_pred = (y_pred.float() > threshold).float()
6 |     y_true = y_true.float()
7 |     tp = (y_pred * y_true).sum(dim=1)
8 |     precision = tp / (y_pred.sum(dim=1)+eps)
9 |     recall = tp / (y_true.sum(dim=1)+eps)
10 |     return torch.mean(
11 |         precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2))
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | import torch
3 |
4 | def accuracy_np(preds, targs):
5 | preds = np.argmax(preds, 1)
6 | return (preds==targs).mean()
7 |
8 | def accuracy(preds, targs):
9 | preds = torch.max(preds, dim=1)[1]
10 | return (preds==targs).float().mean()
11 |
12 | def accuracy_thresh(thresh):
13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh)
14 |
15 | def accuracy_multi(preds, targs, thresh):
16 | return ((preds>thresh).float()==targs).float().mean()
17 |
18 | def accuracy_multi_np(preds, targs, thresh):
19 | return ((preds>thresh)==targs).mean()
20 |
21 | def recall(preds, targs, thresh=0.5):
22 | pred_pos = preds > thresh
23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
24 | return tpos.sum()/targs.sum()
25 |
26 | def precision(preds, targs, thresh=0.5):
27 | pred_pos = preds > thresh
28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 | return tpos.sum()/pred_pos.sum()
30 |
31 | def fbeta(preds, targs, beta, thresh=0.5):
32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
33 | This is the micro averaged version where the true positives, false negatives and
34 | false positives are calculated globally (as opposed to on a per label basis).
35 |
36 | beta == 1 places equal weight on precision and recall, b < 1 emphasizes precision and
37 | beta > 1 favors recall.
38 | """
39 | assert beta > 0, 'beta needs to be greater than 0'
40 | beta2 = beta ** 2
41 | rec = recall(preds, targs, thresh)
42 | prec = precision(preds, targs, thresh)
43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec)
44 |
45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
46 |
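
A hedged worked example with the definitions above. The values are chosen by hand; exact return tensor types may vary across PyTorch versions.

```python
import torch

preds = torch.tensor([[0.9, 0.2], [0.4, 0.7]])
targs = torch.tensor([[1., 0.], [1., 1.]])
# thresholded predictions: [[1, 0], [0, 1]] -> tp=2, predicted positives=2, actual positives=3
print(f1(preds, targs))   # precision 1.0, recall 2/3 -> F1 = 0.8
```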
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/cifar10/wideresnet.py:
--------------------------------------------------------------------------------
1 | # CIFAR-10 WideResNet for the DAWNBench submission
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from ...layers import *
5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False)
6 |
7 | def bn(ni, init_zero=False):
8 | m = nn.BatchNorm2d(ni)
9 | m.weight.data.fill_(0 if init_zero else 1)
10 | m.bias.data.zero_()
11 | return m
12 |
13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False):
14 | bn_initzero = bn(ni, init_zero=init_zero)
15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride))
16 |
17 | def noop(x): return x
18 |
19 | class BasicBlock(nn.Module):
20 | def __init__(self, ni, nf, stride, drop_p=0.0):
21 | super().__init__()
22 | self.bn = nn.BatchNorm2d(ni)
23 | self.conv1 = conv_2d(ni, nf, 3, stride)
24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1)
25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None
26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop
27 |
28 | def forward(self, x):
29 | x2 = F.relu(self.bn(x), inplace=True)
30 | r = self.shortcut(x2)
31 | x = self.conv1(x2)
32 | if self.drop: x = self.drop(x)
33 | x = self.conv2(x) * 0.2
34 | return x.add_(r)
35 |
36 |
37 | def _make_group(N, ni, nf, block, stride, drop_p):
38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)]
39 |
40 | class WideResNet(nn.Module):
41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16):
42 | super().__init__()
43 | n_channels = [start_nf]
44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k)
45 |
46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1
47 | for i in range(num_groups):
48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p)
49 |
50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1),
51 | Flatten(), nn.Linear(n_channels[3], num_classes)]
52 | self.features = nn.Sequential(*layers)
53 |
54 | def forward(self, x): return self.features(x)
55 |
56 |
57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.)
58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.)
59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.)
60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2)
61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.)
62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.)
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2)
64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2)
65 |
66 |
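
A hedged smoke test of the constructors above on a CIFAR-sized batch.

```python
import torch

m = wrn_22()
x = torch.randn(2, 3, 32, 32)   # batch of 2 CIFAR-10 sized images
print(m(x).shape)               # expected: torch.Size([2, 10])
```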
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from .layers import *
4 |
5 |
6 |
7 | class ConvBN(nn.Module):
8 | "convolutional layer then batchnorm"
9 |
10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0):
11 | super().__init__()
12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
14 | self.relu = nn.LeakyReLU(0.1, inplace=True)
15 |
16 | def forward(self, x): return self.relu(self.bn(self.conv(x)))
17 |
18 | class DarknetBlock(nn.Module):
19 | def __init__(self, ch_in):
20 | super().__init__()
21 | ch_hid = ch_in//2
22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, x): return self.conv2(self.conv1(x)) + x
26 |
27 | class Darknet(nn.Module):
28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
29 |
30 | def make_group_layer(self, ch_in, num_blocks, stride=1):
31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)]
32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2))
33 | return layers
34 |
35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32):
36 | super().__init__()
37 | nf = start_nf
38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
39 | for i,nb in enumerate(num_blocks):
40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2))
41 | nf *= 2
42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
43 | self.layers = nn.Sequential(*layers)
44 |
45 | def forward(self, x): return self.layers(x)
46 |
47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes)
48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes)
49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24)
50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16)
51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes)
52 |
53 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h)
30 | x = np.random.randint(w)
31 |
32 |                 y1 = int(np.clip(y - self.length // 2, 0, h))
33 |                 y2 = int(np.clip(y + self.length // 2, 0, h))
34 |                 x1 = int(np.clip(x - self.length // 2, 0, w))
35 |                 x2 = int(np.clip(x + self.length // 2, 0, w))
36 |
37 | mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
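
A hedged usage example of `Cutout` above on a random image tensor.

```python
import torch

cut = Cutout(n_holes=1, length=8)
img = torch.rand(3, 32, 32)     # C x H x W
out = cut(img)                  # one random 8x8 patch zeroed across all channels
```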
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/digit.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/digit.gif
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/ethics_recidivism.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/ethics_recidivism.jpg
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/mnist.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/overfitting2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/sgd2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/sgd2.gif
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/what_is_pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/what_is_pytorch.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler1.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler3.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler4.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/ppt/2017-12-ethics.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/ppt/2017-12-ethics.pptx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/ml1/ppt/ml_applications.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/ppt/ml_applications.pptx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/README.md:
--------------------------------------------------------------------------------
1 | # fastai doc project
2 |
3 | The fastai doc project is just getting underway! So now is a great time to get involved. Here are some thoughts and guidelines to help you get oriented...
4 |
5 | ## Project goals and approach
6 |
7 | The idea of this project is to create documentation that makes readers say "wow that's the most fantastic documentation I've ever read". So... no pressure. :) How do we do this? By taking the philosophies demonstrated in fast.ai's courses and bringing them to the world of documentation. Here are a few guidelines to consider:
8 |
9 | - Assume the reader is intelligent and interested
10 | - Don't assume the reader has any specific knowledge about the field you're documenting
11 | - If you need the reader to have some knowledge to understand your documentation, and there is some effective external resource they can learn from, point them there rather than trying to do it all yourself
12 | - Use code to describe what's going on where possible, not math
13 | - Create a notebook demonstrating the ideas you're documenting (include the notebook in this repo) and show examples from the notebook directly in your docs
14 | - Use a top-down approach; that is, first explain what problem the code is meant to solve, and at a high level how it solves it, and then go deeper into the details once those concepts are well understood
15 | - For common tasks, show full end-to-end examples of how to complete the task.
16 |
17 | Use pictures, tables, analogies, and other explanatory devices (even embedded video!) wherever they can help the reader understand. Use hyperlinks liberally, both within these docs and to external resources.
18 |
19 | We don't want this detailed documentation to create clutter in the code, and we also don't want to overwhelm the user when they just want a quick summary of what a method does. Therefore, docstrings should generally be limited to a single line. The python standard library is documented this way--for instance, the docstring for `re.compile()` is the single line "*Compile a regular expression pattern, returning a pattern object.*" But the full documentation of the `re` library on the python web site goes into detail about this method, how it's used, and its relation to other parts of the library.
20 |
21 |
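
For instance, a sketch of the single-line docstring style described above; the function stub is illustrative, with its summary line borrowed from `re.compile`:

```python
def compile(pattern, flags=0):
    "Compile a regular expression pattern, returning a Pattern object."
    ...
```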
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/docs/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/dataloader.adoc:
--------------------------------------------------------------------------------
1 | = fastai.dataloader
2 |
3 | == Introduction and overview
4 |
5 | *Note:* the fastai DataLoader has a similar API to the PyTorch DataLoader. Please see http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader[the PyTorch documentation] for usage and details. The documentation presented here focuses on the differences between the two classes.
6 |
7 | == {{class DataLoader,dataset,batch_size=1,shuffle=False,sampler=None,batch_sampler=None,pad_idx=0,num_workers=None,pin_memory=False,drop_last=False,pre_pad=True,half=False,transpose=False,transpose_y=False}}
8 |
9 | .Used to iterate through a dataset to pass data into a model for training.
10 |
11 | === {{arguments}}
12 |
13 | For information on arguments with no descriptions, please see http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader[the PyTorch documentation]
14 |
15 | {{arg dataset,Dataset}}
16 |
17 | {{arg batch_size,int,1}}
18 |
19 | {{arg shuffle,bool,False}}
20 |
21 | {{arg sampler,Sampler,None}}
22 |
23 | {{arg batch_sampler,BatchSampler,None}}
24 |
25 | {{arg pad_idx,int,0}}
26 | The index value used to pad shorter items so that all items in a batch have the same length. See: `pre_pad`
27 |
28 | {{arg num_workers,int,None}}
29 | Allows the user to manually set the number of workers. If left as `None`, it defaults to the number of CPU cores on the system. If > 0, the dataloader will spawn `num_workers` worker threads using `concurrent.futures.ThreadPoolExecutor`.
30 |
31 | {{arg pin_memory,bool,False}}
32 |
33 | {{arg drop_last,bool,False}}
34 |
35 | {{arg pre_pad,bool,True}}
36 | Determines whether the padding values (`pad_idx`) are added at the beginning of each item or at the end. By default, padding is added at the beginning.
37 |
38 | {{arg half,bool,False}}
39 | If `True`, `torch.cuda.HalfTensor()` will be used instead of `torch.FloatTensor()`.
40 |
41 | {{arg transpose,bool,False}}
42 | If `True`, each batch will have its inputs transposed.
43 |
44 | {{arg transpose_y,bool,False}}
45 | If `True`, each batch will have its outputs (labels) transposed.
46 |
47 | === {{methods}}
48 |
49 | {{method jag_stack,b}}
50 |
51 | Helper method for `np_collate()`. Returns a np.array of the batch passed in, with shorter rows padded to the length of the longest row using `self.pad_idx` as the padding value. If all items inside the batch are the same length, no padding is added.
52 |
53 | {{method np_collate,batch}}
54 |
55 | Helper method for `get_batch()`. Based on the input data type, it creates an appropriate np.array, list, or dict. If the method is passed a string or list of strings, it simply returns the parameter without modification. Batches must contain numbers, strings, dicts, or lists, and this method also ensures this is the case.
56 |
57 | {{method get_batch,indices}}
58 |
59 | Helper method for `__iter__()`. When an iterator of the dataloader object is created, `get_batch()` is used to retrieve items from the dataset and apply transposes if needed based on `self.transpose` and `self.transpose_y`.
60 |
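
A hedged sketch of the arguments above in use; `my_dataset` is an assumed fastai `Dataset` instance.

```python
from fastai.dataloader import DataLoader

dl = DataLoader(my_dataset, batch_size=64, shuffle=True,
                pad_idx=1, pre_pad=False,   # pad shorter items at the end with index 1
                num_workers=4, transpose=True)
for x, y in dl:
    pass
```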
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/md_expander.py:
--------------------------------------------------------------------------------
1 | import re
2 | import sys
3 |
4 |
5 | def expand(filename):
6 |
7 | f = open(filename, "r")
8 | contents = f.read()
9 |
10 | regex_inside = r"\{\{(.*?)\}\}"
11 | regex_outside = r"(^|\}\})(.*?)(\{\{|$)"
12 |
13 | within = re.finditer(regex_inside, contents, re.MULTILINE | re.DOTALL)
14 | outside = re.finditer(regex_outside, contents, re.MULTILINE | re.DOTALL)
15 |
16 | for matchNum, match in enumerate(within):
17 | for groupNum in range(0, len(match.groups())):
18 | group = match.group(1)
19 | if group.startswith("class"):
20 | classname = re.search(r" (.*?),", group).groups()[0]
21 | params = re.search(r",(.*)", group).groups()[0]
22 |                 print('Class: ' + classname + '(' + params + ')')
23 |
24 | print (match.group(1))
25 |
26 | # split = re.split(regex_inside, contents)
27 | #
28 | # for i, item in enumerate(split):
29 |
30 |
31 |
32 | if __name__ == '__main__':
33 |
34 | expand(sys.argv[1])
35 |
36 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/module-decisions.md:
--------------------------------------------------------------------------------
1 | # Module Decisions
2 |
3 | ## Introduction
4 |
5 | There are many ways of doing any one thing in programming. Instead of getting into debates about the one right way of doing things, in the `fastai` library we would like to make decisions and then stick with them. This page lists any such decisions made.
6 |
7 | ### Image Data
8 | - Coordinates
9 |   - Computer vision uses coordinates in the format `(x, y)`, e.g. PIL
10 |   - Maths uses `(y, x)`, e.g. NumPy, PyTorch
11 |   - `fastai` will use `(y, x)`
12 | - Bounding Boxes
13 |   - Will use `(coordinates of top-left corner, coordinates of bottom-right corner)` instead of `(coordinates of top-left corner, (height, width))`, as in the sketch below
14 |
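
A minimal sketch of the two conventions above (the values are illustrative):

```python
import numpy as np

# (y, x) coordinate order, bounding box as (top-left, bottom-right) corners:
bbox = np.array([10, 20, 50, 80])                      # y_min=10, x_min=20, y_max=50, x_max=80
height, width = bbox[2] - bbox[0], bbox[3] - bbox[1]   # 40, 60
```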
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/templates.py:
--------------------------------------------------------------------------------
1 | HEADER = '''
2 | = fastai.{}
3 |
4 | == Introduction and overview
5 |
6 | ```
7 | ...example...
8 | ```
9 |
10 |
11 | '''
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/testing.adoc:
--------------------------------------------------------------------------------
1 | = Notes on fastai testing style
2 |
3 | We chose pytest as a framework since it's more modern, concise, and https://www.slant.co/topics/2621/~python-unit-testing-frameworks[recommended by coders].
4 |
5 | We also try to follow this suggestion from the http://docs.python-guide.org/en/latest/writing/tests/[python testing guide]:
6 |
7 | ____
8 | Use long and descriptive names for testing functions. The style guide here is slightly different than that of running code, where short names are often preferred. The reason is testing functions are never called explicitly. square() or even sqr() is ok in running code, but in testing code you would have names such as test_square_of_number_2(), test_square_negative_number(). These function names are displayed when a test fails, and should be as descriptive as possible.
9 | ____
10 |
11 | More generally, aim to write tests that also explain the code they are testing. A really good test suite can also serve as really good documentation.
12 |
13 | == Testing patterns
14 |
15 | * Do not use mock or fake objects. The library is nice enough that real versions of required objects can be used without prohibitive overhead.
16 | * Keep test methods small and tidy, just like any other code.
17 | * Aim to add a regression test as part of any bug fix PR.
18 | * Add tests before refactoring, so they can help prove correctness.
19 |
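
A hedged sketch of the naming style above in pytest; `square` is an assumed function under test.

```python
def square(x): return x * x          # assumed function under test

def test_square_of_negative_number_is_positive():
    assert square(-3) == 9
```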
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/transforms-tmpl.adoc:
--------------------------------------------------------------------------------
1 | = fastai.transforms
2 |
3 | == Introduction and overview
4 |
5 | The fastai transforms pipeline for images is designed to convert your independent and dependent variables into a form ready to be batched by your DataLoader and passed to your model. It is most commonly used like this:
6 |
7 |
8 | ```
9 | ...example...
10 | ```
11 |
12 | The most common types of transforms are predefined in ...
13 |
14 | The most likely customizations you might need to do are ...
15 |
16 | You can create custom transform pipelines using an approach like: ...
17 |
18 | If you want to create a custom transform, you will need to: ...
19 |
20 | == {{class Transform,tfm_y=TfmType.NO}}
21 |
22 | .Abstract parent for all transforms.
23 |
24 | Override do_transform to implement transformation of a single object.
25 |
26 | === {{arguments}}
27 |
28 | {{arg tfm_y,TfmType,TfmType.NO}}
29 | Type of transform. For details, see {{xref TfmType}}.
30 |
31 | === {{methods}}
32 |
33 | {{method set_state,}}
34 |
35 | A transform may include a random component. If it does, it will often need to transform `y` using the same random values as `x` (e.g. a horizontal flip in segmentation must be applied to the mask as well). Therefore, this method is used to ensure all random state is calculated in one place.
36 |
37 | **NB:** Transformations are often run in multiple threads. Therefore any state must be stored in thread-local storage. The `Transform` class provides a thread-local `store` attribute for you to use. See {{xref RandomFlip}} for an example of how to use random state safely in `Transform` subclasses.
38 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/docs/transforms.adoc:
--------------------------------------------------------------------------------
1 | = fastai.transforms
2 | Jeremy Howard and contributors
3 | :toc:
4 |
5 | == Introduction and overview
6 |
7 | The fastai transforms pipeline for images is designed to convert your independent and dependent variables into a form ready to be batched by your DataLoader and passed to your model. It is most commonly used like this:
8 |
9 |
10 | ```
11 | ...example...
12 | ```
13 |
14 | The most common types of transforms are predefined in ...
15 |
16 | The most likely customizations you might need to do are ...
17 |
18 | You can create custom transform pipelines using an approach like: ...
19 |
20 | If you want to create a custom transform, you will need to: ...
21 |
22 | [[Transform]]
23 | == Class Transform [.small]#(tfm_y=TfmType.NO)#
24 |
25 | .Abstract parent for all transforms.
26 |
27 | Override do_transform to implement transformation of a single object.
28 |
29 | === Arguments
30 |
31 | tfm_y (type TfmType, default TfmType.NO)::
32 | Type of transform. For details, see xref:TfmType[TfmType]
33 |
34 | === Methods
35 |
36 | set_state::
37 | A transform may include a random component. If it does, it will often need to transform `y` using the same random values as `x` (e.g. a horizontal flip in segmentation must be applied to the mask as well). Therefore, this method is used to ensure all random state is calculated in one place.
38 | +
39 | **NB:** Transformations are often run in multiple threads. Therefore any state must be stored in thread-local storage. The `Transform` class provides a thread-local `store` attribute for you to use. See {{xref RandomFlip}} for an example of how to use random state safely in `Transform` subclasses.
40 |
41 | [[TfmType]]
42 | == Class TfmType:IntEnum
43 |
44 | .Type of transformation.
45 |
46 | NO:: the default, y does not get transformed when x is transformed.
47 | PIXEL:: x and y are images and should be transformed in the same way. _E.g.: image segmentation._
48 | COORD:: y are coordinates (i.e. bounding boxes)
49 | CLASS:: y are class labels (same behaviour as PIXEL, except no normalization)
50 |
51 |
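
A hedged sketch of the `set_state` pattern described above; the names are illustrative, not the exact fastai API. Random state is drawn once per call, kept in thread-local storage, and reused for both `x` and `y`.

```python
import random, threading

class RandomFlipSketch:
    store = threading.local()                # per-thread random state
    def set_state(self):
        self.store.flip = random.random() < 0.5
    def do_transform(self, x, is_y):
        # the same draw applies to x and to its mask y; x is an (H, W) numpy array
        return x[:, ::-1] if self.store.flip else x
```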
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/environment-cpu.yml:
--------------------------------------------------------------------------------
1 | name: fastai-cpu
2 | channels:
3 | - fastai
4 | - pytorch
5 | - defaults
6 | - peterjc123
7 | dependencies:
8 | - scipy
9 | - numpy
10 | - pillow
11 | - jpeg
12 | - spacy
13 | - zlib
14 | - freetype
15 | - libtiff
16 | - bleach
17 | - certifi
18 | - cffi
19 | - cycler
20 | - decorator
21 | - entrypoints
22 | - expat
23 | #- fontconfig
24 | #- glib
25 | - html5lib
26 | - icu
27 | - ipykernel
28 | - ipython
29 | - ipython_genutils
30 | - ipywidgets
31 | #- jbig
32 | - jedi
33 | - jinja2
34 | - jsonschema
35 | - jupyter
36 | - jupyter_client
37 | - jupyter_console
38 | - jupyter_core
39 | - conda-forge::jupyter_contrib_nbextensions
40 | #- libffi
41 | #- libgcc
42 | #- libgfortran
43 | - libiconv
44 | - libpng
45 | - libsodium
46 | - libxml2
47 | - markupsafe
48 | - matplotlib
49 | - mistune
50 | - mkl
51 | - nbconvert
52 | - nbformat
53 | - notebook
54 | - numpy
55 | - olefile
56 | - openssl
57 | - pandas
58 | - pandocfilters
59 | - path.py
60 | - patsy
61 | - pcre
62 | - pexpect
63 | - pickleshare
64 | - pillow
65 | - pip
66 | - prompt_toolkit
67 | #- ptyprocess
68 | - pycparser
69 | - pygments
70 | - pyparsing
71 | - pyqt
72 | - python>=3.6.0
73 | - python-dateutil
74 | - pytz
75 | - pyzmq
76 | - qt
77 | - qtconsole
78 | #- readline
79 | - scipy
80 | - seaborn
81 | - setuptools
82 | - simplegeneric
83 | - sip
84 | - six
85 | - sqlite
86 | - statsmodels
87 | #- terminado
88 | - testpath
89 | - tk
90 | - tornado<5
91 | - tqdm
92 | - traitlets
93 | - wcwidth
94 | - wheel
95 | - widgetsnbextension
96 | - xz
97 | - zeromq
98 | - pytorch<0.4
99 | - bcolz
100 | - prompt_toolkit
101 | - pytest
102 | - pip:
103 | - torchvision>=0.1.9
104 | - opencv-python
105 | - isoweek
106 | - pandas_summary
107 | - torchtext
108 | - graphviz
109 | - sklearn_pandas
110 | - feather-format
111 | - plotnine
112 | - kaggle-cli
113 | - ipywidgets
114 | # - git+https://github.com/SauceCat/PDPbox.git
115 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/environment-nopytorch.yml:
--------------------------------------------------------------------------------
1 | name: fastai
2 | channels:
3 | - fastai
4 | #- pytorch
5 | - defaults
6 | - peterjc123
7 | dependencies:
8 | - scipy
9 | #- cuda90
10 | #- cudnn
11 | - numpy
12 | - pillow
13 | - jpeg
14 | - spacy
15 | - zlib
16 | - freetype
17 | - libtiff
18 | - bleach
19 | - certifi
20 | - cffi
21 | - cycler
22 | - decorator
23 | - entrypoints
24 | - expat
25 | #- fontconfig
26 | #- glib
27 | - html5lib
28 | - icu
29 | - ipykernel
30 | - ipython
31 | - ipython_genutils
32 | - ipywidgets
33 | #- jbig
34 | - jedi
35 | - jinja2
36 | - jsonschema
37 | - jupyter
38 | - jupyter_client
39 | - jupyter_console
40 | - jupyter_core
41 | - conda-forge::jupyter_contrib_nbextensions
42 | #- libffi
43 | #- libgcc
44 | #- libgfortran
45 | - libiconv
46 | - libpng
47 | - libsodium
48 | - libxml2
49 | - markupsafe
50 | - matplotlib
51 | - mistune
52 | - mkl
53 | - nbconvert
54 | - nbformat
55 | - notebook
56 | - numpy
57 | - olefile
58 | - openssl
59 | - pandas
60 | - pandocfilters
61 | - path.py
62 | - patsy
63 | - pcre
64 | - pexpect
65 | - pickleshare
66 | - pillow
67 | - pip
68 | - prompt_toolkit
69 | #- ptyprocess
70 | - pycparser
71 | - pygments
72 | - pyparsing
73 | - pyqt
74 | - python>=3.6.0
75 | - python-dateutil
76 | - pytz
77 | - pyzmq
78 | - qt
79 | - qtconsole
80 | #- readline
81 | - scipy
82 | - seaborn
83 | - setuptools
84 | - simplegeneric
85 | - sip
86 | - six
87 | - sqlite
88 | - statsmodels
89 | #- terminado
90 | - testpath
91 | - tk
92 | - tornado<5
93 | - tqdm
94 | - traitlets
95 | - wcwidth
96 | - wheel
97 | - widgetsnbextension
98 | - xz
99 | - zeromq
100 | #- pytorch<0.4
101 | - bcolz
102 | - prompt_toolkit
103 | - pip:
104 | #- torchvision>=0.1.9
105 | - opencv-python
106 | - isoweek
107 | - pandas_summary
108 | #- torchtext
109 | - graphviz
110 | - sklearn_pandas
111 | - feather-format
112 | - plotnine
113 | - awscli
114 | - kaggle-cli
115 | - ipywidgets
116 | #- git+https://github.com/SauceCat/PDPbox.git
117 |
118 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/environment-old.yml:
--------------------------------------------------------------------------------
1 | name: fastai
2 | channels:
3 | - fastai
4 | - pytorch
5 | - defaults
6 | dependencies:
7 | - scipy
8 | - cuda90
9 | - numpy
10 | - pillow
11 | - jpeg
12 | - spacy
13 | - zlib
14 | - freetype
15 | - libtiff
16 | - bleach
17 | - certifi
18 | - cffi
19 | - cycler
20 | - decorator
21 | - entrypoints
22 | - expat
23 | - fontconfig
24 | - glib
25 | - html5lib
26 | - icu
27 | - ipykernel
28 | - ipython
29 | - ipython_genutils
30 | - ipywidgets
31 | - jbig
32 | - jedi
33 | - jinja2
34 | - jsonschema
35 | - jupyter
36 | - jupyter_client
37 | - jupyter_console
38 | - jupyter_core
39 | - conda-forge::jupyter_contrib_nbextensions
40 | - libffi
41 | - libgcc
42 | - libgfortran
43 | - libiconv
44 | - libpng
45 | - libsodium
46 | - libxml2
47 | - markupsafe
48 | - matplotlib
49 | - mistune
50 | - mkl
51 | - nbconvert
52 | - nbformat
53 | - notebook
54 | - numpy
55 | - olefile
56 | - openssl
57 | - pandas
58 | - pandocfilters
59 | - path.py
60 | - patsy
61 | - pcre
62 | - pexpect
63 | - pickleshare
64 | - pillow
65 | - pip
66 | - prompt_toolkit
67 | - ptyprocess
68 | - pycparser
69 | - pygments
70 | - pyparsing
71 | - pyqt
72 | - python>=3.6.0
73 | - python-dateutil
74 | - pytz
75 | - pyzmq
76 | - qt
77 | - qtconsole
78 | - readline
79 | - scipy
80 | - seaborn
81 | - setuptools
82 | - simplegeneric
83 | - sip
84 | - six
85 | - sqlite
86 | - statsmodels
87 | - terminado
88 | - testpath
89 | - tk
90 | - tornado<5
91 | - tqdm
92 | - traitlets
93 | - wcwidth
94 | - wheel
95 | - widgetsnbextension
96 | - xz
97 | - zeromq
98 | - pytorch>=0.2.0
99 | - torchvision>=0.1.9
100 | - bcolz
101 | - prompt_toolkit
102 | - pip:
103 | - opencv-python
104 | - isoweek
105 | - pandas_summary
106 | - torchtext
107 | - graphviz
108 | - sklearn_pandas
109 | - feather-format
110 | - plotnine
111 | - awscli
112 | - kaggle-cli
113 | - ipywidgets
114 | - git+https://github.com/SauceCat/PDPbox.git
115 |
116 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/environment.yml:
--------------------------------------------------------------------------------
1 | name: fastai
2 | channels:
3 | - fastai
4 | - pytorch
5 | - defaults
6 | - peterjc123
7 | dependencies:
8 | - scipy
9 | - cuda90
10 | - cudnn
11 | - numpy
12 | - pillow
13 | - jpeg
14 | - spacy
15 | - zlib
16 | - freetype
17 | - libtiff
18 | - bleach
19 | - certifi
20 | - cffi
21 | - cycler
22 | - decorator
23 | - entrypoints
24 | - expat
25 | #- fontconfig
26 | #- glib
27 | - html5lib
28 | - icu
29 | - ipykernel
30 | - ipython
31 | - ipython_genutils
32 | - ipywidgets
33 | #- jbig
34 | - jedi
35 | - jinja2
36 | - jsonschema
37 | - jupyter
38 | - jupyter_client
39 | - jupyter_console
40 | - jupyter_core
41 | - conda-forge::jupyter_contrib_nbextensions
42 | #- libffi
43 | #- libgcc
44 | #- libgfortran
45 | - libiconv
46 | - libpng
47 | - libsodium
48 | - libxml2
49 | - markupsafe
50 | - matplotlib
51 | - mistune
52 | - mkl
53 | - nbconvert
54 | - nbformat
55 | - notebook
56 | - numpy
57 | - olefile
58 | - openssl
59 | - pandas
60 | - pandocfilters
61 | - path.py
62 | - patsy
63 | - pcre
64 | - pexpect
65 | - pickleshare
66 | - pillow
67 | - pip
68 | - prompt_toolkit
69 | #- ptyprocess
70 | - pycparser
71 | - pygments
72 | - pyparsing
73 | - pyqt
74 | - python>=3.6.0
75 | - python-dateutil
76 | - pytz
77 | - pyzmq
78 | - qt
79 | - qtconsole
80 | #- readline
81 | - scipy
82 | - seaborn
83 | - setuptools
84 | - simplegeneric
85 | - sip
86 | - six
87 | - sqlite
88 | - statsmodels
89 | #- terminado
90 | - testpath
91 | - tk
92 | - tornado<5
93 | - tqdm
94 | - traitlets
95 | - wcwidth
96 | - wheel
97 | - widgetsnbextension
98 | - xz
99 | - zeromq
100 | - pytorch<0.4
101 | - bcolz
102 | - prompt_toolkit
103 | - pytest
104 | - pip:
105 | - torchvision>=0.1.9
106 | - opencv-python
107 | - isoweek
108 | - pandas_summary
109 | - torchtext
110 | - graphviz
111 | - sklearn_pandas
112 | - feather-format
113 | - plotnine
114 | - kaggle-cli
115 | - ipywidgets
116 | #- git+https://github.com/SauceCat/PDPbox.git
117 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 | """
10 | Collects iterables lazily, rather than immediately.
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |         Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 | """
14 | if timeout is not None: end_time = timeout + time.time()
15 | if prefetch is None: prefetch = self._max_workers
16 | if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 | argsiter = zip(*iterables)
18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch))
19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator():
21 | nonlocal argsiter
22 | try:
23 | while fs:
24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time())
25 | # Got a result, future needn't be cancelled
26 | del fs[0]
27 | # Dispatch next task before yielding to keep pipeline full
28 | if argsiter:
29 | try:
30 | args = next(argsiter)
31 | except StopIteration:
32 | argsiter = None
33 | else:
34 | fs.append(self.submit(fn, *args))
35 | yield res
36 | finally:
37 | for future in fs: future.cancel()
38 | return result_iterator()
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 | def __init__(self, module):
7 | super(FP16, self).__init__()
8 | self.module = batchnorm_to_fp32(module.half())
9 |
10 | def forward(self, input):
11 | return self.module(input.half())
12 |
13 | def load_state_dict(self, *inputs, **kwargs):
14 | self.module.load_state_dict(*inputs, **kwargs)
15 |
16 | def state_dict(self, *inputs, **kwargs):
17 | return self.module.state_dict(*inputs, **kwargs)
18 |
19 | def batchnorm_to_fp32(module):
20 | '''
21 |     Converts BatchNorm layers to have parameters in single precision.
22 |     Finds all such layers and converts them back to float. This can't
23 |     be done with the built-in .apply, as that function would apply
24 |     fn to all modules, parameters, and buffers, so we wouldn't be
25 |     able to guard the float conversion based on the module type.
26 | '''
27 | if isinstance(module, nn.modules.batchnorm._BatchNorm):
28 | module.float()
29 | for child in module.children():
30 | batchnorm_to_fp32(child)
31 | return module
32 |
33 | def copy_model_to_fp32(m, optim):
34 | """ Creates a fp32 copy of model parameters and sets optimizer parameters
35 | """
36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()]
37 | optim_groups = [group['params'] for group in optim.param_groups]
38 | iter_fp32_params = iter(fp32_params)
39 | for group_params in optim_groups:
40 | for i in range(len(group_params)):
41 | fp32_param = next(iter_fp32_params)
42 | fp32_param.requires_grad = group_params[i].requires_grad
43 | group_params[i] = fp32_param
44 | return fp32_params
45 |
46 | def copy_fp32_to_model(m, fp32_params):
47 | m_params = list(m.parameters())
48 | for fp32_param, m_param in zip(fp32_params, m_params):
49 | m_param.data.copy_(fp32_param.data)
50 |
51 | def update_fp32_grads(fp32_params, m):
52 | m_params = list(m.parameters())
53 | for fp32_param, m_param in zip(fp32_params, m_params):
54 | if fp32_param.grad is None:
55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
56 | fp32_param.grad.data.copy_(m_param.grad.data)
57 |
58 |
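A sketch of the master-weights training step these helpers are designed around, assuming a CUDA device and the pre-0.4 PyTorch this repo pins (`torch<0.4`); the model, loss, and data handling (including `Variable` wrapping) are placeholders:

```python
import torch
import torch.nn as nn
from fastai.fp16 import FP16, copy_model_to_fp32, copy_fp32_to_model, update_fp32_grads

model = FP16(nn.Linear(10, 2).cuda())          # fp16 weights, batchnorm kept in fp32
opt = torch.optim.SGD(model.parameters(), lr=0.1)
fp32_params = copy_model_to_fp32(model, opt)   # optimizer now steps fp32 master copies
crit = nn.CrossEntropyLoss()

def train_step(x, y):
    loss = crit(model(x).float(), y)           # compute the loss in fp32
    model.zero_grad()
    loss.backward()                            # grads land on the fp16 params
    update_fp32_grads(fp32_params, model)      # mirror them onto the fp32 masters
    opt.step()                                 # update in fp32
    copy_fp32_to_model(model, fp32_params)     # write the result back as fp16
    return loss
```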
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/imports.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 |
4 | import matplotlib
5 | import numpy as np
6 |
7 | matplotlib.rc('animation', html='html5')
8 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
9 |
10 | from ipykernel.kernelapp import IPKernelApp
11 | def in_notebook(): return IPKernelApp.initialized()
12 |
13 | def in_ipynb():
14 | try:
15 | cls = get_ipython().__class__.__name__
16 | return cls == 'ZMQInteractiveShell'
17 | except NameError:
18 | return False
19 |
20 | import tqdm as tq
21 |
22 |
23 | def clear_tqdm():
24 | inst = getattr(tq.tqdm, '_instances', None)
25 | if not inst: return
26 | try:
27 | for i in range(len(inst)): inst.pop().close()
28 | except Exception:
29 | pass
30 |
31 | if in_notebook():
32 | def tqdm(*args, **kwargs):
33 | clear_tqdm()
34 | return tq.tqdm(*args, file=sys.stdout, **kwargs)
35 | def trange(*args, **kwargs):
36 | clear_tqdm()
37 | return tq.trange(*args, file=sys.stdout, **kwargs)
38 | else:
39 | from tqdm import tqdm, trange
40 | tnrange=trange
41 | tqdm_notebook=tqdm
42 |
43 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | def cond_init(m, init_fn):
4 |     if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)):
5 |         if hasattr(m, 'weight'): init_fn(m.weight)
6 |         # modules built with bias=False still carry a `bias` attribute set to None
7 |         if getattr(m, 'bias', None) is not None: m.bias.data.fill_(0.)
8 |
9 | def apply_init(m, init_fn):
10 |     m.apply(lambda x: cond_init(x, init_fn))
11 |
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/io.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlretrieve
2 | import os
3 | from tqdm import tqdm
4 |
5 |
6 | class TqdmUpTo(tqdm):
7 | def update_to(self, b=1, bsize=1, tsize=None):
8 | if tsize is not None: self.total = tsize
9 | self.update(b * bsize - self.n)
10 |
11 | def get_data(url, filename):
12 | if not os.path.exists(filename):
13 |
14 | dirname = os.path.dirname(filename)
15 | if not os.path.exists(dirname):
16 | os.makedirs(dirname)
17 |
18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
19 | urlretrieve(url, filename, reporthook=t.update_to)
20 |
21 |
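Typical use of `get_data` (the URL and path here are illustrative only): it no-ops if the file already exists, creates missing parent directories, and reports download progress through `TqdmUpTo`:

```python
from fastai.io import get_data

get_data('https://example.com/sample.txt', 'data/sample.txt')
```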
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/layer_optimizer.py:
--------------------------------------------------------------------------------
1 | from .core import *
2 |
3 | def opt_params(parm, lr, wd):
4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd}
5 |
6 | class LayerOptimizer():
7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None):
8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups]
9 | if not isinstance(lrs, Iterable): lrs=[lrs]
10 | if len(lrs)==1: lrs=lrs*len(layer_groups)
11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds]
13 | if len(wds)==1: wds=wds*len(layer_groups)
14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds
15 | self.opt = opt_fn(self.opt_params())
16 |
17 | def opt_params(self):
18 | assert(len(self.layer_groups) == len(self.lrs))
19 | assert(len(self.layer_groups) == len(self.wds))
20 | params = list(zip(self.layer_groups,self.lrs,self.wds))
21 | return [opt_params(*p) for p in params]
22 |
23 | @property
24 | def lr(self): return self.lrs[-1]
25 |
26 | @property
27 | def mom(self):
28 | if 'betas' in self.opt.param_groups[0]:
29 | return self.opt.param_groups[0]['betas'][0]
30 | else:
31 | return self.opt.param_groups[0]['momentum']
32 |
33 | def set_lrs(self, lrs):
34 | if not isinstance(lrs, Iterable): lrs=[lrs]
35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups)
36 | set_lrs(self.opt, lrs)
37 | self.lrs=lrs
38 |
39 | def set_wds(self, wds):
40 | if not isinstance(wds, Iterable): wds=[wds]
41 | if len(wds)==1: wds=wds*len(self.layer_groups)
42 | set_wds(self.opt, wds)
43 | self.wds=wds
44 |
45 | def set_mom(self,momentum):
46 | if 'betas' in self.opt.param_groups[0]:
47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1])
48 | else:
49 | for pg in self.opt.param_groups: pg['momentum'] = momentum
50 |
51 | def set_beta(self,beta):
52 | if 'betas' in self.opt.param_groups[0]:
53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta)
54 | elif 'alpha' in self.opt.param_groups[0]:
55 | for pg in self.opt.param_groups: pg['alpha'] = beta
56 |
57 | def set_opt_fn(self, opt_fn):
58 | if type(self.opt) != type(opt_fn(self.opt_params())):
59 | self.opt = opt_fn(self.opt_params())
60 |
61 | def zip_strict_(l, r):
62 | assert(len(l) == len(r))
63 | return zip(l, r)
64 |
65 | def set_lrs(opt, lrs):
66 | if not isinstance(lrs, Iterable): lrs=[lrs]
67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups)
68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr
69 |
70 | def set_wds(opt, wds):
71 | if not isinstance(wds, Iterable): wds=[wds]
72 | if len(wds)==1: wds=wds*len(opt.param_groups)
73 | assert(len(opt.param_groups) == len(wds))
74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd
75 |
76 |
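A sketch of discriminative learning rates with `LayerOptimizer`; the three `nn.Linear` modules stand in for real layer groups, and `chain_params` (imported from `fastai.core` above) is assumed to accept modules:

```python
import torch.nn as nn
import torch.optim as optim
from fastai.layer_optimizer import LayerOptimizer

groups = [nn.Linear(10, 10), nn.Linear(10, 10), nn.Linear(10, 2)]
lo = LayerOptimizer(optim.Adam, groups, lrs=[1e-4, 3e-4, 1e-3], wds=1e-5)

print(lo.lr)      # 1e-3: the lr of the last (head) group
lo.set_lrs(1e-5)  # a scalar is broadcast to every group
lo.set_mom(0.9)   # sets Adam's beta1 (or `momentum` for SGD-style optimizers)
```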
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class AdaptiveConcatPool2d(nn.Module):
5 |     def __init__(self, sz=None):
6 |         super().__init__()
7 |         sz = sz or (1,1)
8 |         self.ap = nn.AdaptiveAvgPool2d(sz)
9 |         self.mp = nn.AdaptiveMaxPool2d(sz)
10 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
11 |
12 | class Lambda(nn.Module):
13 |     def __init__(self, f): super().__init__(); self.f=f
14 |     def forward(self, x): return self.f(x)
15 |
16 | class Flatten(nn.Module):
17 |     def __init__(self): super().__init__()
18 |     def forward(self, x): return x.view(x.size(0), -1)
19 |
20 |
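A quick shape check for the layers above (shown modern-PyTorch style; under the pinned `torch<0.4` the input would need `Variable` wrapping):

```python
import torch
from fastai.layers import AdaptiveConcatPool2d, Flatten

x = torch.randn(8, 512, 7, 7)
pooled = AdaptiveConcatPool2d()(x)  # max-pool and avg-pool features concatenated
print(pooled.shape)                 # (8, 1024, 1, 1): channel count doubles
print(Flatten()(pooled).shape)      # (8, 1024)
```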
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
4 |     y_pred = (y_pred.float() > threshold).float()
5 |     y_true = y_true.float()
6 |     tp = (y_pred * y_true).sum(dim=1)
7 |     precision = tp / (y_pred.sum(dim=1)+eps)
8 |     recall = tp / (y_true.sum(dim=1)+eps)
9 |     return torch.mean(
10 |         precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2))
11 |
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | import torch
3 |
4 | def accuracy_np(preds, targs):
5 | preds = np.argmax(preds, 1)
6 | return (preds==targs).mean()
7 |
8 | def accuracy(preds, targs):
9 | preds = torch.max(preds, dim=1)[1]
10 | return (preds==targs).float().mean()
11 |
12 | def accuracy_thresh(thresh):
13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh)
14 |
15 | def accuracy_multi(preds, targs, thresh):
16 | return ((preds>thresh).float()==targs).float().mean()
17 |
18 | def accuracy_multi_np(preds, targs, thresh):
19 | return ((preds>thresh)==targs).mean()
20 |
21 | def recall(preds, targs, thresh=0.5):
22 | pred_pos = preds > thresh
23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
24 | return tpos.sum()/targs.sum()
25 |
26 | def precision(preds, targs, thresh=0.5):
27 | pred_pos = preds > thresh
28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 | return tpos.sum()/pred_pos.sum()
30 |
31 | def fbeta(preds, targs, beta, thresh=0.5):
32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
33 | This is the micro averaged version where the true positives, false negatives and
34 | false positives are calculated globally (as opposed to on a per label basis).
35 |
36 |     beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision, and
37 | beta > 1 favors recall.
38 | """
39 | assert beta > 0, 'beta needs to be greater than 0'
40 | beta2 = beta ** 2
41 | rec = recall(preds, targs, thresh)
42 | prec = precision(preds, targs, thresh)
43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec)
44 |
45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
46 |
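A small sanity check for the thresholded metrics (modern-PyTorch style; exact dtype behavior differs slightly across versions, so treat this as a sketch):

```python
import torch
from fastai.metrics import precision, recall, f1

preds = torch.tensor([[0.9, 0.2],
                      [0.4, 0.8]])
targs = torch.tensor([[1., 0.],
                      [0., 1.]])

print(recall(preds, targs))     # 1.0: every true positive clears the 0.5 threshold
print(precision(preds, targs))  # 1.0: nothing above the threshold is a false positive
print(f1(preds, targs))         # micro-averaged F1 over all labels
```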
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/models/cifar10/wideresnet.py:
--------------------------------------------------------------------------------
1 | # Cifar10 Wideresnet for Dawn Submission
2 | import torch.nn.functional as F
3 | from ...layers import *
4 |
5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False)
6 |
7 | def bn(ni, init_zero=False):
8 | m = nn.BatchNorm2d(ni)
9 | m.weight.data.fill_(0 if init_zero else 1)
10 | m.bias.data.zero_()
11 | return m
12 |
13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False):
14 | bn_initzero = bn(ni, init_zero=init_zero)
15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride))
16 |
17 | def noop(x): return x
18 |
19 | class BasicBlock(nn.Module):
20 | def __init__(self, ni, nf, stride, drop_p=0.0):
21 | super().__init__()
22 | self.bn = nn.BatchNorm2d(ni)
23 | self.conv1 = conv_2d(ni, nf, 3, stride)
24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1)
25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None
26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop
27 |
28 | def forward(self, x):
29 | x2 = F.relu(self.bn(x), inplace=True)
30 | r = self.shortcut(x2)
31 | x = self.conv1(x2)
32 | if self.drop: x = self.drop(x)
33 | x = self.conv2(x) * 0.2
34 | return x.add_(r)
35 |
36 |
37 | def _make_group(N, ni, nf, block, stride, drop_p):
38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)]
39 |
40 | class WideResNet(nn.Module):
41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16):
42 | super().__init__()
43 | n_channels = [start_nf]
44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k)
45 |
46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1
47 | for i in range(num_groups):
48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p)
49 |
50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1),
51 | Flatten(), nn.Linear(n_channels[3], num_classes)]
52 | self.features = nn.Sequential(*layers)
53 |
54 | def forward(self, x): return self.features(x)
55 |
56 |
57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.)
58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.)
59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.)
60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2)
61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.)
62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.)
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2)
64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2)
65 |
66 |
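A forward-pass sanity check for the DAWNBench model (modern-PyTorch style for brevity):

```python
import torch
from fastai.models.cifar10.wideresnet import wrn_22

model = wrn_22()               # WRN-22-6: num_groups=3, N=3 blocks per group, k=6
x = torch.randn(2, 3, 32, 32)  # a CIFAR-10-sized batch
print(model(x).shape)          # (2, 10)
```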
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from ..layers import *
4 |
5 |
6 |
7 | class ConvBN(nn.Module):
8 | "convolutional layer then batchnorm"
9 |
10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0):
11 | super().__init__()
12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
14 | self.relu = nn.LeakyReLU(0.1, inplace=True)
15 |
16 | def forward(self, x): return self.relu(self.bn(self.conv(x)))
17 |
18 | class DarknetBlock(nn.Module):
19 | def __init__(self, ch_in):
20 | super().__init__()
21 | ch_hid = ch_in//2
22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, x): return self.conv2(self.conv1(x)) + x
26 |
27 | class Darknet(nn.Module):
28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
29 |
30 | def make_group_layer(self, ch_in, num_blocks, stride=1):
31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)]
32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2))
33 | return layers
34 |
35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32):
36 | super().__init__()
37 | nf = start_nf
38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
39 | for i,nb in enumerate(num_blocks):
40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2))
41 | nf *= 2
42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
43 | self.layers = nn.Sequential(*layers)
44 |
45 | def forward(self, x): return self.layers(x)
46 |
47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes)
48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes)
49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24)
50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16)
51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes)
52 |
53 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h)
30 | x = np.random.randint(w)
31 |
32 |             y1 = np.clip(y - self.length // 2, 0, h)
33 |             y2 = np.clip(y + self.length // 2, 0, h)
34 |             x1 = np.clip(x - self.length // 2, 0, w)
35 |             x2 = np.clip(x + self.length // 2, 0, w)
36 |
37 | mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
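Applying `Cutout` to a CHW tensor (a sketch; with `n_holes=1, length=8`, one 8x8 square is zeroed, clipped at the image borders):

```python
import torch
from fastai.transforms_pil import Cutout

img = torch.rand(3, 32, 32)       # CHW image in [0, 1]
aug = Cutout(n_holes=1, length=8)
out = aug(img)                    # same shape, with one square patch masked to 0
```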
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | norecursedirs = .git courses
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/requirements.txt:
--------------------------------------------------------------------------------
1 | bcolz>=1.1.2
2 | bleach>=2.0.0
3 | certifi>=2016.2.28
4 | cycler>=0.10.0
5 | decorator>=4.1.2
6 | entrypoints>=0.2.3
7 | graphviz>=0.8.2
8 | html5lib>=0.999999999
9 | ipykernel>=4.6.1
10 | ipython>=6.2.0
11 | ipython-genutils>=0.2.0
12 | ipywidgets>=7.0.1
13 | isoweek>=1.3.3
14 | jedi>=0.10.2
15 | Jinja2>=2.9.6
16 | jsonschema>=2.6.0
17 | jupyter>=1.0.0
18 | jupyter-client>=5.1.0
19 | jupyter-console>=5.2.0
20 | jupyter-core>=4.3.0
21 | MarkupSafe>=1.0
22 | matplotlib>=2.0.2
23 | mistune>=0.7.4
24 | nbconvert>=5.3.1
25 | nbformat>=4.4.0
26 | notebook>=5.1.0
27 | numpy>=1.13.1
28 | olefile>=0.44
29 | opencv-python>=3.3.0.10
30 | pandas>=0.20.3
31 | pandas_summary>=0.0.41
32 | pandocfilters>=1.4.2
33 | pexpect>=4.2.1
34 | pickleshare>=0.7.4
35 | Pillow>=4.2.1
36 | prompt-toolkit>=1.0.15
37 | ptyprocess>=0.5.2
38 | Pygments>=2.2.0
39 | pyparsing>=2.2.0
40 | pytest>=3.5.0
41 | python-dateutil>=2.6.1
42 | pytz>=2017.2
43 | PyYAML>=3.12
44 | pyzmq>=16.0.2
45 | qtconsole>=4.3.1
46 | scipy>=0.19.1
47 | seaborn>=0.8.1
48 | simplegeneric>=0.8.1
49 | six>=1.11.0
50 | sklearn_pandas>=1.6.0
51 | terminado>=0.6
52 | testpath>=0.3.1
53 | torch<0.4
54 | torchtext>=0.2.3
55 | torchvision>=0.2.0
56 | tornado>=4.5.2,<5
57 | tqdm>=4.15.0
58 | traitlets>=4.3.2
59 | wcwidth>=0.1.7
60 | webencodings>=0.5.1
61 | widgetsnbextension>=3.0.3
62 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/setup.py:
--------------------------------------------------------------------------------
1 |
2 | # coding: utf-8
3 |
4 | """ Setup script for installing fastai """
5 |
6 | #from distutils.core import setup
7 | from setuptools import setup
8 |
9 | setup(
10 | name = "fastai",
11 | packages = ['fastai', 'fastai/models', 'fastai/models/cifar10'],
12 | version = '0.7.0',
13 | description = "The fastai deep learning and machine learning library.",
14 | author = "Jeremy Howard and contributors",
15 | author_email = "info@fast.ai",
16 | license = "Apache License 2.0",
17 | url = "https://github.com/fastai/fastai",
18 | download_url = 'https://github.com/fastai/fastai/archive/0.7.0.tar.gz',
19 | install_requires =
20 | ['bcolz', 'bleach', 'certifi', 'cycler', 'decorator', 'entrypoints', 'feather-format', 'graphviz', 'html5lib',
21 | 'ipykernel', 'ipython', 'ipython-genutils', 'ipywidgets', 'isoweek', 'jedi', 'Jinja2', 'jsonschema', 'jupyter',
22 | 'MarkupSafe', 'matplotlib', 'numpy', 'opencv-python', 'pandas',
23 | 'pandas_summary', 'pickleshare', 'Pillow', 'plotnine',
24 | 'ptyprocess', 'Pygments', 'pyparsing', 'python-dateutil', 'pytz', 'PyYAML', 'pyzmq', 'scipy',
25 | 'seaborn', 'simplegeneric', 'sklearn_pandas', 'testpath', 'torch<0.4', 'torchtext', 'torchvision', 'tornado', 'tqdm',
26 | 'traitlets', 'wcwidth', 'webencodings', 'widgetsnbextension'],
27 | keywords = ['deeplearning', 'pytorch', 'machinelearning'],
28 | classifiers = ['Development Status :: 3 - Alpha',
29 | 'Programming Language :: Python',
30 | 'Programming Language :: Python :: 3.6',
31 | 'Topic :: Scientific/Engineering :: Artificial Intelligence']
32 | )
33 |
34 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('agg')
3 |
4 | # the above import and backend selection fix the TLS issue:
5 | # ```ImportError: dlopen: cannot load any more object with static TLS```
6 | # settled on after experimenting with the test suite on Ubuntu 16.04
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tests/test_core.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from fastai.core import partition
3 |
4 |
5 | def test_partition_functionality():
6 | sz = 2
7 | a = [1,2,3,4,5]
8 | ex = [[1,2],[3,4],[5]]
9 | result = partition(a, sz)
10 | assert len(result) == len(ex)
11 | assert all([a == b for a, b in zip(result, ex)])
12 |
13 | sz = 3
14 | ex = [[1,2,3],[4,5]]
15 | result = partition(a, sz)
16 | assert len(result) == len(ex)
17 | assert all([a == b for a,b in zip(result, ex)])
18 |
19 | sz = 1
20 | ex = [[1],[2],[3],[4],[5]]
21 | result = partition(a, sz)
22 | assert len(result) == len(ex)
23 | assert all([a == b for a,b in zip(result, ex)])
24 |
25 | sz = 6
26 | ex = [[1,2,3,4,5]]
27 | result = partition(a, sz)
28 | assert len(result) == len(ex)
29 | assert all([a == b for a,b in zip(result, ex)])
30 |
31 | sz = 3
32 | a = []
33 | result = partition(a, sz)
34 | assert len(result) == 0
35 |
36 | def test_partition_error_handling():
37 | sz = 0
38 | a = [1,2,3,4,5]
39 | with pytest.raises(ValueError):
40 | partition(a, sz)
41 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tests/test_lsuv_initializer.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import cv2
4 | import numpy as np
5 | import pytest
6 | import torch
7 | import torch.nn as nn
8 | import torchvision.models as models
9 | from fastai.core import VV
10 | from fastai.lsuv_initializer import apply_lsuv_init
11 |
12 |
13 | @pytest.fixture
14 | def image_data():
15 | images_to_process = []
16 | for img_fname in os.listdir('fastai/images'):
17 | img = cv2.imread(os.path.join('fastai/images', img_fname))
18 | images_to_process.append(np.transpose(cv2.resize(img, (224,224)), (2,0,1)))
19 | data = np.array(images_to_process).astype(np.float32)
20 | return VV(torch.from_numpy(data))
21 |
22 |
23 | def add_hooks(m, fn):
24 | hooks = []
25 | def add_hook(m):
26 | if (isinstance(m, nn.Conv2d)) or (isinstance(m, nn.Linear)):
27 | hooks.append(m.register_forward_hook(fn))
28 | m.apply(add_hook)
29 | return hooks
30 | def remove_hooks(hooks): [h.remove() for h in hooks]
31 |
32 | def run_with_capture(m, data):
33 | activation_variances = []
34 | def capture_hook(self, input, output):
35 | activation_variances.append(np.var(output.data.cpu().numpy()))
36 | hooks = add_hooks(m, capture_hook)
37 | m(data)
38 | remove_hooks(hooks)
39 | return activation_variances
40 |
41 | def test_fast_initialization_without_orthonormal(image_data):
42 | alexnet = models.alexnet(pretrained=False)
43 | pre_init_var = run_with_capture(alexnet, image_data)
44 | assert pre_init_var[0] >= 1000 # the first few pre-init variances are huge,
45 | assert pre_init_var[1] >= 100 # even larger than these conservative tests.
46 |
47 | tol = 0.1
48 | alexnet = apply_lsuv_init(alexnet, image_data, std_tol=tol, do_orthonorm=False, cuda=False)
49 | *post_init_var, final_var = run_with_capture(alexnet, image_data)
50 | for var in post_init_var:
51 | assert 2 <= var <= 4
52 | assert final_var == pytest.approx(1, tol**2)
53 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tests/test_samplers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from fastai.text import SortSampler, SortishSampler
4 |
5 |
6 | def test_sort_sampler_sorts_all_descending():
7 | bs = 4
8 | n = bs*100
9 | data = 2 * np.arange(n)
10 | samp = list(SortSampler(data, lambda i: data[i]))
11 |
12 | # The sample is a permutation of the indices.
13 | assert sorted(samp) == list(range(n))
14 | # And that "permutation" is for descending data order.
15 | assert all(s1 > s2 for s1, s2 in zip(samp, samp[1:]))
16 |
17 |
18 | def test_sortish_sampler_sorts_each_batch_descending():
19 | bs = 4
20 | n = bs*100
21 | data = 2 * np.arange(n)
22 | samp = list(SortishSampler(data, lambda i: data[i], bs))
23 |
24 | # The sample is a permutation of the indices.
25 | assert sorted(samp) == list(range(n))
26 | # And that permutation is kind of reverse sorted.
27 | assert all(
28 | s1 > s2 or (i+1) % bs == 0 # don't check batch boundaries
29 | for i, (s1, s2) in enumerate(zip(samp, samp[1:]))
30 | )
31 | assert samp[0] == max(samp)
32 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/__init__.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, '../')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 | """
10 | Collects iterables lazily, rather than immediately.
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |         Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 | """
14 | if timeout is not None: end_time = timeout + time.time()
15 | if prefetch is None: prefetch = self._max_workers
16 | if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 | argsiter = zip(*iterables)
18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch))
19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator():
21 | nonlocal argsiter
22 | try:
23 | while fs:
24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time())
25 | # Got a result, future needn't be cancelled
26 | del fs[0]
27 | # Dispatch next task before yielding to keep pipeline full
28 | if argsiter:
29 | try:
30 | args = next(argsiter)
31 | except StopIteration:
32 | argsiter = None
33 | else:
34 | fs.append(self.submit(fn, *args))
35 | yield res
36 | finally:
37 | for future in fs: future.cancel()
38 | return result_iterator()
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 | def __init__(self, module):
7 | super(FP16, self).__init__()
8 | self.module = batchnorm_to_fp32(module.half())
9 |
10 | def forward(self, input):
11 | return self.module(input.half())
12 |
13 | def load_state_dict(self, *inputs, **kwargs):
14 | self.module.load_state_dict(*inputs, **kwargs)
15 |
16 | def state_dict(self, *inputs, **kwargs):
17 | return self.module.state_dict(*inputs, **kwargs)
18 |
19 | def batchnorm_to_fp32(module):
20 | '''
21 |     Convert BatchNorm layers to have parameters in single precision.
22 | Find all layers and convert them back to float. This can't
23 | be done with built in .apply as that function will apply
24 | fn to all modules, parameters, and buffers. Thus we wouldn't
25 | be able to guard the float conversion based on the module type.
26 | '''
27 | if isinstance(module, nn.modules.batchnorm._BatchNorm):
28 | module.float()
29 | for child in module.children():
30 | batchnorm_to_fp32(child)
31 | return module
32 |
33 | def copy_model_to_fp32(m, optim):
34 | """ Creates a fp32 copy of model parameters and sets optimizer parameters
35 | """
36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()]
37 | optim_groups = [group['params'] for group in optim.param_groups]
38 | iter_fp32_params = iter(fp32_params)
39 | for group_params in optim_groups:
40 | for i in range(len(group_params)):
41 | fp32_param = next(iter_fp32_params)
42 | fp32_param.requires_grad = group_params[i].requires_grad
43 | group_params[i] = fp32_param
44 | return fp32_params
45 |
46 | def copy_fp32_to_model(m, fp32_params):
47 | m_params = list(m.parameters())
48 | for fp32_param, m_param in zip(fp32_params, m_params):
49 | m_param.data.copy_(fp32_param.data)
50 |
51 | def update_fp32_grads(fp32_params, m):
52 | m_params = list(m.parameters())
53 | for fp32_param, m_param in zip(fp32_params, m_params):
54 | if fp32_param.grad is None:
55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
56 | fp32_param.grad.data.copy_(m_param.grad.data)
57 |
58 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/imports.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | import matplotlib
4 | import numpy as np
5 |
6 | matplotlib.rc('animation', html='html5')
7 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
8 |
9 | from ipykernel.kernelapp import IPKernelApp
10 | def in_notebook(): return IPKernelApp.initialized()
11 |
12 | def in_ipynb():
13 | try:
14 | cls = get_ipython().__class__.__name__
15 | return cls == 'ZMQInteractiveShell'
16 | except NameError:
17 | return False
18 |
19 | import tqdm as tq
20 |
21 |
22 | def clear_tqdm():
23 | inst = getattr(tq.tqdm, '_instances', None)
24 | if not inst: return
25 | try:
26 | for i in range(len(inst)): inst.pop().close()
27 | except Exception:
28 | pass
29 |
30 | if in_notebook():
31 | def tqdm(*args, **kwargs):
32 | clear_tqdm()
33 | return tq.tqdm(*args, file=sys.stdout, **kwargs)
34 | def trange(*args, **kwargs):
35 | clear_tqdm()
36 | return tq.trange(*args, file=sys.stdout, **kwargs)
37 | else:
38 | from tqdm import tqdm, trange
39 | tnrange=trange
40 | tqdm_notebook=tqdm
41 |
42 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | def cond_init(m, init_fn):
4 |     if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)):
5 |         if hasattr(m, 'weight'): init_fn(m.weight)
6 |         # modules built with bias=False still carry a `bias` attribute set to None
7 |         if getattr(m, 'bias', None) is not None: m.bias.data.fill_(0.)
8 |
9 | def apply_init(m, init_fn):
10 |     m.apply(lambda x: cond_init(x, init_fn))
11 |
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/io.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlretrieve
2 | import os
3 | from tqdm import tqdm
4 |
5 |
6 | class TqdmUpTo(tqdm):
7 | def update_to(self, b=1, bsize=1, tsize=None):
8 | if tsize is not None: self.total = tsize
9 | self.update(b * bsize - self.n)
10 |
11 | def get_data(url, filename):
12 | if not os.path.exists(filename):
13 |
14 | dirname = os.path.dirname(filename)
15 | if not os.path.exists(dirname):
16 | os.makedirs(dirname)
17 |
18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
19 | urlretrieve(url, filename, reporthook=t.update_to)
20 |
21 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/layer_optimizer.py:
--------------------------------------------------------------------------------
1 | from .core import *
2 |
3 | def opt_params(parm, lr, wd):
4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd}
5 |
6 | class LayerOptimizer():
7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None):
8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups]
9 | if not isinstance(lrs, Iterable): lrs=[lrs]
10 | if len(lrs)==1: lrs=lrs*len(layer_groups)
11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds]
13 | if len(wds)==1: wds=wds*len(layer_groups)
14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds
15 | self.opt = opt_fn(self.opt_params())
16 |
17 | def opt_params(self):
18 | assert(len(self.layer_groups) == len(self.lrs))
19 | assert(len(self.layer_groups) == len(self.wds))
20 | params = list(zip(self.layer_groups,self.lrs,self.wds))
21 | return [opt_params(*p) for p in params]
22 |
23 | @property
24 | def lr(self): return self.lrs[-1]
25 |
26 | @property
27 | def mom(self):
28 | if 'betas' in self.opt.param_groups[0]:
29 | return self.opt.param_groups[0]['betas'][0]
30 | else:
31 | return self.opt.param_groups[0]['momentum']
32 |
33 | def set_lrs(self, lrs):
34 | if not isinstance(lrs, Iterable): lrs=[lrs]
35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups)
36 | set_lrs(self.opt, lrs)
37 | self.lrs=lrs
38 |
39 | def set_wds(self, wds):
40 | if not isinstance(wds, Iterable): wds=[wds]
41 | if len(wds)==1: wds=wds*len(self.layer_groups)
42 | set_wds(self.opt, wds)
43 | self.wds=wds
44 |
45 | def set_mom(self,momentum):
46 | if 'betas' in self.opt.param_groups[0]:
47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1])
48 | else:
49 | for pg in self.opt.param_groups: pg['momentum'] = momentum
50 |
51 | def set_beta(self,beta):
52 | if 'betas' in self.opt.param_groups[0]:
53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta)
54 | elif 'alpha' in self.opt.param_groups[0]:
55 | for pg in self.opt.param_groups: pg['alpha'] = beta
56 |
57 | def set_opt_fn(self, opt_fn):
58 | if type(self.opt) != type(opt_fn(self.opt_params())):
59 | self.opt = opt_fn(self.opt_params())
60 |
61 | def zip_strict_(l, r):
62 | assert(len(l) == len(r))
63 | return zip(l, r)
64 |
65 | def set_lrs(opt, lrs):
66 | if not isinstance(lrs, Iterable): lrs=[lrs]
67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups)
68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr
69 |
70 | def set_wds(opt, wds):
71 | if not isinstance(wds, Iterable): wds=[wds]
72 | if len(wds)==1: wds=wds*len(opt.param_groups)
73 | assert(len(opt.param_groups) == len(wds))
74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd
75 |
76 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class AdaptiveConcatPool2d(nn.Module):
5 |     def __init__(self, sz=None):
6 |         super().__init__()
7 |         sz = sz or (1,1)
8 |         self.ap = nn.AdaptiveAvgPool2d(sz)
9 |         self.mp = nn.AdaptiveMaxPool2d(sz)
10 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
11 |
12 | class Lambda(nn.Module):
13 |     def __init__(self, f): super().__init__(); self.f=f
14 |     def forward(self, x): return self.f(x)
15 |
16 | class Flatten(nn.Module):
17 |     def __init__(self): super().__init__()
18 |     def forward(self, x): return x.view(x.size(0), -1)
19 |
20 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
4 |     y_pred = (y_pred.float() > threshold).float()
5 |     y_true = y_true.float()
6 |     tp = (y_pred * y_true).sum(dim=1)
7 |     precision = tp / (y_pred.sum(dim=1)+eps)
8 |     recall = tp / (y_true.sum(dim=1)+eps)
9 |     return torch.mean(
10 |         precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2))
11 |
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | import torch
3 |
4 | def accuracy_np(preds, targs):
5 | preds = np.argmax(preds, 1)
6 | return (preds==targs).mean()
7 |
8 | def accuracy(preds, targs):
9 | preds = torch.max(preds, dim=1)[1]
10 | return (preds==targs).float().mean()
11 |
12 | def accuracy_thresh(thresh):
13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh)
14 |
15 | def accuracy_multi(preds, targs, thresh):
16 | return ((preds>thresh).float()==targs).float().mean()
17 |
18 | def accuracy_multi_np(preds, targs, thresh):
19 | return ((preds>thresh)==targs).mean()
20 |
21 | def recall(preds, targs, thresh=0.5):
22 | pred_pos = preds > thresh
23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
24 | return tpos.sum()/targs.sum()
25 |
26 | def precision(preds, targs, thresh=0.5):
27 | pred_pos = preds > thresh
28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 | return tpos.sum()/pred_pos.sum()
30 |
31 | def fbeta(preds, targs, beta, thresh=0.5):
32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
33 | This is the micro averaged version where the true positives, false negatives and
34 | false positives are calculated globally (as opposed to on a per label basis).
35 |
36 |     beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision, and
37 | beta > 1 favors recall.
38 | """
39 | assert beta > 0, 'beta needs to be greater than 0'
40 | beta2 = beta ** 2
41 | rec = recall(preds, targs, thresh)
42 | prec = precision(preds, targs, thresh)
43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec)
44 |
45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
46 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/models/cifar10/wideresnet.py:
--------------------------------------------------------------------------------
1 | # Cifar10 Wideresnet for Dawn Submission
2 | import torch.nn.functional as F
3 | from ...layers import *
4 |
5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False)
6 |
7 | def bn(ni, init_zero=False):
8 | m = nn.BatchNorm2d(ni)
9 | m.weight.data.fill_(0 if init_zero else 1)
10 | m.bias.data.zero_()
11 | return m
12 |
13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False):
14 | bn_initzero = bn(ni, init_zero=init_zero)
15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride))
16 |
17 | def noop(x): return x
18 |
19 | class BasicBlock(nn.Module):
20 | def __init__(self, ni, nf, stride, drop_p=0.0):
21 | super().__init__()
22 | self.bn = nn.BatchNorm2d(ni)
23 | self.conv1 = conv_2d(ni, nf, 3, stride)
24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1)
25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None
26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop
27 |
28 | def forward(self, x):
29 | x2 = F.relu(self.bn(x), inplace=True)
30 | r = self.shortcut(x2)
31 | x = self.conv1(x2)
32 | if self.drop: x = self.drop(x)
33 | x = self.conv2(x) * 0.2
34 | return x.add_(r)
35 |
36 |
37 | def _make_group(N, ni, nf, block, stride, drop_p):
38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)]
39 |
40 | class WideResNet(nn.Module):
41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16):
42 | super().__init__()
43 | n_channels = [start_nf]
44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k)
45 |
46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1
47 | for i in range(num_groups):
48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p)
49 |
50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1),
51 | Flatten(), nn.Linear(n_channels[3], num_classes)]
52 | self.features = nn.Sequential(*layers)
53 |
54 | def forward(self, x): return self.features(x)
55 |
56 |
57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.)
58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.)
59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.)
60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2)
61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.)
62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.)
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2)
64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2)
65 |
66 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from ..layers import *
4 |
5 |
6 |
7 | class ConvBN(nn.Module):
8 | "convolutional layer then batchnorm"
9 |
10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0):
11 | super().__init__()
12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
14 | self.relu = nn.LeakyReLU(0.1, inplace=True)
15 |
16 | def forward(self, x): return self.relu(self.bn(self.conv(x)))
17 |
18 | class DarknetBlock(nn.Module):
19 | def __init__(self, ch_in):
20 | super().__init__()
21 | ch_hid = ch_in//2
22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
24 |
25 | def forward(self, x): return self.conv2(self.conv1(x)) + x
26 |
27 | class Darknet(nn.Module):
28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
29 |
30 | def make_group_layer(self, ch_in, num_blocks, stride=1):
31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)]
32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2))
33 | return layers
34 |
35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32):
36 | super().__init__()
37 | nf = start_nf
38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
39 | for i,nb in enumerate(num_blocks):
40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2))
41 | nf *= 2
42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
43 | self.layers = nn.Sequential(*layers)
44 |
45 | def forward(self, x): return self.layers(x)
46 |
47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes)
48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes)
49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24)
50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16)
51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes)
52 |
53 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h)
30 | x = np.random.randint(w)
31 |
32 |             y1 = np.clip(y - self.length // 2, 0, h)
33 |             y2 = np.clip(y + self.length // 2, 0, h)
34 |             x1 = np.clip(x - self.length // 2, 0, w)
35 |             x2 = np.clip(x + self.length // 2, 0, w)
36 |
37 | mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/cifar10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/cifar10.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/demba_combustion_engine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/demba_combustion_engine.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/digit.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/digit.gif
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/fashion-mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/fashion-mnist.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/markov_health.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/markov_health.jpg
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/mnist.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/normal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/normal.jpg
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/overfitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/overfitting.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/overfitting2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/sgd2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/sgd2.gif
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/shop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/shop.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/what_is_pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/what_is_pytorch.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/zeiler1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler1.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/zeiler2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler2.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/zeiler3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler3.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/tutorials/images/zeiler4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler4.png
--------------------------------------------------------------------------------
/pytorch_model/README.md:
--------------------------------------------------------------------------------
1 | ## Deep Code Search
2 |
3 | In this code base, we demonstrate a joint embedding model that combines a code embedding network with a description embedding network, largely based on the work at https://github.com/guxd/deep-code-search.
4 |
5 |
6 | Our contribution is mostly in ironing out bugs, trying out newer and more complex models, and setting the approach up to work with Python code.
7 |
8 | ### Usage
9 | To run the codebase, first install the requirements:
10 |
11 | ```bash
12 | pip install -r requirements.txt
13 | ```
14 |
15 | ### Train
16 |
17 | ```bash
18 | python codesearcher.py --mode train --language java|python
19 | ```
20 |
21 | ### Code Embedding
22 |
23 | ```bash
24 | python codesearcher.py --mode repr_code --language java|python
25 | ```
26 |
27 | ### Search
28 |
29 | ```bash
30 | python codesearcher.py --mode search --language java|python
31 | ```
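32 |
33 | For example, to train the Java model (`java|python` in the commands above means pick one):
34 |
35 | ```bash
36 | python codesearcher.py --mode train --language java
37 | ```
38 |
39 | ### How search works (a sketch)
40 |
41 | At search time the model embeds every code snippet and the free-text query into a shared vector space and returns the snippets whose vectors are closest to the query by cosine similarity. Below is a minimal sketch of just the ranking step, assuming hypothetical `code_vecs` (one row per snippet) and `query_vec` arrays already produced by the two embedding networks:
42 |
43 | ```python
44 | import numpy as np
45 |
46 | def rank_snippets(code_vecs, query_vec, k=10):
47 |     """Return indices of the k snippets closest to the query by cosine similarity."""
48 |     # Normalize rows so a plain dot product equals cosine similarity.
49 |     code_norm = code_vecs / np.linalg.norm(code_vecs, axis=1, keepdims=True)
50 |     query_norm = query_vec / np.linalg.norm(query_vec)
51 |     sims = code_norm @ query_norm  # shape: (num_snippets,)
52 |     return np.argsort(-sims)[:k]   # best matches first
53 | ```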
--------------------------------------------------------------------------------
/pytorch_model/java/test.apiseq.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850
3 | size 259877
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/test.desc.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:212336e49778a8b2a665c9995cc6016fb2fdf9fae5e9f1e6c743bfd879939e62
3 | size 290579
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/test.methname.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c
3 | size 177624
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/test.rawcode.txt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e708058d9933fc0e6c79d0dfbc6656a98394b5949626022b4fd6a6db89899b1d
3 | size 3387172
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/test.tokens.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9
3 | size 304017
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/train.apiseq.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850
3 | size 259877
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/train.desc.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:212336e49778a8b2a665c9995cc6016fb2fdf9fae5e9f1e6c743bfd879939e62
3 | size 290579
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/train.methname.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c
3 | size 177624
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/train.tokens.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9
3 | size 304017
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/use.apiseq.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850
3 | size 259877
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/use.codevecs.normalized.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a24d9df51b46934b5e4d050645e02f1cfad1e4ff656bead2174d7fe290c61d3c
3 | size 10101368
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/use.methname.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c
3 | size 177624
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/use.rawcode.txt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e708058d9933fc0e6c79d0dfbc6656a98394b5949626022b4fd6a6db89899b1d
3 | size 3387172
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/use.tokens.h5:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9
3 | size 304017
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/vocab.apiseq.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:3c75e10d3f8d4911846941edd1293bed9a25296addd7f049b2ddb1e03007ffe2
3 | size 225989
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/vocab.desc.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:59310289b8fd4e43fbf44d9f9d01c283dce498cea8dca088484fdf36bad6c0e2
3 | size 167955
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/vocab.methname.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7ebfe7b692b1a43318ed584150c931bcd59e69bd826aca88e9624f25a99f2f8f
3 | size 165351
4 |
--------------------------------------------------------------------------------
/pytorch_model/java/vocab.tokens.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:c8666768e9c11fb5adf4fad54a245720b48b32bf5c028d4b3424a0c5ed4e42f6
3 | size 160199
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/small.rawcode.txt:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:eebc835b00f4378ed28c39c6da969eb1018d0941f9060290b392a3c6edda7a8b
3 | size 6342472
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/small.test.apiseq.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:3562437b8ce684e476fb63d565d5fa816af7e28c87ecd04ad38f04c7a3e6ad68
3 | size 1800128
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/small.test.desc.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:7dd7e2fb3e43bcebc481f5ea58e2cdaff2b1178341d64be00e72647ec1eb5b1f
3 | size 600128
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/small.test.methname.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:dc075bb3140324f931da4d387b68fca720cd18f5580ebb6c59d3400c114f9a5c
3 | size 200128
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/small.test.tokens.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:202b4708e7caec66550dbb33f335ca43b189109299e866784969dfd8cf88940c
3 | size 2200128
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/test.apiseq.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d311ccac692f7b3d9ba98e6e2c023a5111bb45491fffd0ac1f3fe5b1ac403bcb
3 | size 32264948
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/test.desc.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:fa9e15d9e05f1049080ff909d978e918106b1b8720f476165d3fe76cae4168eb
3 | size 10755068
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/test.methname.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:d21e1d5504a448e313322eb37c481b9ffe0d585011ab6d4aa06d7b0682ea0e5d
3 | size 3585108
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/test.tokens.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:f9c8cf49f61ec5b549aed004962ada3399b2564a4bd159f65250afc4c5213a9b
3 | size 39434908
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/train.apiseq.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8eab78f2253a13ba95eea301d8e22448996c889bd232a17754e36cb4c00e7a69
3 | size 220083788
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/train.desc.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:8f32233953f3e720638692f510c1d32f0a7a81270427713addaba39ee95a7757
3 | size 73361348
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/train.methname.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:e08163a86474d697ce6b6b00d9afc484c4dbb374d6c6276648716b6e54c81f04
3 | size 24453868
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/train.tokens.npy:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:67ac9dc30fbc09e2ff285f1e271a2f425a695274bdb86e668e02c1623d85805a
3 | size 268991268
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/vocab.apiseq.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:cfd1a06ffef46b1c97ad104eeb33581ba699815d8a840ff320eb1bd603134c7c
3 | size 187631
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/vocab.desc.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:a415c720fe7f100dfb662e39d91961d123ff5aa4468c17cbd94c1a1f53b0f6e9
3 | size 195427
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/vocab.methname.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:49622af8642d024b5a610679e7a48b96c1c542d6887d598ebd8df167040e124d
3 | size 192772
4 |
--------------------------------------------------------------------------------
/pytorch_model/python/vocab.tokens.pkl:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:db21920d44053464edc8e847fa3ee6b79d8b0a03d7109f30a24d3d3a17d540b2
3 | size 190334
4 |
--------------------------------------------------------------------------------
/pytorch_model/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==0.4.0
2 | torchvision==0.2.1
3 | tables
4 | numpy
5 | scipy
6 | tqdm
7 | tensorboardx
8 | tensorboard
--------------------------------------------------------------------------------
/pytorch_model/utils.py:
--------------------------------------------------------------------------------
1 | import math
2 | import time
3 |
4 | import numpy as np
5 | import torch
6 |
7 |
8 | def cos_np(data1, data2):
9 |     """Numpy implementation of pairwise cosine similarity between two matrices."""
10 |     dotted = np.dot(data1, np.transpose(data2))
11 |     norm1 = np.linalg.norm(data1, axis=1)
12 |     norm2 = np.linalg.norm(data2, axis=1)
13 |     # The outer product of the row norms matches the shape of `dotted`, so
14 |     # every (i, j) entry is divided by norm1[i] * norm2[j].
15 |     matrix_vector_norms = np.outer(norm1, norm2)
16 |     neighbors = np.divide(dotted, matrix_vector_norms)
17 |     return neighbors
18 |
19 |
20 | def normalize(data):
21 |     """Normalize a matrix row-wise (each row scaled to unit L2 norm)."""
22 |     normalized_data = data / np.linalg.norm(data, axis=1).reshape((data.shape[0], 1))
23 |     return normalized_data
24 |
25 |
26 | def dot_np(data1, data2):
27 |     """Cosine similarity for row-normalized matrices (reduces to a dot product)."""
28 |     return np.dot(data1, np.transpose(data2))
29 |
30 |
31 | #######################################################################
32 |
33 | def asMinutes(s):
34 |     """Format a duration in seconds as 'm:ss'."""
35 |     m = math.floor(s / 60)
36 |     s -= m * 60
37 |     return '%d:%02d' % (m, s)
38 |
39 |
40 | def timeSince(since, percent):
41 |     """Return 'elapsed<estimated remaining' given a start time and the fraction of work done."""
42 |     now = time.time()
43 |     s = now - since   # elapsed seconds
44 |     es = s / percent  # estimated total seconds
45 |     rs = es - s       # estimated remaining seconds
46 |     return '%s<%s' % (asMinutes(s), asMinutes(rs))
47 |
48 |
49 | #######################################################################
50 |
51 | def sent2indexes(sentence, vocab):
52 |     '''sentence: a string
53 |     return: a numpy array of word indices
54 |     (assumes every word is present in vocab; unknown words raise a KeyError)
55 |     '''
56 |     return np.array([vocab[word] for word in sentence.strip().split(' ')])
57 |
58 |
59 | ########################################################################
60 |
61 | use_cuda = torch.cuda.is_available()
62 |
63 |
64 | def gVar(data):
65 |     """Wrap a model input: convert numpy arrays to torch tensors and move them to the GPU when available."""
66 |     tensor = data
67 |     if isinstance(data, np.ndarray):
68 |         tensor = torch.from_numpy(data)
69 |     if use_cuda:
70 |         tensor = tensor.cuda()
71 |     return tensor
72 |
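73 | # Example usage (a sketch, not part of the original pipeline): ranking
74 | # hypothetical code embeddings against a single query embedding with the
75 | # helpers above.
76 | #
77 | #   code_vecs = np.random.rand(1000, 128)  # (num_snippets, dim), stand-in data
78 | #   query_vec = np.random.rand(1, 128)     # (1, dim) description embedding
79 | #   sims = dot_np(normalize(code_vecs), normalize(query_vec))  # (1000, 1)
80 | #   top10 = np.argsort(-sims[:, 0])[:10]   # indices of the 10 closest snippets
81 |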
--------------------------------------------------------------------------------
/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/screenshot.png
--------------------------------------------------------------------------------