├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── code_summarization_transfer_learning ├── 1 - Preprocess Data.ipynb ├── 2 - Keras code summarization.ipynb ├── 3 - Train Language Model Using FastAI.ipynb ├── 4 - Train Model To Map Code Embeddings to Language Embeddings.ipynb ├── 5 - Build Search Index.ipynb ├── 6 - Eval metrics.ipynb ├── README.md ├── fastai │ ├── .gitignore │ ├── .travis.yml │ ├── CODE-OF-CONDUCT.md │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.md │ ├── courses │ │ ├── dl1 │ │ │ ├── .gitignore │ │ │ ├── adamw-sgdw-demo.ipynb │ │ │ ├── cifar10-simplenet.ipynb │ │ │ ├── cifar10.ipynb │ │ │ ├── embedding_refactoring_unit_tests.ipynb │ │ │ ├── excel │ │ │ │ ├── collab_filter.xlsx │ │ │ │ ├── conv-example.xlsx │ │ │ │ ├── entropy_example.xlsx │ │ │ │ ├── graddesc.xlsm │ │ │ │ └── layers_example.xlsx │ │ │ ├── fastai │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── adaptive_softmax.py │ │ │ │ ├── column_data.py │ │ │ │ ├── conv_learner.py │ │ │ │ ├── core.py │ │ │ │ ├── dataloader.py │ │ │ │ ├── dataset.py │ │ │ │ ├── executors.py │ │ │ │ ├── fp16.py │ │ │ │ ├── images │ │ │ │ │ └── industrial_fishing.png │ │ │ │ ├── imports.py │ │ │ │ ├── initializers.py │ │ │ │ ├── io.py │ │ │ │ ├── layer_optimizer.py │ │ │ │ ├── layers.py │ │ │ │ ├── learner.py │ │ │ │ ├── lm_rnn.py │ │ │ │ ├── losses.py │ │ │ │ ├── lsuv_initializer.py │ │ │ │ ├── metrics.py │ │ │ │ ├── model.py │ │ │ │ ├── models │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── cifar10 │ │ │ │ │ │ ├── main.sh │ │ │ │ │ │ ├── main_dxy.py │ │ │ │ │ │ ├── main_kuangliu.py │ │ │ │ │ │ ├── preact_resnet.py │ │ │ │ │ │ ├── resnext.py │ │ │ │ │ │ ├── senet.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ ├── utils_kuangliu.py │ │ │ │ │ │ └── wideresnet.py │ │ │ │ │ ├── convert_torch.py │ │ │ │ │ ├── darknet.py │ │ │ │ │ ├── fa_resnet.py │ │ │ │ │ ├── inceptionresnetv2.py │ │ │ │ │ ├── inceptionv4.py │ │ │ │ │ ├── nasnet.py │ │ │ │ │ ├── resnet.py │ │ │ │ │ ├── resnext_101_32x4d.py │ │ │ │ │ ├── resnext_101_64x4d.py │ │ │ │ │ ├── resnext_50_32x4d.py │ │ │ │ │ ├── unet.py │ │ │ │ │ ├── wideresnet.py │ │ │ │ │ └── wrn_50_2f.py │ │ │ │ ├── nlp.py │ │ │ │ ├── plots.py │ │ │ │ ├── rnn_reg.py │ │ │ │ ├── rnn_train.py │ │ │ │ ├── set_spawn.py │ │ │ │ ├── sgdr.py │ │ │ │ ├── structured.py │ │ │ │ ├── swa.py │ │ │ │ ├── text.py │ │ │ │ ├── torch_imports.py │ │ │ │ ├── transforms.py │ │ │ │ ├── transforms_pil.py │ │ │ │ └── utils.py │ │ │ ├── fish.ipynb │ │ │ ├── images │ │ │ │ ├── pretrained.png │ │ │ │ ├── sgdr.png │ │ │ │ ├── zeiler1.png │ │ │ │ ├── zeiler2.png │ │ │ │ ├── zeiler3.png │ │ │ │ └── zeiler4.png │ │ │ ├── keras_lesson1.ipynb │ │ │ ├── lang_model-arxiv.ipynb │ │ │ ├── lang_model.ipynb │ │ │ ├── lesson1-rxt50.ipynb │ │ │ ├── lesson1-vgg.ipynb │ │ │ ├── lesson1.ipynb │ │ │ ├── lesson2-image_models.ipynb │ │ │ ├── lesson3-rossman.ipynb │ │ │ ├── lesson4-imdb.ipynb │ │ │ ├── lesson5-movielens.ipynb │ │ │ ├── lesson6-rnn.ipynb │ │ │ ├── lesson6-sgd.ipynb │ │ │ ├── lesson7-CAM.ipynb │ │ │ ├── lesson7-cifar10.ipynb │ │ │ ├── nasnet.ipynb │ │ │ ├── nlp-arxiv.ipynb │ │ │ ├── nlp.ipynb │ │ │ ├── planet.py │ │ │ ├── planet_cv.ipynb │ │ │ ├── ppt │ │ │ │ └── lesson6.pptx │ │ │ ├── rossman_exp.py │ │ │ ├── scripts │ │ │ │ └── train_planet.py │ │ │ ├── test_transforms.ipynb │ │ │ └── xor.ipynb │ │ ├── dl2 │ │ │ ├── .gitignore │ │ │ ├── carvana-unet-lrg.ipynb │ │ │ ├── carvana-unet.ipynb │ │ │ ├── carvana.ipynb │ │ │ ├── cgan │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── data │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── 
aligned_dataset.py │ │ │ │ │ ├── base_data_loader.py │ │ │ │ │ ├── base_dataset.py │ │ │ │ │ ├── custom_dataset_data_loader.py │ │ │ │ │ ├── data_loader.py │ │ │ │ │ ├── image_folder.py │ │ │ │ │ ├── single_dataset.py │ │ │ │ │ └── unaligned_dataset.py │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_model.py │ │ │ │ │ ├── cycle_gan_model.py │ │ │ │ │ ├── models.py │ │ │ │ │ ├── networks.py │ │ │ │ │ ├── pix2pix_model.py │ │ │ │ │ └── test_model.py │ │ │ │ ├── options │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base_options.py │ │ │ │ │ ├── test_options.py │ │ │ │ │ └── train_options.py │ │ │ │ ├── test.py │ │ │ │ ├── train.py │ │ │ │ └── util │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── get_data.py │ │ │ │ │ ├── html.py │ │ │ │ │ ├── image_pool.py │ │ │ │ │ ├── util.py │ │ │ │ │ └── visualizer.py │ │ │ ├── cifar10-darknet.ipynb │ │ │ ├── cifar10-dawn.ipynb │ │ │ ├── cyclegan.ipynb │ │ │ ├── devise.ipynb │ │ │ ├── enhance.ipynb │ │ │ ├── fastai │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── adaptive_softmax.py │ │ │ │ ├── column_data.py │ │ │ │ ├── conv_learner.py │ │ │ │ ├── core.py │ │ │ │ ├── dataloader.py │ │ │ │ ├── dataset.py │ │ │ │ ├── executors.py │ │ │ │ ├── fp16.py │ │ │ │ ├── images │ │ │ │ │ └── industrial_fishing.png │ │ │ │ ├── imports.py │ │ │ │ ├── initializers.py │ │ │ │ ├── io.py │ │ │ │ ├── layer_optimizer.py │ │ │ │ ├── layers.py │ │ │ │ ├── learner.py │ │ │ │ ├── lm_rnn.py │ │ │ │ ├── losses.py │ │ │ │ ├── lsuv_initializer.py │ │ │ │ ├── metrics.py │ │ │ │ ├── model.py │ │ │ │ ├── models │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── cifar10 │ │ │ │ │ │ ├── main.sh │ │ │ │ │ │ ├── main_dxy.py │ │ │ │ │ │ ├── main_kuangliu.py │ │ │ │ │ │ ├── preact_resnet.py │ │ │ │ │ │ ├── resnext.py │ │ │ │ │ │ ├── senet.py │ │ │ │ │ │ ├── utils.py │ │ │ │ │ │ ├── utils_kuangliu.py │ │ │ │ │ │ └── wideresnet.py │ │ │ │ │ ├── convert_torch.py │ │ │ │ │ ├── darknet.py │ │ │ │ │ ├── fa_resnet.py │ │ │ │ │ ├── inceptionresnetv2.py │ │ │ │ │ ├── inceptionv4.py │ │ │ │ │ ├── nasnet.py │ │ │ │ │ ├── resnet.py │ │ │ │ │ ├── resnext_101_32x4d.py │ │ │ │ │ ├── resnext_101_64x4d.py │ │ │ │ │ ├── resnext_50_32x4d.py │ │ │ │ │ ├── unet.py │ │ │ │ │ ├── wideresnet.py │ │ │ │ │ └── wrn_50_2f.py │ │ │ │ ├── nlp.py │ │ │ │ ├── plots.py │ │ │ │ ├── rnn_reg.py │ │ │ │ ├── rnn_train.py │ │ │ │ ├── set_spawn.py │ │ │ │ ├── sgdr.py │ │ │ │ ├── structured.py │ │ │ │ ├── swa.py │ │ │ │ ├── text.py │ │ │ │ ├── torch_imports.py │ │ │ │ ├── transforms.py │ │ │ │ ├── transforms_pil.py │ │ │ │ └── utils.py │ │ │ ├── imdb.ipynb │ │ │ ├── imdb_scripts │ │ │ │ ├── README.md │ │ │ │ ├── create_toks.py │ │ │ │ ├── tok2id.py │ │ │ │ ├── train_clas.py │ │ │ │ ├── train_tri_lm.py │ │ │ │ └── train_tri_wt.py │ │ │ ├── lsun_scripts │ │ │ │ ├── lsun-data.py │ │ │ │ └── lsun-download.py │ │ │ ├── pascal-multi.ipynb │ │ │ ├── pascal.ipynb │ │ │ ├── ppt │ │ │ │ └── lesson8.pptx │ │ │ ├── sampled_sm.py │ │ │ ├── style-transfer-net.ipynb │ │ │ ├── style-transfer.ipynb │ │ │ ├── training_phase.ipynb │ │ │ ├── translate.ipynb │ │ │ ├── wgan.ipynb │ │ │ └── xl │ │ │ │ └── dl-examples.xlsx │ │ └── ml1 │ │ │ ├── Ethics in Data Science.ipynb │ │ │ ├── bulldozer_dl.ipynb │ │ │ ├── bulldozer_linreg.ipynb │ │ │ ├── excel │ │ │ └── naivebayes.xlsx │ │ │ ├── fastai │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── adaptive_softmax.py │ │ │ ├── column_data.py │ │ │ ├── conv_learner.py │ │ │ ├── core.py │ │ │ ├── dataloader.py │ │ │ ├── dataset.py │ │ │ ├── executors.py │ │ │ ├── fp16.py │ │ │ ├── images │ │ │ │ └── 
industrial_fishing.png │ │ │ ├── imports.py │ │ │ ├── initializers.py │ │ │ ├── io.py │ │ │ ├── layer_optimizer.py │ │ │ ├── layers.py │ │ │ ├── learner.py │ │ │ ├── lm_rnn.py │ │ │ ├── losses.py │ │ │ ├── lsuv_initializer.py │ │ │ ├── metrics.py │ │ │ ├── model.py │ │ │ ├── models │ │ │ │ ├── .gitignore │ │ │ │ ├── cifar10 │ │ │ │ │ ├── main.sh │ │ │ │ │ ├── main_dxy.py │ │ │ │ │ ├── main_kuangliu.py │ │ │ │ │ ├── preact_resnet.py │ │ │ │ │ ├── resnext.py │ │ │ │ │ ├── senet.py │ │ │ │ │ ├── utils.py │ │ │ │ │ ├── utils_kuangliu.py │ │ │ │ │ └── wideresnet.py │ │ │ │ ├── convert_torch.py │ │ │ │ ├── darknet.py │ │ │ │ ├── fa_resnet.py │ │ │ │ ├── inceptionresnetv2.py │ │ │ │ ├── inceptionv4.py │ │ │ │ ├── nasnet.py │ │ │ │ ├── resnet.py │ │ │ │ ├── resnext_101_32x4d.py │ │ │ │ ├── resnext_101_64x4d.py │ │ │ │ ├── resnext_50_32x4d.py │ │ │ │ ├── unet.py │ │ │ │ ├── wideresnet.py │ │ │ │ └── wrn_50_2f.py │ │ │ ├── nlp.py │ │ │ ├── plots.py │ │ │ ├── rnn_reg.py │ │ │ ├── rnn_train.py │ │ │ ├── set_spawn.py │ │ │ ├── sgdr.py │ │ │ ├── structured.py │ │ │ ├── swa.py │ │ │ ├── text.py │ │ │ ├── torch_imports.py │ │ │ ├── transforms.py │ │ │ ├── transforms_pil.py │ │ │ └── utils.py │ │ │ ├── images │ │ │ ├── bulldozers_data.png │ │ │ ├── bulldozers_data2.png │ │ │ ├── digit.gif │ │ │ ├── ethics_recidivism.jpg │ │ │ ├── mnist.png │ │ │ ├── overfitting2.png │ │ │ ├── sgd2.gif │ │ │ ├── what_is_pytorch.png │ │ │ ├── zeiler1.png │ │ │ ├── zeiler2.png │ │ │ ├── zeiler3.png │ │ │ └── zeiler4.png │ │ │ ├── lesson1-rf.ipynb │ │ │ ├── lesson2-rf_interpretation.ipynb │ │ │ ├── lesson3-rf_foundations.ipynb │ │ │ ├── lesson4-mnist_sgd.ipynb │ │ │ ├── lesson5-nlp.ipynb │ │ │ └── ppt │ │ │ ├── 2017-12-ethics.pptx │ │ │ └── ml_applications.pptx │ ├── docs │ │ ├── README.md │ │ ├── __init__.py │ │ ├── abbr.md │ │ ├── anatomy.adoc │ │ ├── dataloader.adoc │ │ ├── expand_adoc_templ.ipynb │ │ ├── gen_ascii_docs.py │ │ ├── md_expander.py │ │ ├── module-decisions.md │ │ ├── style.md │ │ ├── templates.py │ │ ├── testing.adoc │ │ ├── transforms-tmpl.adoc │ │ ├── transforms.adoc │ │ └── transforms.html │ ├── environment-cpu.yml │ ├── environment-nopytorch.yml │ ├── environment-old.yml │ ├── environment.yml │ ├── fastai │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── adaptive_softmax.py │ │ ├── column_data.py │ │ ├── conv_learner.py │ │ ├── core.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── executors.py │ │ ├── fp16.py │ │ ├── images │ │ │ └── industrial_fishing.png │ │ ├── imports.py │ │ ├── initializers.py │ │ ├── io.py │ │ ├── layer_optimizer.py │ │ ├── layers.py │ │ ├── learner.py │ │ ├── lm_rnn.py │ │ ├── losses.py │ │ ├── lsuv_initializer.py │ │ ├── metrics.py │ │ ├── model.py │ │ ├── models │ │ │ ├── .gitignore │ │ │ ├── cifar10 │ │ │ │ ├── main.sh │ │ │ │ ├── main_dxy.py │ │ │ │ ├── main_kuangliu.py │ │ │ │ ├── preact_resnet.py │ │ │ │ ├── resnext.py │ │ │ │ ├── senet.py │ │ │ │ ├── utils.py │ │ │ │ ├── utils_kuangliu.py │ │ │ │ └── wideresnet.py │ │ │ ├── convert_torch.py │ │ │ ├── darknet.py │ │ │ ├── fa_resnet.py │ │ │ ├── inceptionresnetv2.py │ │ │ ├── inceptionv4.py │ │ │ ├── nasnet.py │ │ │ ├── resnet.py │ │ │ ├── resnext_101_32x4d.py │ │ │ ├── resnext_101_64x4d.py │ │ │ ├── resnext_50_32x4d.py │ │ │ ├── unet.py │ │ │ ├── wideresnet.py │ │ │ └── wrn_50_2f.py │ │ ├── nlp.py │ │ ├── plots.py │ │ ├── rnn_reg.py │ │ ├── rnn_train.py │ │ ├── set_spawn.py │ │ ├── sgdr.py │ │ ├── structured.py │ │ ├── swa.py │ │ ├── text.py │ │ ├── torch_imports.py │ │ ├── transforms.py │ │ ├── transforms_pil.py │ │ └── 
utils.py │ ├── pytest.ini │ ├── requirements.txt │ ├── setup.cfg │ ├── setup.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_core.py │ │ ├── test_layer_optimizer.py │ │ ├── test_lsuv_initializer.py │ │ ├── test_samplers.py │ │ └── test_transform.py │ └── tutorials │ │ ├── __init__.py │ │ ├── fastai │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── adaptive_softmax.py │ │ ├── column_data.py │ │ ├── conv_learner.py │ │ ├── core.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── executors.py │ │ ├── fp16.py │ │ ├── images │ │ │ └── industrial_fishing.png │ │ ├── imports.py │ │ ├── initializers.py │ │ ├── io.py │ │ ├── layer_optimizer.py │ │ ├── layers.py │ │ ├── learner.py │ │ ├── lm_rnn.py │ │ ├── losses.py │ │ ├── lsuv_initializer.py │ │ ├── metrics.py │ │ ├── model.py │ │ ├── models │ │ │ ├── .gitignore │ │ │ ├── cifar10 │ │ │ │ ├── main.sh │ │ │ │ ├── main_dxy.py │ │ │ │ ├── main_kuangliu.py │ │ │ │ ├── preact_resnet.py │ │ │ │ ├── resnext.py │ │ │ │ ├── senet.py │ │ │ │ ├── utils.py │ │ │ │ ├── utils_kuangliu.py │ │ │ │ └── wideresnet.py │ │ │ ├── convert_torch.py │ │ │ ├── darknet.py │ │ │ ├── fa_resnet.py │ │ │ ├── inceptionresnetv2.py │ │ │ ├── inceptionv4.py │ │ │ ├── nasnet.py │ │ │ ├── resnet.py │ │ │ ├── resnext_101_32x4d.py │ │ │ ├── resnext_101_64x4d.py │ │ │ ├── resnext_50_32x4d.py │ │ │ ├── unet.py │ │ │ ├── wideresnet.py │ │ │ └── wrn_50_2f.py │ │ ├── nlp.py │ │ ├── plots.py │ │ ├── rnn_reg.py │ │ ├── rnn_train.py │ │ ├── set_spawn.py │ │ ├── sgdr.py │ │ ├── structured.py │ │ ├── swa.py │ │ ├── text.py │ │ ├── torch_imports.py │ │ ├── transforms.py │ │ ├── transforms_pil.py │ │ └── utils.py │ │ ├── images │ │ ├── cifar10.png │ │ ├── demba_combustion_engine.png │ │ ├── digit.gif │ │ ├── fashion-mnist.png │ │ ├── markov_health.jpg │ │ ├── mnist.png │ │ ├── normal.jpg │ │ ├── overfitting.png │ │ ├── overfitting2.png │ │ ├── sgd2.gif │ │ ├── shop.png │ │ ├── what_is_pytorch.png │ │ ├── zeiler1.png │ │ ├── zeiler2.png │ │ ├── zeiler3.png │ │ └── zeiler4.png │ │ ├── kmeans.py │ │ ├── linalg_pytorch.ipynb │ │ └── meanshift.ipynb ├── feature_extractor.py ├── general_utils.py ├── lang_model_utils.py ├── seq2seq_utils.py └── visitor.py ├── pytorch_model ├── README.md ├── codesearcher.py ├── configs.py ├── data.py ├── java │ ├── test.apiseq.h5 │ ├── test.desc.h5 │ ├── test.methname.h5 │ ├── test.rawcode.txt │ ├── test.tokens.h5 │ ├── train.apiseq.h5 │ ├── train.desc.h5 │ ├── train.methname.h5 │ ├── train.tokens.h5 │ ├── use.apiseq.h5 │ ├── use.codevecs.normalized.h5 │ ├── use.methname.h5 │ ├── use.rawcode.txt │ ├── use.tokens.h5 │ ├── vocab.apiseq.pkl │ ├── vocab.desc.pkl │ ├── vocab.methname.pkl │ └── vocab.tokens.pkl ├── models.py ├── python │ ├── small.rawcode.txt │ ├── small.test.apiseq.npy │ ├── small.test.desc.npy │ ├── small.test.methname.npy │ ├── small.test.tokens.npy │ ├── test.apiseq.npy │ ├── test.desc.npy │ ├── test.methname.npy │ ├── test.tokens.npy │ ├── train.apiseq.npy │ ├── train.desc.npy │ ├── train.methname.npy │ ├── train.tokens.npy │ ├── vocab.apiseq.pkl │ ├── vocab.desc.pkl │ ├── vocab.methname.pkl │ └── vocab.tokens.pkl ├── requirements.txt └── utils.py └── screenshot.png /.gitattributes: -------------------------------------------------------------------------------- 1 | pytorch_model/java/* filter=lfs diff=lfs merge=lfs -text 2 | pytorch_model/python/* filter=lfs diff=lfs merge=lfs -text 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 
| MIT License
2 |
3 | Copyright (c) 2019 Chintan Shah
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Deep Semantic Code Search
2 | Code for the paper: [Paper](https://drive.google.com/file/d/1FFTmqfuz3ghLGomzGIARA6p0I1jpVnQ3/view?usp=drivesdk)
3 |
4 | Deep Semantic Code Search aims to learn a joint embedding space for code and description vectors and then use it for a code search application.
5 |
6 | These experiments have two parts:
7 |
8 | 1. The first uses the approach suggested in [1]: we train their architecture on our own Python dataset.
9 | 2. The second expands on the first using the methodology suggested in [2], and achieves reasonably good results.
10 |
11 | We can observe that some semantic information is captured in the results:
12 |
13 | ![Query Results](screenshot.png)
14 |
15 |
16 | ### Instructions on reproducing our results
17 |
18 | The implementation of [1] is within [Joint Training Model](pytorch_model) and that of [2] is within [Code Summarization Transfer Learning](code_summarization_transfer_learning).
19 |
20 | ### Dataset
21 |
22 | For [1], our dataset is provided within [Joint Training Model](pytorch_model).
23 | For [2], the full dataset is available on [Google Cloud Platform](http://storage.googleapis.com/deep-code-search-models/).
24 |
25 | For how to access public data on GCP, see https://cloud.google.com/storage/docs/access-public-data
26 |
27 | ### References:
28 |
29 | [1] https://guxd.github.io/papers/deepcs.pdf
30 |
31 | [2] https://towardsdatascience.com/semantic-code-search-3cd6d244a39c
32 |
--------------------------------------------------------------------------------
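For example, with the Google Cloud SDK installed, the public bucket referenced in the README's Dataset section can be listed and mirrored from the command line (assuming the bucket is still publicly readable):

    gsutil ls gs://deep-code-search-models/
    gsutil -m cp -r gs://deep-code-search-models/ .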
/code_summarization_transfer_learning/README.md:
--------------------------------------------------------------------------------
1 | ## Code summarization using transfer learning
2 |
3 |
4 | ### How to run?
5 |
6 | These notebooks should be run sequentially, using the Docker containers listed below.
7 |
8 | 1. The first notebook fetches and creates the dataset.
9 | 2. The second notebook vectorizes the code sequences and description sequences and trains 3 seq2seq models:
10 |     * Seq2Seq model from function tokens -> docstring
11 |     * Seq2Seq model from api seq -> docstring
12 |     * Seq2Seq model from method name -> docstring
13 | 3. The third notebook trains an AWD LSTM language model for docstrings using FastAI's implementation.
14 | 4. The fourth notebook trains the final joint embedder from code to docstring vectors.
15 | 5. In the fifth notebook, we build a search engine that uses the trained networks to output query results.
16 | 6. The sixth notebook evaluates the model.
17 |
18 | To run these notebooks (1 - 6), we strongly suggest using these Docker containers:
19 |
20 | #### Docker Containers
21 |
22 | - [hamelsmu/ml-gpu](https://hub.docker.com/r/hamelsmu/ml-gpu/): Use this container for any *gpu* bound parts.
23 |
24 | - [hamelsmu/ml-cpu](https://hub.docker.com/r/hamelsmu/ml-cpu/): Use this container for any *cpu* bound parts.
--------------------------------------------------------------------------------
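As a sketch (the mount path, port, and GPU flag are assumptions that depend on your setup; newer Docker versions use --gpus all instead of --runtime=nvidia), the GPU container can be started with something like:

    docker run --runtime=nvidia -it -p 8888:8888 -v $(pwd):/ds hamelsmu/ml-gpu
    jupyter notebook --ip 0.0.0.0 --allow-root --no-browser   # run inside the container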
/code_summarization_transfer_learning/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | .pypirc
3 | ~*
4 | tmp*
5 | sample_data/
6 | tags
7 | data
8 |
9 | # Byte-compiled / optimized / DLL files
10 | __pycache__/
11 | *.py[cod]
12 | *$py.class
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | eggs/
25 | .eggs/
26 | lib/
27 | lib64/
28 | parts/
29 | sdist/
30 | var/
31 | wheels/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | .hypothesis/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # AsciiDoc documentation
76 | docs/fastai/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # dotenv
94 | .env
95 |
96 | # virtualenv
97 | .venv
98 | venv/
99 | ENV/
100 |
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 |
105 | # Rope project settings
106 | .ropeproject
107 |
108 | # mkdocs documentation
109 | /site
110 |
111 | # mypy
112 | .mypy_cache/
113 |
114 | .vscode
115 | *.swp
116 |
117 | # osx generated files
118 | .DS_Store
119 | .DS_Store?
120 | .Trashes
121 | ehthumbs.db
122 | Thumbs.db
123 | .idea
124 |
125 | # pytest
126 | .pytest_cache
127 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 |   # We don't actually use the Travis Python, but this keeps it organized.
4 |   # - "2.7"
5 |   # - "3.5"
6 |   - "3.6"
7 | install:
8 |   - sudo apt-get update
9 |   # We do this conditionally because it saves us some downloading if the
10 |   # version is the same.
11 |   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
12 |       wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
13 |     else
14 |       wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
15 |     fi
16 |   - bash miniconda.sh -b -p $HOME/miniconda
17 |   - export PATH="$HOME/miniconda/bin:$PATH"
18 |   - hash -r
19 |   - conda config --set always_yes yes --set changeps1 no
20 |   - conda update -q conda
21 |   # Useful for debugging any issues with conda
22 |   - conda info -a
23 |   - conda env update -f environment-cpu.yml
24 |   - source activate fastai-cpu
25 |   - pip install -U pytest
26 |   # For some reason the pip section is not supported by conda env update; for the time being I've pasted it here:
27 |   - pip install opencv-python
28 |   - pip install graphviz
29 |   - pip install torchvision>=0.1.9
30 |   - pip install opencv-python
31 |   - pip install isoweek
32 |   - pip install pandas_summary
33 |   - pip install torchtext
34 |   - pip install graphviz
35 |   - pip install sklearn_pandas
36 |   - pip install feather-format
37 |   - pip install jupyter_contrib_nbextensions
38 |   - pip install plotnine
39 |   - pip install awscli
40 |   - pip install kaggle-cli
41 |   - pip install ipywidgets
42 |   - pip install jupyter_contrib_nbextensions
43 |   - pip install git+https://github.com/SauceCat/PDPbox.git
44 |
45 |   # These libs are required by OpenCV on Ubuntu 16.04
46 |   - sudo apt install -y libsm6 libxext6 libxrender-dev libgl1-mesa-glx
47 |
48 |
49 | script:
50 |   #- python -m unittest
51 |   - python -m pytest tests
52 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include *.txt
3 | include *.yml
4 | include Makefile
5 | recursive-include fastai/models *
6 | recursive-include fastai/images *
7 |
8 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/README.md:
--------------------------------------------------------------------------------
1 | # fast.ai
2 | The fast.ai deep learning library, lessons, and tutorials.
3 |
4 | Copyright 2017 onwards, Jeremy Howard. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. A copy of the License is provided in the LICENSE file in this repository.
5 |
6 | ## Current Status
7 | This is an alpha version.
8 |
9 | Most of the library is quite well tested since many students have used it to complete the [Practical Deep Learning for Coders](http://course.fast.ai) course. However it hasn't been widely used yet outside of the course, so you may find some missing features or rough edges.
10 |
11 | If you're interested in using the library in your own projects, we're happy to help support any bug fixes or feature additions you need - please use [http://forums.fast.ai](http://forums.fast.ai) to discuss.
12 |
13 | ## To install
14 |
15 | ### Prerequisites
16 | * [Anaconda](https://conda.io/docs/user-guide/install/index.html#), manages Python environment and dependencies
17 |
18 | ### Normal installation
19 | 1. Download project: `git clone https://github.com/fastai/fastai.git`
20 | 1. Move into root folder: `cd fastai`
21 | 1. Set up Python environment: `conda env update`
22 | 1.
Activate Python environment: `conda activate fastai` 23 | - If this fails, use instead: `source activate fastai` 24 | 25 | ### Install as pip package 26 | You can also install this library in the local environment using `pip` 27 | 28 | `pip install fastai` 29 | 30 | However this is not currently the recommended approach, since the library is being updated much more frequently than the pip release, fewer people are using and testing the pip version, and pip needs to compile many libraries from scratch (which can be slow). 31 | 32 | ### CPU only environment 33 | Use this if you do not have an NVidia GPU. Note you are encouraged to use Paperspace to access a GPU in the cloud by following this [guide](https://github.com/reshamas/fastai_deeplearn_part1/blob/master/tools/paperspace.md). 34 | 35 | `conda env update -f environment-cpu.yml` 36 | 37 | ## To update 38 | To update everything at any time: 39 | 40 | 1. Update code: `git pull` 41 | 1. Update dependencies: `conda env update` 42 | 43 | ## To test 44 | Before submitting a pull request, run the unit tests: 45 | 46 | 1. Activate Python environment: `conda activate fastai` 47 | - If this fails, use instead: `source activate fastai` 48 | 1. Run tests: `python -m pytest tests` 49 | 50 | ### To run specific test file 51 | 1. Activate Python environment: `conda activate fastai` 52 | - If this fails, use instead: `source activate fastai` 53 | 1. `python -m pytest tests/[file_name.py]` 54 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/.gitignore: -------------------------------------------------------------------------------- 1 | *-Copy?.ipynb 2 | *-Copy??.ipynb 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/excel/collab_filter.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/collab_filter.xlsx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/excel/conv-example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/conv-example.xlsx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/excel/entropy_example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/entropy_example.xlsx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/excel/graddesc.xlsm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/graddesc.xlsm -------------------------------------------------------------------------------- 
/code_summarization_transfer_learning/fastai/courses/dl1/excel/layers_example.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/excel/layers_example.xlsx
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/.gitignore:
--------------------------------------------------------------------------------
1 | weights/
2 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/fastai/__init__.py
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/executors.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import itertools
3 | import time
4 | from concurrent.futures import ThreadPoolExecutor
5 |
6 |
7 | class LazyThreadPoolExecutor(ThreadPoolExecutor):
8 |     def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None):
9 |         """
10 |         Collects iterables lazily, rather than immediately.
11 |         Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor
12 |         Implementation taken from this PR: https://github.com/python/cpython/pull/707
13 |         """
14 |         if timeout is not None: end_time = timeout + time.time()
15 |         if prefetch is None: prefetch = self._max_workers
16 |         if prefetch < 0: raise ValueError("prefetch count may not be negative")
17 |         argsiter = zip(*iterables)
18 |         fs = collections.deque(self.submit(fn, *args) for args in
19 |                                itertools.islice(argsiter, self._max_workers + prefetch))
20 |
21 |         # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
22 |         def result_iterator():
23 |             nonlocal argsiter
24 |             try:
25 |                 while fs:
26 |                     res = fs[0].result() if timeout is None else fs[0].result(
27 |                         end_time - time.time())
28 |                     # Got a result, future needn't be cancelled
29 |                     del fs[0]
30 |                     # Dispatch next task before yielding to keep pipeline full
31 |                     if argsiter:
32 |                         try:
33 |                             args = next(argsiter)
34 |                         except StopIteration:
35 |                             argsiter = None
36 |                         else:
37 |                             fs.append(self.submit(fn, *args))
38 |                     yield res
39 |             finally:
40 |                 for future in fs: future.cancel()
41 |
42 |         return result_iterator()
43 |
--------------------------------------------------------------------------------
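A minimal usage sketch for LazyThreadPoolExecutor (hypothetical names; the point is that only about max_workers + prefetch tasks are ever in flight, so you can map over a very large iterable and stop early):

    # hypothetical example, not part of the repo
    def load(path):
        return open(path).read()  # stand-in for real work

    with LazyThreadPoolExecutor(max_workers=4) as ex:
        for text in ex.map(load, huge_list_of_paths, prefetch=8):  # huge_list_of_paths: any iterable
            if 'needle' in text:
                break  # the remaining queued futures get cancelled when the generator is closed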
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/fp16.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class FP16(nn.Module):
6 |     def __init__(self, module):
7 |         super(FP16, self).__init__()
8 |         self.module = batchnorm_to_fp32(module.half())
9 |
10 |     def forward(self, input):
11 |         return self.module(input.half())
12 |
13 |     def load_state_dict(self, *inputs, **kwargs):
14 |         self.module.load_state_dict(*inputs, **kwargs)
15 |
16 |     def state_dict(self, *inputs, **kwargs):
17 |         return self.module.state_dict(*inputs, **kwargs)
18 |
19 |
20 | def batchnorm_to_fp32(module):
21 |     '''
22 |     BatchNorm layers need to have parameters in single precision.
23 |     Find all layers and convert them back to float. This can't
24 |     be done with the built-in .apply as that function will apply
25 |     fn to all modules, parameters, and buffers. Thus we wouldn't
26 |     be able to guard the float conversion based on the module type.
27 |     '''
28 |     if isinstance(module, nn.modules.batchnorm._BatchNorm):
29 |         module.float()
30 |     for child in module.children():
31 |         batchnorm_to_fp32(child)
32 |     return module
33 |
34 |
35 | def copy_model_to_fp32(m, optim):
36 |     """ Creates an fp32 copy of the model parameters and sets the optimizer parameters to it
37 |     """
38 |     fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in
39 |                    m.parameters()]
40 |     optim_groups = [group['params'] for group in optim.param_groups]
41 |     iter_fp32_params = iter(fp32_params)
42 |     for group_params in optim_groups:
43 |         for i in range(len(group_params)):
44 |             fp32_param = next(iter_fp32_params)
45 |             fp32_param.requires_grad = group_params[i].requires_grad
46 |             group_params[i] = fp32_param
47 |     return fp32_params
48 |
49 |
50 | def copy_fp32_to_model(m, fp32_params):
51 |     m_params = list(m.parameters())
52 |     for fp32_param, m_param in zip(fp32_params, m_params):
53 |         m_param.data.copy_(fp32_param.data)
54 |
55 |
56 | def update_fp32_grads(fp32_params, m):
57 |     m_params = list(m.parameters())
58 |     for fp32_param, m_param in zip(fp32_params, m_params):
59 |         if fp32_param.grad is None:
60 |             fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size()))
61 |         fp32_param.grad.data.copy_(m_param.grad.data)
62 |
--------------------------------------------------------------------------------
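A sketch of how these helpers fit together in a mixed-precision training step (hypothetical model/optimizer/criterion/loader names; loss scaling omitted):

    # hypothetical example, not part of the repo
    model = FP16(model.cuda())                          # fp16 weights, BatchNorm kept in fp32
    fp32_params = copy_model_to_fp32(model, optimizer)  # optimizer now holds fp32 master copies
    for x, y in loader:
        loss = criterion(model(x.cuda()), y.cuda())
        model.zero_grad()
        loss.backward()                         # gradients computed in fp16
        update_fp32_grads(fp32_params, model)   # copy grads onto the fp32 master params
        optimizer.step()                        # update the fp32 master weights
        copy_fp32_to_model(model, fp32_params)  # write updated weights back into the fp16 model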
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/images/industrial_fishing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/fastai/images/industrial_fishing.png
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/imports.py:
--------------------------------------------------------------------------------
1 | from IPython.lib.deepreload import reload as dreload
2 | import PIL, os, numpy as np, threading, json, bcolz, scipy
3 | import pandas as pd, pickle, string, sys, re, time, shutil, copy
4 | import seaborn as sns, matplotlib
5 | from abc import abstractmethod
6 | from functools import partial
7 | from pandas_summary import DataFrameSummary
8 | from IPython.lib.display import FileLink
9 | from sklearn import metrics, ensemble, preprocessing
10 | from operator import itemgetter, attrgetter
11 |
12 | from matplotlib import pyplot as plt, rcParams, animation
13 |
14 | matplotlib.rc('animation', html='html5')
15 | np.set_printoptions(precision=5, linewidth=110, suppress=True)
16 |
17 | from ipykernel.kernelapp import IPKernelApp
18 |
19 |
20 | def in_notebook(): return IPKernelApp.initialized()
21 |
22 |
23 | def in_ipynb():
24 |     try:
25 |         cls = get_ipython().__class__.__name__
26 |         return cls == 'ZMQInteractiveShell'
27 |     except NameError:
28 |         return False
29 |
30 |
31 | import tqdm as tq
32 |
33 |
34 | def clear_tqdm():
35 |     inst = getattr(tq.tqdm, '_instances', None)
36 |     if not inst: return
37 |     try:
38 |         for i in range(len(inst)): inst.pop().close()
39 |     except Exception:
40 |         pass
41 |
42 |
43 | if in_notebook():
44 |     def tqdm(*args, **kwargs):
45 |         clear_tqdm()
46 |         return tq.tqdm(*args, file=sys.stdout, **kwargs)
47 |
48 |
49 |     def trange(*args, **kwargs):
50 |         clear_tqdm()
51 |         return tq.trange(*args, file=sys.stdout, **kwargs)
52 | else:
53 |     from tqdm import tqdm, trange
54 |
55 | tnrange = trange
56 | tqdm_notebook = tqdm
57 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/initializers.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 |
4 | def cond_init(m, init_fn):
5 |     if not isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
6 |         if hasattr(m, 'weight'): init_fn(m.weight)
7 |         if hasattr(m, 'bias'): m.bias.data.fill_(0.)
8 |
9 |
10 | def apply_init(m, init_fn):
11 |     m.apply(lambda x: cond_init(x, init_fn))
12 |
--------------------------------------------------------------------------------
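A quick usage sketch for apply_init (hypothetical model; it walks the module tree and initializes every non-BatchNorm layer that has weights):

    # hypothetical example, not part of the repo
    import torch.nn as nn
    net = nn.Sequential(nn.Linear(10, 20), nn.BatchNorm1d(20), nn.Linear(20, 2))
    apply_init(net, nn.init.kaiming_normal_)  # BatchNorm weights/biases are left untouched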
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/io.py:
--------------------------------------------------------------------------------
1 | from urllib.request import urlretrieve
2 |
3 | from tqdm import tqdm
4 |
5 | from .torch_imports import *
6 |
7 |
8 | class TqdmUpTo(tqdm):
9 |     def update_to(self, b=1, bsize=1, tsize=None):
10 |         if tsize is not None: self.total = tsize
11 |         self.update(b * bsize - self.n)
12 |
13 |
14 | def get_data(url, filename):
15 |     if not os.path.exists(filename):
16 |
17 |         dirname = os.path.dirname(filename)
18 |         if not os.path.exists(dirname):
19 |             os.makedirs(dirname)
20 |
21 |         with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t:
22 |             urlretrieve(url, filename, reporthook=t.update_to)
23 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/layers.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class AdaptiveConcatPool2d(nn.Module):
6 |     def __init__(self, sz=None):
7 |         super().__init__()
8 |         sz = sz or (1, 1)
9 |         self.ap = nn.AdaptiveAvgPool2d(sz)
10 |         self.mp = nn.AdaptiveMaxPool2d(sz)
11 |
12 |     def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1)
13 |
14 |
15 | class Lambda(nn.Module):
16 |     def __init__(self, f): super().__init__(); self.f = f
17 |
18 |     def forward(self, x): return self.f(x)
19 |
20 |
21 | class Flatten(nn.Module):
22 |     def __init__(self): super().__init__()
23 |
24 |     def forward(self, x): return x.view(x.size(0), -1)
25 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9):
5 |     y_pred = (y_pred.float() > threshold).float()
6 |     y_true = y_true.float()
7 |     tp = (y_pred * y_true).sum(dim=1)
8 |     precision = tp / (y_pred.sum(dim=1) + eps)
9 |     recall = tp / (y_true.sum(dim=1) + eps)
10 |     return torch.mean(
11 |         precision * recall / (precision * (beta ** 2) + recall + eps) * (1 + beta ** 2))
12 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/metrics.py:
--------------------------------------------------------------------------------
1 | from .imports import *
2 | from .torch_imports import *
3 |
4 | def accuracy_np(preds, targs):
5 |     preds = np.argmax(preds, 1)
6 |     return (preds == targs).mean()
7 |
8 |
9 | def accuracy(preds, targs):
10 |     preds = torch.max(preds, dim=1)[1]
11 |     return (preds == targs).float().mean()
12 |
13 |
14 | def accuracy_thresh(thresh):
15 |     return lambda preds, targs: accuracy_multi(preds, targs, thresh)
16 |
17 |
18 | def accuracy_multi(preds, targs, thresh):
19 |     return ((preds > thresh).float() == targs).float().mean()
20 |
21 |
22 | def accuracy_multi_np(preds, targs, thresh):
23 |     return ((preds > thresh) == targs).mean()
24 |
25 |
26 | def recall(preds, targs, thresh=0.5):
27 |     pred_pos = preds > thresh
28 |     tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
29 |     return tpos.sum() / targs.sum()
30 |
31 |
32 | def precision(preds, targs, thresh=0.5):
33 |     pred_pos = preds > thresh
34 |     tpos = torch.mul((targs.byte() == pred_pos), targs.byte())
35 |     return tpos.sum() / pred_pos.sum()
36 |
37 |
38 | def fbeta(preds, targs, beta, thresh=0.5):
39 |     """Calculates the F-beta score (the weighted harmonic mean of precision and recall).
40 |     This is the micro averaged version where the true positives, false negatives and
41 |     false positives are calculated globally (as opposed to on a per label basis).
42 |
43 |     beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision and
44 |     beta > 1 favors recall.
45 |     """
46 |     assert beta > 0, 'beta needs to be greater than 0'
47 |     beta2 = beta ** 2
48 |     rec = recall(preds, targs, thresh)
49 |     prec = precision(preds, targs, thresh)
50 |     return (1 + beta2) * prec * rec / (beta2 * prec + rec)
51 |
52 |
53 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh)
54 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.tar
3 | checkpoint*
4 | log*
5 | wgts/
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/cifar10/main.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | python main.py --lr=0.1
4 | python main.py --resume --lr=0.01
5 | python main.py --resume --lr=0.001
6 |
7 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/models/darknet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | from .layers import *
4 |
5 |
6 |
7 | class ConvBN(nn.Module):
8 |     "Convolutional layer followed by batchnorm"
9 |
10 |     def __init__(self, ch_in, ch_out, kernel_size=3, stride=1, padding=0):
11 |         super().__init__()
12 |         self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride,
13 |                               padding=padding, bias=False)
14 |         self.bn = nn.BatchNorm2d(ch_out, momentum=0.01)
15 |         self.relu = nn.LeakyReLU(0.1, inplace=True)
16 |
17 |     def forward(self, x): return self.relu(self.bn(self.conv(x)))
18 |
19 |
20 | class DarknetBlock(nn.Module):
21 |     def __init__(self, ch_in):
22 |         super().__init__()
23 |         ch_hid = ch_in // 2
24 |         self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0)
25 |         self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1)
26 |
27 |     def forward(self, x): return self.conv2(self.conv1(x)) + x
28 |
29 |
30 | class Darknet(nn.Module):
31 |     "Replicates the darknet classifier from the YOLOv3 paper (table 1)"
32 |
33 |     def make_group_layer(self, ch_in, num_blocks, stride=1):
34 |         layers = [ConvBN(ch_in, ch_in * 2, stride=stride)]
35 |         for i in range(num_blocks): layers.append(DarknetBlock(ch_in * 2))
36 |         return layers
37 |
38 |     def __init__(self, num_blocks, num_classes=1000, start_nf=32):
39 |         super().__init__()
40 |         nf = start_nf
41 |         layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)]
42 |         for i, nb in enumerate(num_blocks):
43 |             layers += self.make_group_layer(nf, nb, stride=(1 if i == 1 else 2))
44 |             nf *= 2
45 |         layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)]
46 |         self.layers = nn.Sequential(*layers)
47 |
48 |     def forward(self, x):
49 |         return self.layers(x)
50 |
51 |
52 | def darknet_53(num_classes=1000): return Darknet([1, 2, 8, 8, 4], num_classes)
53 |
54 |
55 | def darknet_small(num_classes=1000): return Darknet([1, 2, 4, 8, 4], num_classes)
56 |
57 |
58 | def darknet_mini(num_classes=1000): return Darknet([1, 2, 4, 4, 2], num_classes, start_nf=24)
59 |
60 |
61 | def darknet_mini2(num_classes=1000): return Darknet([1, 2, 8, 8, 4], num_classes, start_nf=16)
62 |
63 |
64 | def darknet_mini3(num_classes=1000): return Darknet([1, 2, 4, 4], num_classes)
65 |
--------------------------------------------------------------------------------
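A smoke-test sketch for the classifier above (hypothetical; assumes the relative import of Flatten from layers.py resolves):

    # hypothetical example, not part of the repo
    import torch
    model = darknet_53(num_classes=10)
    x = torch.randn(2, 3, 256, 256)  # batch of 2 RGB images
    print(model(x).shape)            # torch.Size([2, 10])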
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/rnn_train.py:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/set_spawn.py:
--------------------------------------------------------------------------------
1 | from multiprocessing import set_start_method
2 | set_start_method('spawn')
3 |
4 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl1/fastai/transforms_pil.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class Cutout(object):
6 |     """Randomly mask out one or more patches from an image.
7 |
8 |     Args:
9 |         n_holes (int): Number of patches to cut out of each image.
10 |         length (int): The length (in pixels) of each square patch.
11 |     """
12 |     def __init__(self, n_holes, length):
13 |         self.n_holes = n_holes
14 |         self.length = length
15 |
16 |     def __call__(self, img):
17 |         """
18 |         Args:
19 |             img (Tensor): Tensor image of size (C, H, W).
20 |         Returns:
21 |             Tensor: Image with n_holes of dimension length x length cut out of it.
22 |         """
23 |         h = img.size(1)
24 |         w = img.size(2)
25 |
26 |         mask = np.ones((h, w), np.float32)
27 |
28 |         for n in range(self.n_holes):
29 |             y = np.random.randint(h)
30 |             x = np.random.randint(w)
31 |
32 |             y1 = np.clip(y - self.length // 2, 0, h)  # integer division: slice indices below must be ints
33 |             y2 = np.clip(y + self.length // 2, 0, h)
34 |             x1 = np.clip(x - self.length // 2, 0, w)
35 |             x2 = np.clip(x + self.length // 2, 0, w)
36 |
37 |             mask[y1: y2, x1: x2] = 0.
38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/pretrained.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/pretrained.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/sgdr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/sgdr.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler1.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler3.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/images/zeiler4.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/planet.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from fastai.dataset import * 4 | from fastai.imports import * 5 | from fastai.transforms import * 6 | from sklearn.metrics import fbeta_score 7 | 8 | 9 | def f2(preds, targs, start=0.17, end=0.24, step=0.01): 10 | with warnings.catch_warnings(): 11 | warnings.simplefilter("ignore") 12 | return max([fbeta_score(targs, (preds>th), 2, average='samples') 13 | for th in np.arange(start,end,step)]) 14 | 15 | def opt_th(preds, targs, start=0.17, end=0.24, step=0.01): 16 | ths = np.arange(start,end,step) 17 | idx = np.argmax([fbeta_score(targs, (preds>th), 2, average='samples') 18 | for th in ths]) 19 | return ths[idx] 20 | 21 | def get_data(path, 
tfms,bs, n, cv_idx): 22 | val_idxs = get_cv_idxs(n, cv_idx) 23 | return ImageClassifierData.from_csv(path, 'train-jpg', f'{path}train_v2.csv', bs, tfms, 24 | suffix='.jpg', val_idxs=val_idxs, test_name='test-jpg') 25 | 26 | def get_data_zoom(f_model, path, sz, bs, n, cv_idx): 27 | tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_top_down, max_zoom=1.05) 28 | return get_data(path, tfms, bs, n, cv_idx) 29 | 30 | def get_data_pad(f_model, path, sz, bs, n, cv_idx): 31 | transforms_pt = [RandomRotateZoom(9, 0.18, 0.1), RandomLighting(0.05, 0.1), RandomDihedral()] 32 | tfms = tfms_from_model(f_model, sz, aug_tfms=transforms_pt, pad=sz//12) 33 | return get_data(path, tfms, bs, n, cv_idx) 34 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/ppt/lesson6.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl1/ppt/lesson6.pptx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl1/scripts/train_planet.py: -------------------------------------------------------------------------------- 1 | from dataset_pt import * 2 | from fast_gen import * 3 | from learner import * 4 | from planet import * 5 | from pt_models import * 6 | from sgdr_pt import * 7 | 8 | bs=64; f_model = resnet34 9 | path = "/data/jhoward/fast/planet/" 10 | cv_idx = int(sys.argv[1]) 11 | torch.cuda.set_device(cv_idx % 4) 12 | if cv_idx==1: torch.cuda.set_device(2) 13 | n=len(list(open(f'{path}train_v2.csv')))-1 14 | 15 | def train_sz(sz, load=None, save_name=None, suf=None): 16 | print(f'\n***** {sz} *****') 17 | #data=get_data_pad(f_model, path, sz, bs, n, cv_idx) 18 | data=get_data_zoom(f_model, path, sz, bs, n, cv_idx) 19 | learn = Learner.pretrained_convnet(f_model, data, metrics=[f2]) 20 | if load: learn.load(f'{load}_{cv_idx}{suf}') 21 | print('--- FC') 22 | learn.fit(0.3, 2, cycle_len=1) 23 | print('--- Gradual') 24 | for i in range(6,3,-1): 25 | learn.freeze_to(i) 26 | learn.fit(0.1*(i-3), 1, cycle_len=1) 27 | learn.unfreeze() 28 | print('--- All') 29 | learn.fit(0.2, 15, cycle_len=3, cycle_save_name=f'{save_name}{suf}') 30 | learn.save(f'{sz}_{cv_idx}{suf}') 31 | 32 | suf='_zoom' 33 | train_sz(64, suf=suf) 34 | train_sz(128, load=64, suf=suf) 35 | train_sz(244, load=128, save_name=f'170809_{cv_idx}', suf=suf) 36 | 37 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/.gitignore: -------------------------------------------------------------------------------- 1 | checkpoints/ 2 | *-Copy?.ipynb 3 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/.gitignore: -------------------------------------------------------------------------------- 1 | !data 2 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/__init__.py 
-------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/aligned_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | 4 | import torch 5 | import torchvision.transforms as transforms 6 | from PIL import Image 7 | 8 | from .base_dataset import BaseDataset 9 | from .image_folder import make_dataset 10 | 11 | 12 | class AlignedDataset(BaseDataset): 13 | def initialize(self, opt): 14 | self.opt = opt 15 | self.root = opt.dataroot 16 | self.dir_AB = os.path.join(opt.dataroot, opt.phase) 17 | self.AB_paths = sorted(make_dataset(self.dir_AB)) 18 | assert(opt.resize_or_crop == 'resize_and_crop') 19 | 20 | def __getitem__(self, index): 21 | AB_path = self.AB_paths[index] 22 | AB = Image.open(AB_path).convert('RGB') 23 | w, h = AB.size 24 | w2 = int(w / 2) 25 | A = AB.crop((0, 0, w2, h)).resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC) 26 | B = AB.crop((w2, 0, w, h)).resize((self.opt.loadSize, self.opt.loadSize), Image.BICUBIC) 27 | A = transforms.ToTensor()(A) 28 | B = transforms.ToTensor()(B) 29 | w_offset = random.randint(0, max(0, self.opt.loadSize - self.opt.fineSize - 1)) 30 | h_offset = random.randint(0, max(0, self.opt.loadSize - self.opt.fineSize - 1)) 31 | 32 | A = A[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize] 33 | B = B[:, h_offset:h_offset + self.opt.fineSize, w_offset:w_offset + self.opt.fineSize] 34 | 35 | A = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(A) 36 | B = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))(B) 37 | 38 | if self.opt.which_direction == 'BtoA': 39 | input_nc = self.opt.output_nc 40 | output_nc = self.opt.input_nc 41 | else: 42 | input_nc = self.opt.input_nc 43 | output_nc = self.opt.output_nc 44 | 45 | if (not self.opt.no_flip) and random.random() < 0.5: 46 | idx = [i for i in range(A.size(2) - 1, -1, -1)] 47 | idx = torch.LongTensor(idx) 48 | A = A.index_select(2, idx) 49 | B = B.index_select(2, idx) 50 | 51 | if input_nc == 1: # RGB to gray 52 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114 53 | A = tmp.unsqueeze(0) 54 | 55 | if output_nc == 1: # RGB to gray 56 | tmp = B[0, ...] * 0.299 + B[1, ...] * 0.587 + B[2, ...] 
* 0.114
57 |             B = tmp.unsqueeze(0)
58 |
59 |         return {'A': A, 'B': B,
60 |                 'A_paths': AB_path, 'B_paths': AB_path}
61 |
62 |     def __len__(self):
63 |         return len(self.AB_paths)
64 |
65 |     def name(self):
66 |         return 'AlignedDataset'
67 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/base_data_loader.py:
--------------------------------------------------------------------------------
1 | class BaseDataLoader():
2 |     def __init__(self): pass
3 |     def load_data(self): return None
4 |     def initialize(self, opt): self.opt = opt
5 |
6 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/base_dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | import torchvision.transforms as transforms
3 | from PIL import Image
4 |
5 |
6 | class BaseDataset(data.Dataset):
7 |     def __init__(self):
8 |         super(BaseDataset, self).__init__()
9 |
10 |     def name(self):
11 |         return 'BaseDataset'
12 |
13 |     def initialize(self, opt):
14 |         pass
15 |
16 |
17 | def get_transform(opt):
18 |     transform_list = []
19 |     if opt.resize_or_crop == 'resize_and_crop':
20 |         osize = [opt.loadSize, opt.loadSize]
21 |         transform_list.append(transforms.Scale(osize, Image.BICUBIC))
22 |         transform_list.append(transforms.RandomCrop(opt.fineSize))
23 |     elif opt.resize_or_crop == 'crop':
24 |         transform_list.append(transforms.RandomCrop(opt.fineSize))
25 |     elif opt.resize_or_crop == 'scale_width':
26 |         transform_list.append(transforms.Lambda(
27 |             lambda img: __scale_width(img, opt.fineSize)))
28 |     elif opt.resize_or_crop == 'scale_width_and_crop':
29 |         transform_list.append(transforms.Lambda(
30 |             lambda img: __scale_width(img, opt.loadSize)))
31 |         transform_list.append(transforms.RandomCrop(opt.fineSize))
32 |
33 |     if opt.isTrain and not opt.no_flip:
34 |         transform_list.append(transforms.RandomHorizontalFlip())
35 |
36 |     transform_list += [transforms.ToTensor(),
37 |                        transforms.Normalize((0.5, 0.5, 0.5),
38 |                                             (0.5, 0.5, 0.5))]
39 |     return transforms.Compose(transform_list)
40 |
41 |
42 | def __scale_width(img, target_width):
43 |     ow, oh = img.size
44 |     if (ow == target_width):
45 |         return img
46 |     w = target_width
47 |     h = int(target_width * oh / ow)
48 |     return img.resize((w, h), Image.BICUBIC)
49 |
--------------------------------------------------------------------------------
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/custom_dataset_data_loader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 |
3 | from .base_data_loader import BaseDataLoader
4 |
5 |
6 | def CreateDataset(opt):
7 |     dataset = None
8 |     if opt.dataset_mode == 'aligned':
9 |         from .aligned_dataset import AlignedDataset
10 |         dataset = AlignedDataset()
11 |     elif opt.dataset_mode == 'unaligned':
12 |         from .unaligned_dataset import UnalignedDataset
13 |         dataset = UnalignedDataset()
14 |     elif opt.dataset_mode == 'single':
15 |         from .single_dataset import SingleDataset
16 |         dataset = SingleDataset()
17 |     else:
18 |         raise ValueError("Dataset [%s] not recognized."
% opt.dataset_mode) 19 | 20 | print("dataset [%s] was created" % (dataset.name())) 21 | dataset.initialize(opt) 22 | return dataset 23 | 24 | 25 | class CustomDatasetDataLoader(BaseDataLoader): 26 | def initialize(self, opt): 27 | BaseDataLoader.initialize(self, opt) 28 | self.dataset = CreateDataset(opt) 29 | self.dataloader = torch.utils.data.DataLoader( 30 | self.dataset, batch_size=opt.batchSize, 31 | shuffle=not opt.serial_batches, num_workers=int(opt.nThreads)) 32 | 33 | def __iter__(self): 34 | for i, data in enumerate(self.dataloader): 35 | if i >= self.opt.max_dataset_size: break 36 | yield data 37 | 38 | def name(self): return 'CustomDatasetDataLoader' 39 | def load_data(self): return self 40 | def __len__(self): return min(len(self.dataset), self.opt.max_dataset_size) 41 | 42 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/data_loader.py: -------------------------------------------------------------------------------- 1 | from ..data.custom_dataset_data_loader import CustomDatasetDataLoader 2 | 3 | def CreateDataLoader(opt): 4 | data_loader = CustomDatasetDataLoader() 5 | print(data_loader.name()) 6 | data_loader.initialize(opt) 7 | return data_loader 8 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/image_folder.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Code from 3 | # https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py 4 | # Modified the original code so that it also loads images from the current 5 | # directory as well as the subdirectories 6 | ############################################################################### 7 | 8 | import os 9 | import os.path 10 | 11 | import torch.utils.data as data 12 | from PIL import Image 13 | 14 | IMG_EXTENSIONS = [ 15 | '.jpg', '.JPG', '.jpeg', '.JPEG', 16 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 17 | ] 18 | 19 | 20 | def is_image_file(filename): 21 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 22 | 23 | 24 | def make_dataset(dir): 25 | images = [] 26 | assert os.path.isdir(dir), '%s is not a valid directory' % dir 27 | 28 | for root, _, fnames in sorted(os.walk(dir)): 29 | for fname in fnames: 30 | if is_image_file(fname): 31 | path = os.path.join(root, fname) 32 | images.append(path) 33 | 34 | return images 35 | 36 | 37 | def default_loader(path): 38 | return Image.open(path).convert('RGB') 39 | 40 | 41 | class ImageFolder(data.Dataset): 42 | 43 | def __init__(self, root, transform=None, return_paths=False, 44 | loader=default_loader): 45 | imgs = make_dataset(root) 46 | if len(imgs) == 0: 47 | raise(RuntimeError("Found 0 images in: " + root + "\n" 48 | "Supported image extensions are: " + 49 | ",".join(IMG_EXTENSIONS))) 50 | 51 | self.root = root 52 | self.imgs = imgs 53 | self.transform = transform 54 | self.return_paths = return_paths 55 | self.loader = loader 56 | 57 | def __getitem__(self, index): 58 | path = self.imgs[index] 59 | img = self.loader(path) 60 | if self.transform is not None: 61 | img = self.transform(img) 62 | if self.return_paths: 63 | return img, path 64 | else: 65 | return img 66 | 67 | def __len__(self): 68 | return len(self.imgs) 69 | -------------------------------------------------------------------------------- 
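The data modules above chain into a small factory: CreateDataLoader builds a CustomDatasetDataLoader, whose initialize() calls CreateDataset() to pick a dataset class from opt.dataset_mode and wrap it in a torch DataLoader. Below is a minimal sketch of driving that chain by hand; it is illustrative, not part of the repo. The option field names are the ones the modules above actually read, but the values (and the datasets/facades path) are hypothetical, and it assumes the interpreter is started from the cgan/ directory so the absolute import resolves, as test.py and train.py further down do.

# Hypothetical driver for the cgan data pipeline above; all values illustrative.
from types import SimpleNamespace

from data.data_loader import CreateDataLoader  # assumes cwd == .../cgan

opt = SimpleNamespace(
    dataroot='datasets/facades',            # hypothetical folder of combined A|B images
    phase='train', dataset_mode='aligned', resize_or_crop='resize_and_crop',
    loadSize=286, fineSize=256,             # resize each half to 286x286, then random-crop 256x256
    input_nc=3, output_nc=3, which_direction='AtoB', no_flip=False, isTrain=True,
    serial_batches=False, batchSize=1, nThreads=2, max_dataset_size=float('inf'))

loader = CreateDataLoader(opt).load_data()  # prints 'CustomDatasetDataLoader', then the dataset-created line
for batch in loader:
    # AlignedDataset yields both halves of a combined image plus the source path
    print(batch['A'].shape, batch['B'].shape)  # torch.Size([1, 3, 256, 256]) each
    break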
/code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/single_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | from PIL import Image 4 | 5 | from .base_dataset import BaseDataset, get_transform 6 | from .image_folder import make_dataset 7 | 8 | 9 | class SingleDataset(BaseDataset): 10 | def initialize(self, opt): 11 | self.opt = opt 12 | self.root = opt.dataroot 13 | self.dir_A = os.path.join(opt.dataroot) 14 | 15 | self.A_paths = make_dataset(self.dir_A) 16 | 17 | self.A_paths = sorted(self.A_paths) 18 | 19 | self.transform = get_transform(opt) 20 | 21 | def __getitem__(self, index): 22 | A_path = self.A_paths[index] 23 | A_img = Image.open(A_path).convert('RGB') 24 | A = self.transform(A_img) 25 | if self.opt.which_direction == 'BtoA': 26 | input_nc = self.opt.output_nc 27 | else: 28 | input_nc = self.opt.input_nc 29 | 30 | if input_nc == 1: # RGB to gray 31 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114 32 | A = tmp.unsqueeze(0) 33 | 34 | return {'A': A, 'A_paths': A_path} 35 | 36 | def __len__(self): 37 | return len(self.A_paths) 38 | 39 | def name(self): 40 | return 'SingleImageDataset' 41 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/data/unaligned_dataset.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import random 3 | 4 | from PIL import Image 5 | 6 | from .base_dataset import BaseDataset, get_transform 7 | from .image_folder import make_dataset 8 | 9 | 10 | class UnalignedDataset(BaseDataset): 11 | def initialize(self, opt): 12 | self.opt = opt 13 | self.root = opt.dataroot 14 | self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A') 15 | self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B') 16 | 17 | self.A_paths = make_dataset(self.dir_A) 18 | self.B_paths = make_dataset(self.dir_B) 19 | 20 | self.A_paths = sorted(self.A_paths) 21 | self.B_paths = sorted(self.B_paths) 22 | self.A_size = len(self.A_paths) 23 | self.B_size = len(self.B_paths) 24 | self.transform = get_transform(opt) 25 | 26 | def __getitem__(self, index): 27 | A_path = self.A_paths[index % self.A_size] 28 | if self.opt.serial_batches: 29 | index_B = index % self.B_size 30 | else: 31 | index_B = random.randint(0, self.B_size - 1) 32 | B_path = self.B_paths[index_B] 33 | # print('(A, B) = (%d, %d)' % (index_A, index_B)) 34 | A_img = Image.open(A_path).convert('RGB') 35 | B_img = Image.open(B_path).convert('RGB') 36 | 37 | A = self.transform(A_img) 38 | B = self.transform(B_img) 39 | if self.opt.which_direction == 'BtoA': 40 | input_nc = self.opt.output_nc 41 | output_nc = self.opt.input_nc 42 | else: 43 | input_nc = self.opt.input_nc 44 | output_nc = self.opt.output_nc 45 | 46 | if input_nc == 1: # RGB to gray 47 | tmp = A[0, ...] * 0.299 + A[1, ...] * 0.587 + A[2, ...] * 0.114 48 | A = tmp.unsqueeze(0) 49 | 50 | if output_nc == 1: # RGB to gray 51 | tmp = B[0, ...] * 0.299 + B[1, ...] * 0.587 + B[2, ...] 
* 0.114 52 | B = tmp.unsqueeze(0) 53 | 54 | return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path} 55 | 56 | def __len__(self): return max(self.A_size, self.B_size) 57 | 58 | def name(self): return 'UnalignedDataset' 59 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/base_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | class BaseModel(): 7 | def name(self): return 'BaseModel' 8 | 9 | def initialize(self, opt): 10 | self.opt = opt 11 | self.gpu_ids = opt.gpu_ids 12 | self.isTrain = opt.isTrain 13 | self.Tensor = torch.cuda.FloatTensor if self.gpu_ids else torch.Tensor 14 | self.save_dir = os.path.join(opt.checkpoints_dir, opt.name) 15 | 16 | def set_input(self, input): self.input = input 17 | def forward(self): pass 18 | def test(self): pass 19 | def get_image_paths(self): pass 20 | def optimize_parameters(self): pass 21 | def get_current_visuals(self): return self.input 22 | def get_current_errors(self): return {} 23 | def save(self, label): pass 24 | 25 | # helper saving function that can be used by subclasses 26 | def save_network(self, network, network_label, epoch_label, gpu_ids): 27 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 28 | save_path = os.path.join(self.save_dir, save_filename) 29 | torch.save(network.cpu().state_dict(), save_path) 30 | if len(gpu_ids) and torch.cuda.is_available(): network.cuda(gpu_ids[0]) 31 | 32 | # helper loading function that can be used by subclasses 33 | def load_network(self, network, network_label, epoch_label): 34 | save_filename = '%s_net_%s.pth' % (epoch_label, network_label) 35 | save_path = os.path.join(self.save_dir, save_filename) 36 | network.load_state_dict(torch.load(save_path)) 37 | 38 | # update learning rate (called once every epoch) 39 | def update_learning_rate(self): 40 | for scheduler in self.schedulers: scheduler.step() 41 | lr = self.optimizers[0].param_groups[0]['lr'] 42 | print('learning rate = %.7f' % lr) 43 | 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/models.py: -------------------------------------------------------------------------------- 1 | def create_model(opt): 2 | model = None 3 | print(opt.model) 4 | if opt.model == 'cycle_gan': 5 | assert(opt.dataset_mode == 'unaligned') 6 | from .cycle_gan_model import CycleGANModel 7 | model = CycleGANModel() 8 | elif opt.model == 'pix2pix': 9 | assert(opt.dataset_mode == 'aligned') 10 | from .pix2pix_model import Pix2PixModel 11 | model = Pix2PixModel() 12 | elif opt.model == 'test': 13 | assert(opt.dataset_mode == 'single') 14 | from .test_model import TestModel 15 | model = TestModel() 16 | else: 17 | raise ValueError("Model [%s] not recognized." 
% opt.model) 18 | model.initialize(opt) 19 | print("model [%s] was created" % (model.name())) 20 | return model 21 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/models/test_model.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from torch.autograd import Variable 4 | from ..util import util  # get_current_visuals below needs util.tensor2im 5 | from . import networks 6 | from .base_model import BaseModel 7 | 8 | 9 | class TestModel(BaseModel): 10 | def name(self): 11 | return 'TestModel' 12 | 13 | def initialize(self, opt): 14 | assert(not opt.isTrain) 15 | BaseModel.initialize(self, opt) 16 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, 17 | opt.ngf, opt.which_model_netG, 18 | opt.norm, not opt.no_dropout, 19 | opt.init_type, 20 | self.gpu_ids) 21 | which_epoch = opt.which_epoch 22 | self.load_network(self.netG, 'G', which_epoch) 23 | 24 | print('---------- Networks initialized -------------') 25 | networks.print_network(self.netG) 26 | print('-----------------------------------------------') 27 | 28 | def set_input(self, input): 29 | # we need to use single_dataset mode 30 | input_A = input['A'] 31 | if len(self.gpu_ids) > 0: 32 | input_A = input_A.cuda(self.gpu_ids[0], async=True) 33 | self.input_A = input_A 34 | self.image_paths = input['A_paths'] 35 | 36 | def test(self): 37 | self.real_A = Variable(self.input_A) 38 | self.fake_B = self.netG(self.real_A) 39 | 40 | # get image paths 41 | def get_image_paths(self): 42 | return self.image_paths 43 | 44 | def get_current_visuals(self): 45 | real_A = util.tensor2im(self.real_A.data) 46 | fake_B = util.tensor2im(self.fake_B.data) 47 | return OrderedDict([('real_A', real_A), ('fake_B', fake_B)]) 48 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/options/test_options.py: -------------------------------------------------------------------------------- 1 | from .base_options import BaseOptions 2 | 3 | 4 | class TestOptions(BaseOptions): 5 | def initialize(self): 6 | BaseOptions.initialize(self) 7 | self.parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.') 8 | self.parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.') 9 | self.parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images') 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='latest', help='which epoch to load?
set to latest to use latest cached model') 12 | self.parser.add_argument('--how_many', type=int, default=50, help='how many test images to run') 13 | self.isTrain = False 14 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from data.data_loader import CreateDataLoader 4 | from models.models import create_model 5 | from options.test_options import TestOptions 6 | from util import html 7 | from util.visualizer import Visualizer 8 | 9 | opt = TestOptions().parse() 10 | opt.nThreads = 1 # test code only supports nThreads = 1 11 | opt.batchSize = 1 # test code only supports batchSize = 1 12 | opt.serial_batches = True # no shuffle 13 | opt.no_flip = True # no flip 14 | 15 | data_loader = CreateDataLoader(opt) 16 | dataset = data_loader.load_data() 17 | model = create_model(opt) 18 | visualizer = Visualizer(opt) 19 | # create website 20 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch)) 21 | webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.which_epoch)) 22 | # test 23 | for i, data in enumerate(dataset): 24 | if i >= opt.how_many: break 25 | model.set_input(data) 26 | model.test() 27 | visuals = model.get_current_visuals() 28 | img_path = model.get_image_paths() 29 | print('%04d: process image... %s' % (i, img_path)) 30 | visualizer.save_images(webpage, visuals, img_path, aspect_ratio=opt.aspect_ratio) 31 | 32 | webpage.save() 33 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/train.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | from data.data_loader import CreateDataLoader 4 | from models.models import create_model 5 | from options.train_options import TrainOptions 6 | from util.visualizer import Visualizer 7 | 8 | opt = TrainOptions().parse() 9 | data_loader = CreateDataLoader(opt) 10 | dataset = data_loader.load_data() 11 | dataset_size = len(data_loader) 12 | print('#training images = %d' % dataset_size) 13 | 14 | model = create_model(opt) 15 | visualizer = Visualizer(opt) 16 | total_steps = 0 17 | 18 | for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1): 19 | epoch_start_time = time.time() 20 | iter_data_time = time.time() 21 | epoch_iter = 0 22 | 23 | for i, data in enumerate(dataset): 24 | iter_start_time = time.time() 25 | if total_steps % opt.print_freq == 0: 26 | t_data = iter_start_time - iter_data_time 27 | visualizer.reset() 28 | total_steps += opt.batchSize 29 | epoch_iter += opt.batchSize 30 | model.set_input(data) 31 | model.optimize_parameters() 32 | 33 | if total_steps % opt.display_freq == 0: 34 | save_result = total_steps % opt.update_html_freq == 0 35 | visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) 36 | 37 | if total_steps % opt.print_freq == 0: 38 | errors = model.get_current_errors() 39 | t = (time.time() - iter_start_time) / opt.batchSize 40 | visualizer.print_current_errors(epoch, epoch_iter, errors, t, t_data) 41 | if opt.display_id > 0: 42 | visualizer.plot_current_errors(epoch, float(epoch_iter) / dataset_size, opt, errors) 43 | 44 | if total_steps % opt.save_latest_freq == 0: 45 | print('saving the latest model (epoch %d, total_steps %d)' % 46 | (epoch, 
total_steps)) 47 | model.save('latest') 48 | 49 | iter_data_time = time.time() 50 | if epoch % opt.save_epoch_freq == 0: 51 | print('saving the model at the end of epoch %d, iters %d' % 52 | (epoch, total_steps)) 53 | model.save('latest') 54 | model.save(epoch) 55 | 56 | print('End of epoch %d / %d \t Time Taken: %d sec' % 57 | (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) 58 | model.update_learning_rate() 59 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/html.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import dominate 4 | from dominate.tags import * 5 | 6 | 7 | class HTML: 8 | def __init__(self, web_dir, title, refresh=0): 9 | self.title = title 10 | self.web_dir = web_dir 11 | self.img_dir = os.path.join(self.web_dir, 'images') 12 | if not os.path.exists(self.web_dir): 13 | os.makedirs(self.web_dir) 14 | if not os.path.exists(self.img_dir): 15 | os.makedirs(self.img_dir) 16 | # print(self.img_dir) 17 | 18 | self.doc = dominate.document(title=title) 19 | if refresh > 0: 20 | with self.doc.head: 21 | meta(http_equiv="refresh", content=str(refresh)) 22 | 23 | def get_image_dir(self): 24 | return self.img_dir 25 | 26 | def add_header(self, str): 27 | with self.doc: 28 | h3(str) 29 | 30 | def add_table(self, border=1): 31 | self.t = table(border=border, style="table-layout: fixed;") 32 | self.doc.add(self.t) 33 | 34 | def add_images(self, ims, txts, links, width=400): 35 | self.add_table() 36 | with self.t: 37 | with tr(): 38 | for im, txt, link in zip(ims, txts, links): 39 | with td(style="word-wrap: break-word;", halign="center", valign="top"): 40 | with p(): 41 | with a(href=os.path.join('images', link)): 42 | img(style="width:%dpx" % width, src=os.path.join('images', im)) 43 | br() 44 | p(txt) 45 | 46 | def save(self): 47 | html_file = '%s/index.html' % self.web_dir 48 | f = open(html_file, 'wt') 49 | f.write(self.doc.render()) 50 | f.close() 51 | 52 | 53 | if __name__ == '__main__': 54 | html = HTML('web/', 'test_html') 55 | html.add_header('hello world') 56 | 57 | ims = [] 58 | txts = [] 59 | links = [] 60 | for n in range(4): 61 | ims.append('image_%d.png' % n) 62 | txts.append('text_%d' % n) 63 | links.append('image_%d.png' % n) 64 | html.add_images(ims, txts, links) 65 | html.save() 66 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | from torch.autograd import Variable 5 | 6 | 7 | class ImagePool(): 8 | def __init__(self, pool_size): 9 | self.pool_size = pool_size 10 | if self.pool_size > 0: 11 | self.num_imgs = 0 12 | self.images = [] 13 | 14 | def query(self, images): 15 | if self.pool_size == 0: 16 | return Variable(images) 17 | return_images = [] 18 | for image in images: 19 | image = torch.unsqueeze(image, 0) 20 | if self.num_imgs < 
self.pool_size: 21 | self.num_imgs = self.num_imgs + 1 22 | self.images.append(image) 23 | return_images.append(image) 24 | else: 25 | p = random.uniform(0, 1) 26 | if p > 0.5: 27 | random_id = random.randint(0, self.pool_size - 1) 28 | tmp = self.images[random_id].clone() 29 | self.images[random_id] = image 30 | return_images.append(tmp) 31 | else: 32 | return_images.append(image) 33 | return_images = Variable(torch.cat(return_images, 0)) 34 | return return_images 35 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/cgan/util/util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | 5 | import numpy as np 6 | import torch 7 | from PIL import Image 8 | 9 | 10 | # Converts a Tensor into a Numpy array 11 | # |imtype|: the desired type of the converted numpy array 12 | def tensor2im(image_tensor, imtype=np.uint8): 13 | image_numpy = image_tensor[0].cpu().float().numpy() 14 | if image_numpy.shape[0] == 1: 15 | image_numpy = np.tile(image_numpy, (3, 1, 1)) 16 | image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0 17 | return image_numpy.astype(imtype) 18 | 19 | 20 | def diagnose_network(net, name='network'): 21 | mean = 0.0 22 | count = 0 23 | for param in net.parameters(): 24 | if param.grad is not None: 25 | mean += torch.mean(torch.abs(param.grad.data)) 26 | count += 1 27 | if count > 0: 28 | mean = mean / count 29 | print(name) 30 | print(mean) 31 | 32 | 33 | def save_image(image_numpy, image_path): 34 | image_pil = Image.fromarray(image_numpy) 35 | image_pil.save(image_path) 36 | 37 | 38 | def print_numpy(x, val=True, shp=False): 39 | x = x.astype(np.float64) 40 | if shp: 41 | print('shape,', x.shape) 42 | if val: 43 | x = x.flatten() 44 | print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % ( 45 | np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x))) 46 | 47 | 48 | def mkdirs(paths): 49 | if isinstance(paths, list) and not isinstance(paths, str): 50 | for path in paths: 51 | mkdir(path) 52 | else: 53 | mkdir(paths) 54 | 55 | 56 | def mkdir(path): 57 | if not os.path.exists(path): 58 | os.makedirs(path) 59 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/fastai/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/executors.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import time 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | 7 | class LazyThreadPoolExecutor(ThreadPoolExecutor): 8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None): 9 | """ 10 | Collects iterables lazily, rather than immediately. 
11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor 12 | Implmentation taken from this PR: https://github.com/python/cpython/pull/707 13 | """ 14 | if timeout is not None: end_time = timeout + time.time() 15 | if prefetch is None: prefetch = self._max_workers 16 | if prefetch < 0: raise ValueError("prefetch count may not be negative") 17 | argsiter = zip(*iterables) 18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch)) 19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required. 20 | def result_iterator(): 21 | nonlocal argsiter 22 | try: 23 | while fs: 24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time()) 25 | # Got a result, future needn't be cancelled 26 | del fs[0] 27 | # Dispatch next task before yielding to keep pipeline full 28 | if argsiter: 29 | try: 30 | args = next(argsiter) 31 | except StopIteration: 32 | argsiter = None 33 | else: 34 | fs.append(self.submit(fn, *args)) 35 | yield res 36 | finally: 37 | for future in fs: future.cancel() 38 | return result_iterator() -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/fp16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FP16(nn.Module): 6 | def __init__(self, module): 7 | super(FP16, self).__init__() 8 | self.module = batchnorm_to_fp32(module.half()) 9 | 10 | def forward(self, input): 11 | return self.module(input.half()) 12 | 13 | def load_state_dict(self, *inputs, **kwargs): 14 | self.module.load_state_dict(*inputs, **kwargs) 15 | 16 | def state_dict(self, *inputs, **kwargs): 17 | return self.module.state_dict(*inputs, **kwargs) 18 | 19 | def batchnorm_to_fp32(module): 20 | ''' 21 | BatchNorm layers to have parameters in single precision. 22 | Find all layers and convert them back to float. This can't 23 | be done with built in .apply as that function will apply 24 | fn to all modules, parameters, and buffers. Thus we wouldn't 25 | be able to guard the float conversion based on the module type. 
26 | ''' 27 | if isinstance(module, nn.modules.batchnorm._BatchNorm): 28 | module.float() 29 | for child in module.children(): 30 | batchnorm_to_fp32(child) 31 | return module 32 | 33 | def copy_model_to_fp32(m, optim): 34 | """ Creates a fp32 copy of model parameters and sets optimizer parameters 35 | """ 36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()] 37 | optim_groups = [group['params'] for group in optim.param_groups] 38 | iter_fp32_params = iter(fp32_params) 39 | for group_params in optim_groups: 40 | for i in range(len(group_params)): 41 | fp32_param = next(iter_fp32_params) 42 | fp32_param.requires_grad = group_params[i].requires_grad 43 | group_params[i] = fp32_param 44 | return fp32_params 45 | 46 | def copy_fp32_to_model(m, fp32_params): 47 | m_params = list(m.parameters()) 48 | for fp32_param, m_param in zip(fp32_params, m_params): 49 | m_param.data.copy_(fp32_param.data) 50 | 51 | def update_fp32_grads(fp32_params, m): 52 | m_params = list(m.parameters()) 53 | for fp32_param, m_param in zip(fp32_params, m_params): 54 | if fp32_param.grad is None: 55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size())) 56 | fp32_param.grad.data.copy_(m_param.grad.data) 57 | 58 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/images/industrial_fishing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/fastai/images/industrial_fishing.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/imports.py: -------------------------------------------------------------------------------- 1 | from IPython.lib.deepreload import reload as dreload 2 | import os, numpy as np, math, json, bcolz, scipy, cv2 3 | import pandas as pd, pickle, string, sys 4 | import matplotlib 5 | import contextlib 6 | from abc import abstractmethod 7 | from functools import partial 8 | from isoweek import Week 9 | from pandas_summary import DataFrameSummary 10 | from sklearn import metrics, ensemble, preprocessing 11 | from pathlib import Path 12 | from distutils.version import LooseVersion 13 | 14 | from matplotlib import pyplot as plt, rcParams, animation 15 | 16 | matplotlib.rc('animation', html='html5') 17 | np.set_printoptions(precision=5, linewidth=110, suppress=True) 18 | 19 | from ipykernel.kernelapp import IPKernelApp 20 | def in_notebook(): return IPKernelApp.initialized() 21 | 22 | def in_ipynb(): 23 | try: 24 | cls = get_ipython().__class__.__name__ 25 | return cls == 'ZMQInteractiveShell' 26 | except NameError: 27 | return False 28 | 29 | import tqdm as tq 30 | from tqdm import tqdm_notebook, tnrange 31 | 32 | def clear_tqdm(): 33 | inst = getattr(tq.tqdm, '_instances', None) 34 | if not inst: return 35 | try: 36 | for i in range(len(inst)): inst.pop().close() 37 | except Exception: 38 | pass 39 | 40 | if in_notebook(): 41 | def tqdm(*args, **kwargs): 42 | clear_tqdm() 43 | return tq.tqdm(*args, file=sys.stdout, **kwargs) 44 | def trange(*args, **kwargs): 45 | clear_tqdm() 46 | return tq.trange(*args, file=sys.stdout, **kwargs) 47 | else: 48 | from tqdm import tqdm, trange 49 | tnrange=trange 50 | tqdm_notebook=tqdm 51 | 52 | 
-------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/initializers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn  # cond_init below tests for the nn.BatchNorm classes 2 | 3 | def cond_init(m, init_fn): 4 | if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)): 5 | if hasattr(m, 'weight'): init_fn(m.weight) 6 | if hasattr(m, 'bias'): m.bias.data.fill_(0.) 7 | 8 | def apply_init(m, init_fn): 9 | m.apply(lambda x: cond_init(x, init_fn)) 10 | 11 | 12 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/io.py: -------------------------------------------------------------------------------- 1 | from urllib.request import urlretrieve 2 | import os  # get_data below uses os.path and os.makedirs 3 | from tqdm import tqdm 4 | 5 | 6 | class TqdmUpTo(tqdm): 7 | def update_to(self, b=1, bsize=1, tsize=None): 8 | if tsize is not None: self.total = tsize 9 | self.update(b * bsize - self.n) 10 | 11 | def get_data(url, filename): 12 | if not os.path.exists(filename): 13 | 14 | dirname = os.path.dirname(filename) 15 | if not os.path.exists(dirname): 16 | os.makedirs(dirname) 17 | 18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 19 | urlretrieve(url, filename, reporthook=t.update_to) 20 | 21 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/layer_optimizer.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | 3 | def opt_params(parm, lr, wd): 4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd} 5 | 6 | class LayerOptimizer(): 7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None): 8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups] 9 | if not isinstance(lrs, Iterable): lrs=[lrs] 10 | if len(lrs)==1: lrs=lrs*len(layer_groups) 11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds] 13 | if len(wds)==1: wds=wds*len(layer_groups) 14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds 15 | self.opt = opt_fn(self.opt_params()) 16 | 17 | def opt_params(self): 18 | assert(len(self.layer_groups) == len(self.lrs)) 19 | assert(len(self.layer_groups) == len(self.wds)) 20 | params = list(zip(self.layer_groups,self.lrs,self.wds)) 21 | return [opt_params(*p) for p in params] 22 | 23 | @property 24 | def lr(self): return self.lrs[-1] 25 | 26 | @property 27 | def mom(self): 28 | if 'betas' in self.opt.param_groups[0]: 29 | return self.opt.param_groups[0]['betas'][0] 30 | else: 31 | return self.opt.param_groups[0]['momentum'] 32 | 33 | def set_lrs(self, lrs): 34 | if not isinstance(lrs, Iterable): lrs=[lrs] 35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups) 36 | set_lrs(self.opt, lrs) 37 | self.lrs=lrs 38 | 39 | def set_wds(self, wds): 40 | if not isinstance(wds, Iterable): wds=[wds] 41 | if len(wds)==1: wds=wds*len(self.layer_groups) 42 | set_wds(self.opt, wds) 43 | self.wds=wds 44 | 45 | def set_mom(self,momentum): 46 | if 'betas' in self.opt.param_groups[0]: 47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1]) 48 | else: 49 | for pg in self.opt.param_groups: pg['momentum'] = momentum 50 | 51 | def set_beta(self,beta): 52 | if 'betas' in self.opt.param_groups[0]: 53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta) 54 | elif 'alpha' in self.opt.param_groups[0]: 55 | for pg in self.opt.param_groups: pg['alpha'] = beta 56 | 57 | def set_opt_fn(self, opt_fn): 58 | if type(self.opt) != type(opt_fn(self.opt_params())): 59 | self.opt = opt_fn(self.opt_params()) 60 | 61 | def zip_strict_(l, r): 62 | assert(len(l) == len(r)) 63 | return zip(l, r) 64 | 65 | def set_lrs(opt, lrs): 66 | if not isinstance(lrs, Iterable): lrs=[lrs] 67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups) 68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr 69 | 70 | def set_wds(opt, wds): 71 | if not isinstance(wds, Iterable): wds=[wds] 72 | if len(wds)==1: wds=wds*len(opt.param_groups) 73 | assert(len(opt.param_groups) == len(wds)) 74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd 75 | 76 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/layers.py: -------------------------------------------------------------------------------- 1 | import torch  # forward passes below use torch.cat 2 | import torch.nn as nn 3 | 4 | class AdaptiveConcatPool2d(nn.Module): 5 | def __init__(self, sz=None): 6 | super().__init__() 7 | sz = sz or (1,1) 8 | self.ap = nn.AdaptiveAvgPool2d(sz) 9 | self.mp = nn.AdaptiveMaxPool2d(sz) 10 | def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1) 11 | 12 | class Lambda(nn.Module): 13 | def __init__(self, f): super().__init__(); self.f=f 14 | def forward(self, x): return self.f(x) 15 | 16 | class Flatten(nn.Module): 17 | def __init__(self): super().__init__() 18 | def forward(self, x): return x.view(x.size(0), -1) 19 | 20 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/losses.py: -------------------------------------------------------------------------------- 1 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9): 2 | y_pred = (y_pred.float() > threshold).float() 3 | y_true = y_true.float() 4 | tp = (y_pred * y_true).sum(dim=1) 5 | precision = tp / (y_pred.sum(dim=1)+eps) 6 | recall = tp / (y_true.sum(dim=1)+eps) 7 | return
torch.mean( 8 | precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2)) 9 | 10 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/metrics.py: -------------------------------------------------------------------------------- 1 | from .imports import * 2 | 3 | 4 | def accuracy_np(preds, targs): 5 | preds = np.argmax(preds, 1) 6 | return (preds==targs).mean() 7 | 8 | def accuracy(preds, targs): 9 | preds = torch.max(preds, dim=1)[1] 10 | return (preds==targs).float().mean() 11 | 12 | def accuracy_thresh(thresh): 13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh) 14 | 15 | def accuracy_multi(preds, targs, thresh): 16 | return ((preds>thresh).float()==targs).float().mean() 17 | 18 | def accuracy_multi_np(preds, targs, thresh): 19 | return ((preds>thresh)==targs).mean() 20 | 21 | def recall(preds, targs, thresh=0.5): 22 | pred_pos = preds > thresh 23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 24 | return tpos.sum()/targs.sum() 25 | 26 | def precision(preds, targs, thresh=0.5): 27 | pred_pos = preds > thresh 28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 29 | return tpos.sum()/pred_pos.sum() 30 | 31 | def fbeta(preds, targs, beta, thresh=0.5): 32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall). 33 | This is the micro averaged version where the true positives, false negatives and 34 | false positives are calculated globally (as opposed to on a per label basis). 35 | 36 | beta == 1 places equal weight on precision and recall, b < 1 emphasizes precision and 37 | beta > 1 favors recall. 38 | """ 39 | assert beta > 0, 'beta needs to be greater than 0' 40 | beta2 = beta ** 2 41 | rec = recall(preds, targs, thresh) 42 | prec = precision(preds, targs, thresh) 43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec) 44 | 45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh) 46 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.tar 3 | checkpoint* 4 | log* 5 | wgts/ 6 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/cifar10/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py --lr=0.1 4 | python main.py --resume --lr=0.01 5 | python main.py --resume --lr=0.001 6 | 7 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/cifar10/wideresnet.py: -------------------------------------------------------------------------------- 1 | # Cifar10 Wideresnet for Dawn Submission 2 | 3 | from ...layers import * 4 | 5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False) 6 | 7 | def bn(ni, init_zero=False): 8 | m = nn.BatchNorm2d(ni) 9 | m.weight.data.fill_(0 if init_zero else 1) 10 | m.bias.data.zero_() 11 | return m 12 | 13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False): 14 | bn_initzero = bn(ni, init_zero=init_zero) 15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride)) 16 | 17 | def noop(x): return x 18 | 19 
| class BasicBlock(nn.Module): 20 | def __init__(self, ni, nf, stride, drop_p=0.0): 21 | super().__init__() 22 | self.bn = nn.BatchNorm2d(ni) 23 | self.conv1 = conv_2d(ni, nf, 3, stride) 24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1) 25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None 26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop 27 | 28 | def forward(self, x): 29 | x2 = F.relu(self.bn(x), inplace=True) 30 | r = self.shortcut(x2) 31 | x = self.conv1(x2) 32 | if self.drop: x = self.drop(x) 33 | x = self.conv2(x) * 0.2 34 | return x.add_(r) 35 | 36 | 37 | def _make_group(N, ni, nf, block, stride, drop_p): 38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)] 39 | 40 | class WideResNet(nn.Module): 41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16): 42 | super().__init__() 43 | n_channels = [start_nf] 44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k) 45 | 46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1 47 | for i in range(num_groups): 48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p) 49 | 50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1), 51 | Flatten(), nn.Linear(n_channels[3], num_classes)] 52 | self.features = nn.Sequential(*layers) 53 | 54 | def forward(self, x): return self.features(x) 55 | 56 | 57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.) 58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.) 59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.) 60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2) 61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.) 62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.) 
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2) 64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2) 65 | 66 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/models/darknet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..layers import *  # Flatten comes from fastai/layers.py, one package level up 4 | 5 | 6 | 7 | class ConvBN(nn.Module): 8 | "convolutional layer then batchnorm" 9 | 10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0): 11 | super().__init__() 12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01) 14 | self.relu = nn.LeakyReLU(0.1, inplace=True) 15 | 16 | def forward(self, x): return self.relu(self.bn(self.conv(x))) 17 | 18 | class DarknetBlock(nn.Module): 19 | def __init__(self, ch_in): 20 | super().__init__() 21 | ch_hid = ch_in//2 22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0) 23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x): return self.conv2(self.conv1(x)) + x 26 | 27 | class Darknet(nn.Module): 28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)" 29 | 30 | def make_group_layer(self, ch_in, num_blocks, stride=1): 31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)] 32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2)) 33 | return layers 34 | 35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32): 36 | super().__init__() 37 | nf = start_nf 38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)] 39 | for i,nb in enumerate(num_blocks): 40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2)) 41 | nf *= 2 42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)] 43 | self.layers = nn.Sequential(*layers) 44 | 45 | def forward(self, x): return self.layers(x) 46 | 47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes) 48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes) 49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24) 50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16) 51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes) 52 | 53 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/rnn_train.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/set_spawn.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import set_start_method 2 | set_start_method('spawn') 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/fastai/transforms_pil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image.
7 | 8 | Args: 9 | n_holes (int): Number of patches to cut out of each image. 10 | length (int): The length (in pixels) of each square patch. 11 | """ 12 | def __init__(self, n_holes, length): 13 | self.n_holes = n_holes 14 | self.length = length 15 | 16 | def __call__(self, img): 17 | """ 18 | Args: 19 | img (Tensor): Tensor image of size (C, H, W). 20 | Returns: 21 | Tensor: Image with n_holes of dimension length x length cut out of it. 22 | """ 23 | h = img.size(1) 24 | w = img.size(2) 25 | 26 | mask = np.ones((h, w), np.float32) 27 | 28 | for n in range(self.n_holes): 29 | y = np.random.randint(h) 30 | x = np.random.randint(w) 31 | 32 | y1 = int(np.clip(y - self.length / 2, 0, h))  # int(): np.clip yields floats here, which are not valid slice indices 33 | y2 = int(np.clip(y + self.length / 2, 0, h)) 34 | x1 = int(np.clip(x - self.length / 2, 0, w)) 35 | x2 = int(np.clip(x + self.length / 2, 0, w)) 36 | 37 | mask[y1: y2, x1: x2] = 0. 38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/create_toks.py: -------------------------------------------------------------------------------- 1 | import html 2 | 3 | import fire 4 | from fastai.text import * 5 | 6 | BOS = 'xbos' # beginning-of-sentence tag 7 | FLD = 'xfld' # data field tag 8 | 9 | re1 = re.compile(r' +') 10 | 11 | 12 | def fixup(x): 13 | x = x.replace('#39;', "'").replace('amp;', '&').replace('#146;', "'").replace( 14 | 'nbsp;', ' ').replace('#36;', '$').replace('\\n', "\n").replace('quot;', "'").replace( 15 | '<br />
', "\n").replace('\\"', '"').replace('<unk>','u_n').replace(' @.@ ','.').replace( 16 | ' @-@ ','-').replace('\\', ' \\ ') 17 | return re1.sub(' ', html.unescape(x)) 18 | 19 | 20 | def get_texts(df, n_lbls): 21 | if len(df.columns) == 1: 22 | labels = [] 23 | texts = f'\n{BOS} {FLD} 1 ' + df[0].astype(str) 24 | texts = texts.apply(fixup).values.astype(str) 25 | else: 26 | labels = df.iloc[:,range(n_lbls)].values.astype(np.int64) 27 | texts = f'\n{BOS} {FLD} 1 ' + df[n_lbls].astype(str) 28 | for i in range(n_lbls+1, len(df.columns)): texts += f' {FLD} {i-n_lbls} ' + df[i].astype(str) 29 | texts = texts.apply(fixup).values.astype(str) 30 | 31 | tok = Tokenizer().proc_all_mp(partition_by_cores(texts)) 32 | return tok, list(labels) 33 | 34 | 35 | def get_all(df, n_lbls): 36 | tok, labels = [], [] 37 | for i, r in enumerate(df): 38 | print(i) 39 | tok_, labels_ = get_texts(r, n_lbls) 40 | tok += tok_; 41 | labels += labels_ 42 | return tok, labels 43 | 44 | 45 | def create_toks(prefix, pr_abbr, chunksize=24000, n_lbls=1): 46 | PATH = f'data/nlp_clas/{prefix}/' 47 | 48 | df_trn = pd.read_csv(f'{PATH}train.csv', header=None, chunksize=chunksize) 49 | df_val = pd.read_csv(f'{PATH}test.csv', header=None, chunksize=chunksize) 50 | print(prefix) 51 | 52 | os.makedirs(f'{PATH}tmp', exist_ok=True) 53 | tok_trn, trn_labels = get_all(df_trn, n_lbls) 54 | tok_val, val_labels = get_all(df_val, n_lbls) 55 | 56 | np.save(f'{PATH}tmp/tok_trn.npy', tok_trn) 57 | np.save(f'{PATH}tmp/tok_val.npy', tok_val) 58 | np.save(f'{PATH}tmp/lbl_trn.npy', trn_labels) 59 | np.save(f'{PATH}tmp/lbl_val.npy', val_labels) 60 | 61 | trn_joined = [' '.join(o) for o in tok_trn] 62 | mdl_fn = f'{PATH}tmp/{pr_abbr}_joined.txt' 63 | open(mdl_fn, 'w', encoding='utf-8').writelines(trn_joined) 64 | 65 | 66 | if __name__ == '__main__': fire.Fire(create_toks) 67 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/tok2id.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from fastai.text import * 3 | 4 | 5 | def tok2id(prefix, max_vocab=60000, min_freq=1): 6 | print(f'prefix {prefix} max_vocab {max_vocab} min_freq {min_freq}') 7 | PATH=f'data/nlp_clas/{prefix}/' 8 | trn_tok = np.load(f'{PATH}tmp/tok_trn.npy') 9 | val_tok = np.load(f'{PATH}tmp/tok_val.npy') 10 | 11 | freq = Counter(p for o in trn_tok for p in o) 12 | print(freq.most_common(25)) 13 | itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq] 14 | itos.insert(0, '_pad_') 15 | itos.insert(0, '_unk_') 16 | stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)}) 17 | print(len(itos)) 18 | 19 | trn_lm = np.array([[stoi[o] for o in p] for p in trn_tok]) 20 | val_lm = np.array([[stoi[o] for o in p] for p in val_tok]) 21 | 22 | np.save(f'{PATH}tmp/trn_ids.npy', trn_lm) 23 | np.save(f'{PATH}tmp/val_ids.npy', val_lm) 24 | pickle.dump(itos, open(f'{PATH}tmp/itos.pkl', 'wb')) 25 | 26 | if __name__ == '__main__': fire.Fire(tok2id) 27 | 28 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/imdb_scripts/train_tri_wt.py: -------------------------------------------------------------------------------- 1 | import fire 2 | from fastai.learner import * 3 | from fastai.lm_rnn import * 4 | from fastai.rnn_reg import * 5 | from fastai.rnn_train import * 6 | from fastai.text import * 7 | from sampled_sm import * 8 | 9 | 10 | def
train_lm(prefix, cuda_id, cl=1, bs=64, backwards=False, lr=3e-4, startat=0, sampled=True, preload=True): 11 | print(f'prefix {prefix}; cuda_id {cuda_id}; cl {cl}; bs {bs}; backwards {backwards} sampled {sampled} ' 12 | f'lr {lr} startat {startat}') 13 | torch.cuda.set_device(cuda_id) 14 | PRE = 'bwd_' if backwards else 'fwd_' 15 | PRE2 = PRE 16 | PRE2 = 'bwd_' 17 | IDS = 'ids' 18 | NLPPATH=Path('data/nlp_clas') 19 | PATH=NLPPATH / prefix 20 | PATH2=NLPPATH / 'wikitext-103_2' 21 | bptt=70 22 | em_sz,nh,nl = 400,1150,3 23 | opt_fn = partial(optim.Adam, betas=(0.8, 0.99)) 24 | 25 | if backwards: 26 | trn_lm = np.load(PATH / f'tmp/trn_{IDS}_bwd.npy') 27 | val_lm = np.load(PATH / f'tmp/val_{IDS}_bwd.npy') 28 | else: 29 | trn_lm = np.load(PATH / f'tmp/trn_{IDS}.npy') 30 | val_lm = np.load(PATH / f'tmp/val_{IDS}.npy') 31 | trn_lm = np.concatenate(trn_lm) 32 | val_lm = np.concatenate(val_lm) 33 | 34 | itos = pickle.load(open(PATH / 'tmp/itos.pkl', 'rb')) 35 | vs = len(itos) 36 | 37 | trn_dl = LanguageModelLoader(trn_lm, bs, bptt) 38 | val_dl = LanguageModelLoader(val_lm, bs//5 if sampled else bs, bptt) 39 | md = LanguageModelData(PATH, 1, vs, trn_dl, val_dl, bs=bs, bptt=bptt) 40 | 41 | tprs = get_prs(trn_lm, vs) 42 | drops = np.array([0.25, 0.1, 0.2, 0.02, 0.15])*0.5 43 | learner,crit = get_learner(drops, 15000, sampled, md, em_sz, nh, nl, opt_fn, tprs) 44 | wd=1e-7 45 | learner.metrics = [accuracy] 46 | 47 | if (startat<1) and preload: 48 | wgts = torch.load(PATH2 / f'models/{PRE2}lm_3.h5', map_location=lambda storage, loc: storage) 49 | ew = to_np(wgts['0.encoder.weight']) 50 | row_m = ew.mean(0) 51 | 52 | itos2 = pickle.load(open(PATH2 / 'tmp/itos.pkl', 'rb')) 53 | stoi2 = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos2)}) 54 | nw = np.zeros((vs, em_sz), dtype=np.float32) 55 | for i,w in enumerate(itos): 56 | r = stoi2[w] 57 | nw[i] = ew[r] if r>=0 else row_m 58 | 59 | wgts['0.encoder.weight'] = T(nw) 60 | wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(nw)) 61 | wgts['1.decoder.weight'] = T(np.copy(nw)) 62 | learner.model.load_state_dict(wgts) 63 | elif startat==1: learner.load(f'{PRE}lm_4') 64 | learner.metrics = [accuracy] 65 | 66 | lrs = np.array([lr/6,lr/3,lr,lr]) 67 | #lrs=lr 68 | 69 | learner.unfreeze() 70 | learner.fit(lrs, 1, wds=wd, use_clr=(32,10), cycle_len=cl) 71 | learner.save(f'{PRE}lm_4') 72 | learner.save_encoder(f'{PRE}lm_4_enc') 73 | 74 | if __name__ == '__main__': fire.Fire(train_lm) 75 | 76 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/lsun_scripts/lsun-data.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import os 5 | from os.path import exists, join 6 | 7 | import lmdb 8 | from tqdm import tqdm 9 | 10 | __author__ = 'Fisher Yu' 11 | __email__ = 'fy@cs.princeton.edu' 12 | __license__ = 'MIT' 13 | # (Minor edits by Jeremy Howard) 14 | 15 | 16 | def export_images(db_path, out_dir, flat=False): 17 | print('Exporting', db_path, 'to', out_dir) 18 | env = lmdb.open(db_path, map_size=1099511627776, 19 | max_readers=100, readonly=True) 20 | with env.begin(write=False) as txn: 21 | cursor = txn.cursor() 22 | for key, val in tqdm(cursor): 23 | key = key.decode() 24 | if not flat: image_out_dir = join(out_dir, '/'.join(key[:3])) 25 | else: image_out_dir = out_dir 26 | if not exists(image_out_dir): os.makedirs(image_out_dir) 27 | image_out_path = 
join(image_out_dir, key + '.jpg') 28 | with open(image_out_path, 'wb') as fp: fp.write(val) 29 | 30 | 31 | def main(): 32 | parser = argparse.ArgumentParser() 33 | parser.add_argument('lmdb_path', nargs='+', type=str, 34 | help='The path to the lmdb database folder. ' 35 | 'Support multiple database paths.') 36 | parser.add_argument('--out_dir', type=str, default='') 37 | parser.add_argument('--flat', action='store_true', 38 | help='If enabled, the images are imported into output ' 39 | 'directory directly instead of hierarchical ' 40 | 'directories.') 41 | args = parser.parse_args() 42 | lmdb_paths = args.lmdb_path 43 | for lmdb_path in lmdb_paths: export_images(lmdb_path, args.out_dir, args.flat) 44 | 45 | 46 | if __name__ == '__main__': main() 47 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/lsun_scripts/lsun-download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function, division 5 | 6 | import argparse 7 | import json 8 | import subprocess 9 | from os.path import join 10 | 11 | from six.moves.urllib.request import urlopen 12 | 13 | __author__ = 'Fisher Yu' 14 | __email__ = 'fy@cs.princeton.edu' 15 | __license__ = 'MIT' 16 | 17 | 18 | def list_categories(tag): 19 | url = 'http://lsun.cs.princeton.edu/htbin/list.cgi?tag=' + tag 20 | f = urlopen(url) 21 | return json.loads(f.read()) 22 | 23 | 24 | def download(out_dir, category, set_name, tag): 25 | url = 'http://lsun.cs.princeton.edu/htbin/download.cgi?tag={tag}' \ 26 | '&category={category}&set={set_name}'.format(**locals()) 27 | if set_name == 'test': 28 | out_name = 'test_lmdb.zip' 29 | else: 30 | out_name = '{category}_{set_name}_lmdb.zip'.format(**locals()) 31 | out_path = join(out_dir, out_name) 32 | cmd = ['curl', url, '-o', out_path] 33 | print('Downloading', category, set_name, 'set') 34 | subprocess.call(cmd) 35 | 36 | 37 | def main(): 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument('--tag', type=str, default='latest') 40 | parser.add_argument('-o', '--out_dir', default='') 41 | parser.add_argument('-c', '--category', default=None) 42 | args = parser.parse_args() 43 | 44 | categories = list_categories(args.tag) 45 | if args.category is None: 46 | print('Downloading', len(categories), 'categories') 47 | for category in categories: 48 | download(args.out_dir, category, 'train', args.tag) 49 | download(args.out_dir, category, 'val', args.tag) 50 | download(args.out_dir, '', 'test', args.tag) 51 | else: 52 | if args.category == 'test': 53 | download(args.out_dir, '', 'test', args.tag) 54 | elif args.category not in categories: 55 | print('Error:', args.category, "doesn't exist in", 56 | args.tag, 'LSUN release') 57 | else: 58 | download(args.out_dir, args.category, 'train', args.tag) 59 | download(args.out_dir, args.category, 'val', args.tag) 60 | 61 | 62 | if __name__ == '__main__': 63 | main() 64 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/dl2/ppt/lesson8.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/ppt/lesson8.pptx -------------------------------------------------------------------------------- 
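A usage note for the two lsun_scripts files above: they run in sequence, lsun-download.py first to curl the LSUN category zips (a train and a val archive per category), then, after unzipping, lsun-data.py to walk each LMDB and write its images out as JPEGs. The commands below are a hedged sketch in the spirit of models/cifar10/main.sh; the data/lsun paths and the bedroom category are illustrative only, and all flags come from the argparse definitions above.

# fetch bedroom_train_lmdb.zip and bedroom_val_lmdb.zip into data/lsun/
python lsun-download.py -o data/lsun -c bedroom
unzip data/lsun/bedroom_val_lmdb.zip -d data/lsun
# export every image in the LMDB; --flat writes <key>.jpg directly into --out_dir
python lsun-data.py data/lsun/bedroom_val_lmdb --out_dir data/lsun/bedroom_val --flat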
/code_summarization_transfer_learning/fastai/courses/dl2/xl/dl-examples.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/dl2/xl/dl-examples.xlsx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/excel/naivebayes.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/excel/naivebayes.xlsx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/fastai/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/executors.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import time 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | 7 | class LazyThreadPoolExecutor(ThreadPoolExecutor): 8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None): 9 | """ 10 | Collects iterables lazily, rather than immediately. 11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor 12 | Implmentation taken from this PR: https://github.com/python/cpython/pull/707 13 | """ 14 | if timeout is not None: end_time = timeout + time.time() 15 | if prefetch is None: prefetch = self._max_workers 16 | if prefetch < 0: raise ValueError("prefetch count may not be negative") 17 | argsiter = zip(*iterables) 18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch)) 19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required. 
20 | def result_iterator(): 21 | nonlocal argsiter 22 | try: 23 | while fs: 24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time()) 25 | # Got a result, future needn't be cancelled 26 | del fs[0] 27 | # Dispatch next task before yielding to keep pipeline full 28 | if argsiter: 29 | try: 30 | args = next(argsiter) 31 | except StopIteration: 32 | argsiter = None 33 | else: 34 | fs.append(self.submit(fn, *args)) 35 | yield res 36 | finally: 37 | for future in fs: future.cancel() 38 | return result_iterator() -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/fp16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FP16(nn.Module): 6 | def __init__(self, module): 7 | super(FP16, self).__init__() 8 | self.module = batchnorm_to_fp32(module.half()) 9 | 10 | def forward(self, input): 11 | return self.module(input.half()) 12 | 13 | def load_state_dict(self, *inputs, **kwargs): 14 | self.module.load_state_dict(*inputs, **kwargs) 15 | 16 | def state_dict(self, *inputs, **kwargs): 17 | return self.module.state_dict(*inputs, **kwargs) 18 | 19 | def batchnorm_to_fp32(module): 20 | ''' 21 | Converts BatchNorm layers to have parameters in single precision. 22 | Find all layers and convert them back to float. This can't 23 | be done with built in .apply as that function will apply 24 | fn to all modules, parameters, and buffers. Thus we wouldn't 25 | be able to guard the float conversion based on the module type. 26 | ''' 27 | if isinstance(module, nn.modules.batchnorm._BatchNorm): 28 | module.float() 29 | for child in module.children(): 30 | batchnorm_to_fp32(child) 31 | return module 32 | 33 | def copy_model_to_fp32(m, optim): 34 | """ Creates an fp32 copy of model parameters and sets optimizer parameters 35 | """ 36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()] 37 | optim_groups = [group['params'] for group in optim.param_groups] 38 | iter_fp32_params = iter(fp32_params) 39 | for group_params in optim_groups: 40 | for i in range(len(group_params)): 41 | fp32_param = next(iter_fp32_params) 42 | fp32_param.requires_grad = group_params[i].requires_grad 43 | group_params[i] = fp32_param 44 | return fp32_params 45 | 46 | def copy_fp32_to_model(m, fp32_params): 47 | m_params = list(m.parameters()) 48 | for fp32_param, m_param in zip(fp32_params, m_params): 49 | m_param.data.copy_(fp32_param.data) 50 | 51 | def update_fp32_grads(fp32_params, m): 52 | m_params = list(m.parameters()) 53 | for fp32_param, m_param in zip(fp32_params, m_params): 54 | if fp32_param.grad is None: 55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size())) 56 | fp32_param.grad.data.copy_(m_param.grad.data) 57 | 58 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/images/industrial_fishing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/fastai/images/industrial_fishing.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/imports.py:
-------------------------------------------------------------------------------- 1 | import PIL, numpy as np, collections, bcolz, random, cv2 2 | import pandas as pd, pickle, string, sys, re, time, copy 3 | import matplotlib 4 | from pathlib import Path 5 | 6 | matplotlib.rc('animation', html='html5') 7 | np.set_printoptions(precision=5, linewidth=110, suppress=True) 8 | 9 | from ipykernel.kernelapp import IPKernelApp 10 | def in_notebook(): return IPKernelApp.initialized() 11 | 12 | def in_ipynb(): 13 | try: 14 | cls = get_ipython().__class__.__name__ 15 | return cls == 'ZMQInteractiveShell' 16 | except NameError: 17 | return False 18 | 19 | import tqdm as tq 20 | 21 | 22 | def clear_tqdm(): 23 | inst = getattr(tq.tqdm, '_instances', None) 24 | if not inst: return 25 | try: 26 | for i in range(len(inst)): inst.pop().close() 27 | except Exception: 28 | pass 29 | 30 | if in_notebook(): 31 | def tqdm(*args, **kwargs): 32 | clear_tqdm() 33 | return tq.tqdm(*args, file=sys.stdout, **kwargs) 34 | def trange(*args, **kwargs): 35 | clear_tqdm() 36 | return tq.trange(*args, file=sys.stdout, **kwargs) 37 | else: 38 | from tqdm import tqdm, trange 39 | tnrange=trange 40 | tqdm_notebook=tqdm 41 | 42 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/initializers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn  # needed for the isinstance checks below 2 | 3 | def cond_init(m, init_fn): 4 | if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)): 5 | if hasattr(m, 'weight'): init_fn(m.weight) 6 | if hasattr(m, 'bias'): m.bias.data.fill_(0.) 7 | 8 | def apply_init(m, init_fn): 9 | m.apply(lambda x: cond_init(x, init_fn)) 10 | 11 | 12 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/io.py: -------------------------------------------------------------------------------- 1 | import os  # needed by get_data() below 2 | from urllib.request import urlretrieve 3 | from tqdm import tqdm 4 | 5 | 6 | class TqdmUpTo(tqdm): 7 | def update_to(self, b=1, bsize=1, tsize=None): 8 | if tsize is not None: self.total = tsize 9 | self.update(b * bsize - self.n) 10 | 11 | def get_data(url, filename): 12 | if not os.path.exists(filename): 13 | 14 | dirname = os.path.dirname(filename) 15 | if not os.path.exists(dirname): 16 | os.makedirs(dirname) 17 | 18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 19 | urlretrieve(url, filename, reporthook=t.update_to) 20 | 21 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/layer_optimizer.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | 3 | def opt_params(parm, lr, wd): 4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd} 5 | 6 | class LayerOptimizer(): 7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None): 8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups] 9 | if not isinstance(lrs, Iterable): lrs=[lrs] 10 | if len(lrs)==1: lrs=lrs*len(layer_groups) 11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds] 13 | if len(wds)==1: wds=wds*len(layer_groups) 14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds 15 | self.opt = opt_fn(self.opt_params()) 16 | 17 | def opt_params(self): 18 | assert(len(self.layer_groups) == len(self.lrs)) 19 | assert(len(self.layer_groups) == len(self.wds)) 20 | params = list(zip(self.layer_groups,self.lrs,self.wds)) 21 | return [opt_params(*p) for p in params] 22 | 23 | @property 24 | def lr(self): return self.lrs[-1] 25 | 26 | @property 27 | def mom(self): 28 | if 'betas' in self.opt.param_groups[0]: 29 | return self.opt.param_groups[0]['betas'][0] 30 | else: 31 | return self.opt.param_groups[0]['momentum'] 32 | 33 | def set_lrs(self, lrs): 34 | if not isinstance(lrs, Iterable): lrs=[lrs] 35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups) 36 | set_lrs(self.opt, lrs) 37 | self.lrs=lrs 38 | 39 | def set_wds(self, wds): 40 | if not isinstance(wds, Iterable): wds=[wds] 41 | if len(wds)==1: wds=wds*len(self.layer_groups) 42 | set_wds(self.opt, wds) 43 | self.wds=wds 44 | 45 | def set_mom(self,momentum): 46 | if 'betas' in self.opt.param_groups[0]: 47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1]) 48 | else: 49 | for pg in self.opt.param_groups: pg['momentum'] = momentum 50 | 51 | def set_beta(self,beta): 52 | if 'betas' in self.opt.param_groups[0]: 53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta) 54 | elif 'alpha' in self.opt.param_groups[0]: 55 | for pg in self.opt.param_groups: pg['alpha'] = beta 56 | 57 | def set_opt_fn(self, opt_fn): 58 | if type(self.opt) != type(opt_fn(self.opt_params())): 59 | self.opt = opt_fn(self.opt_params()) 60 | 61 | def zip_strict_(l, r): 62 | assert(len(l) == len(r)) 63 | return zip(l, r) 64 | 65 | def set_lrs(opt, lrs): 66 | if not isinstance(lrs, Iterable): lrs=[lrs] 67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups) 68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr 69 | 70 | def set_wds(opt, wds): 71 | if not isinstance(wds, Iterable): wds=[wds] 72 | if len(wds)==1: wds=wds*len(opt.param_groups) 73 | assert(len(opt.param_groups) == len(wds)) 74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd 75 | 76 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/layers.py: -------------------------------------------------------------------------------- 1 | import torch  # needed by torch.cat below 2 | import torch.nn as nn 3 | 4 | class AdaptiveConcatPool2d(nn.Module): 5 | def __init__(self, sz=None): 6 | super().__init__() 7 | sz = sz or (1,1) 8 | self.ap = nn.AdaptiveAvgPool2d(sz) 9 | self.mp = nn.AdaptiveMaxPool2d(sz) 10 | def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1) 11 | 12 | class Lambda(nn.Module): 13 | def __init__(self, f): super().__init__(); self.f=f 14 | def forward(self, x): return self.f(x) 15 | 16 | class Flatten(nn.Module): 17 | def __init__(self): super().__init__() 18 | def forward(self, x): return x.view(x.size(0), -1) 19 | 20 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/losses.py: -------------------------------------------------------------------------------- 1 | import torch  # needed by torch.mean below 2 | 3 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9): 4 | y_pred = (y_pred.float() > threshold).float() 5 | y_true = y_true.float() 6 | tp = (y_pred * y_true).sum(dim=1) 7 | precision = tp / (y_pred.sum(dim=1)+eps) 8 | recall = tp / (y_true.sum(dim=1)+eps) 9 | return
torch.mean( 10 | precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2)) 11 | 12 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/metrics.py: -------------------------------------------------------------------------------- 1 | from .imports import * 2 | import torch  # used directly below; not provided by .imports 3 | 4 | def accuracy_np(preds, targs): 5 | preds = np.argmax(preds, 1) 6 | return (preds==targs).mean() 7 | 8 | def accuracy(preds, targs): 9 | preds = torch.max(preds, dim=1)[1] 10 | return (preds==targs).float().mean() 11 | 12 | def accuracy_thresh(thresh): 13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh) 14 | 15 | def accuracy_multi(preds, targs, thresh): 16 | return ((preds>thresh).float()==targs).float().mean() 17 | 18 | def accuracy_multi_np(preds, targs, thresh): 19 | return ((preds>thresh)==targs).mean() 20 | 21 | def recall(preds, targs, thresh=0.5): 22 | pred_pos = preds > thresh 23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 24 | return tpos.sum()/targs.sum() 25 | 26 | def precision(preds, targs, thresh=0.5): 27 | pred_pos = preds > thresh 28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 29 | return tpos.sum()/pred_pos.sum() 30 | 31 | def fbeta(preds, targs, beta, thresh=0.5): 32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall). 33 | This is the micro averaged version where the true positives, false negatives and 34 | false positives are calculated globally (as opposed to on a per label basis). 35 | 36 | beta == 1 places equal weight on precision and recall, beta < 1 emphasizes precision and 37 | beta > 1 favors recall. 38 | """ 39 | assert beta > 0, 'beta needs to be greater than 0' 40 | beta2 = beta ** 2 41 | rec = recall(preds, targs, thresh) 42 | prec = precision(preds, targs, thresh) 43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec) 44 | 45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh) 46 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.tar 3 | checkpoint* 4 | log* 5 | wgts/ 6 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/cifar10/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py --lr=0.1 4 | python main.py --resume --lr=0.01 5 | python main.py --resume --lr=0.001 6 | 7 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/cifar10/wideresnet.py: -------------------------------------------------------------------------------- 1 | # CIFAR-10 WideResNet for DAWNBench submission 2 | import torch.nn.functional as F  # F.relu is used in BasicBlock.forward 3 | from ...layers import * 4 | 5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False) 6 | 7 | def bn(ni, init_zero=False): 8 | m = nn.BatchNorm2d(ni) 9 | m.weight.data.fill_(0 if init_zero else 1) 10 | m.bias.data.zero_() 11 | return m 12 | 13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False): 14 | bn_initzero = bn(ni, init_zero=init_zero) 15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride)) 16 | 17 | def noop(x): return x 18 | 19
| class BasicBlock(nn.Module): 20 | def __init__(self, ni, nf, stride, drop_p=0.0): 21 | super().__init__() 22 | self.bn = nn.BatchNorm2d(ni) 23 | self.conv1 = conv_2d(ni, nf, 3, stride) 24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1) 25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None 26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop 27 | 28 | def forward(self, x): 29 | x2 = F.relu(self.bn(x), inplace=True) 30 | r = self.shortcut(x2) 31 | x = self.conv1(x2) 32 | if self.drop: x = self.drop(x) 33 | x = self.conv2(x) * 0.2 34 | return x.add_(r) 35 | 36 | 37 | def _make_group(N, ni, nf, block, stride, drop_p): 38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)] 39 | 40 | class WideResNet(nn.Module): 41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16): 42 | super().__init__() 43 | n_channels = [start_nf] 44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k) 45 | 46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1 47 | for i in range(num_groups): 48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p) 49 | 50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1), 51 | Flatten(), nn.Linear(n_channels[3], num_classes)] 52 | self.features = nn.Sequential(*layers) 53 | 54 | def forward(self, x): return self.features(x) 55 | 56 | 57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.) 58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.) 59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.) 60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2) 61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.) 62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.) 
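# Usage sketch for the constructors above and below (an illustration, not part
# of the original file; assumes torch is installed and the package imports resolve):
#
#     model = wrn_22()                           # 3 groups of N=3 blocks, k=6
#     logits = model(torch.randn(2, 3, 32, 32))  # CIFAR-10-sized dummy batch
#     logits.shape                               # torch.Size([2, 10])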
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2) 64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2) 65 | 66 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/models/darknet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .layers import * 4 | 5 | 6 | 7 | class ConvBN(nn.Module): 8 | "convolutional layer then batchnorm" 9 | 10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0): 11 | super().__init__() 12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01) 14 | self.relu = nn.LeakyReLU(0.1, inplace=True) 15 | 16 | def forward(self, x): return self.relu(self.bn(self.conv(x))) 17 | 18 | class DarknetBlock(nn.Module): 19 | def __init__(self, ch_in): 20 | super().__init__() 21 | ch_hid = ch_in//2 22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0) 23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x): return self.conv2(self.conv1(x)) + x 26 | 27 | class Darknet(nn.Module): 28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)" 29 | 30 | def make_group_layer(self, ch_in, num_blocks, stride=1): 31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)] 32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2)) 33 | return layers 34 | 35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32): 36 | super().__init__() 37 | nf = start_nf 38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)] 39 | for i,nb in enumerate(num_blocks): 40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2)) 41 | nf *= 2 42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)] 43 | self.layers = nn.Sequential(*layers) 44 | 45 | def forward(self, x): return self.layers(x) 46 | 47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes) 48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes) 49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24) 50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16) 51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes) 52 | 53 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/rnn_train.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/set_spawn.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import set_start_method 2 | set_start_method('spawn') 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/fastai/transforms_pil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image.
7 | 8 | Args: 9 | n_holes (int): Number of patches to cut out of each image. 10 | length (int): The length (in pixels) of each square patch. 11 | """ 12 | def __init__(self, n_holes, length): 13 | self.n_holes = n_holes 14 | self.length = length 15 | 16 | def __call__(self, img): 17 | """ 18 | Args: 19 | img (Tensor): Tensor image of size (C, H, W). 20 | Returns: 21 | Tensor: Image with n_holes of dimension length x length cut out of it. 22 | """ 23 | h = img.size(1) 24 | w = img.size(2) 25 | 26 | mask = np.ones((h, w), np.float32) 27 | 28 | for n in range(self.n_holes): 29 | y = np.random.randint(h) 30 | x = np.random.randint(w) 31 | 32 | y1 = np.clip(y - self.length // 2, 0, h)  # integer division keeps the slice bounds below integral 33 | y2 = np.clip(y + self.length // 2, 0, h) 34 | x1 = np.clip(x - self.length // 2, 0, w) 35 | x2 = np.clip(x + self.length // 2, 0, w) 36 | 37 | mask[y1: y2, x1: x2] = 0. 38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/bulldozers_data2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/digit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/digit.gif -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/ethics_recidivism.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/ethics_recidivism.jpg -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/mnist.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/overfitting2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/overfitting2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/sgd2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/sgd2.gif -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/what_is_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/what_is_pytorch.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler1.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler3.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/images/zeiler4.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/ppt/2017-12-ethics.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/ppt/2017-12-ethics.pptx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/courses/ml1/ppt/ml_applications.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/courses/ml1/ppt/ml_applications.pptx -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/README.md: -------------------------------------------------------------------------------- 1 | # fastai doc project 2 | 3 | The fastai doc project is just getting underway! So now is a great time to get involved. Here are some thoughts and guidelines to help you get oriented... 4 | 5 | ## Project goals and approach 6 | 7 | The idea of this project is to create documentation that makes readers say "wow that's the most fantastic documentation I've ever read". So... no pressure. :) How do we do this? By taking the philosophies demonstrated in fast.ai's courses and bringing them to the world of documentation. Here are a few guidelines to consider: 8 | 9 | - Assume the reader is intelligent and interested 10 | - Don't assume the reader has any specific knowledge about the field you're documenting 11 | - If you need the reader to have some knowledge to understand your documentation, and there is some effective external resource they can learn from, point them there rather than trying to do it all yourself 12 | - Use code to describe what's going on where possible, not math 13 | - Create a notebook demonstrating the ideas you're documenting (include the notebook in this repo) and show examples from the notebook directly in your docs 14 | - Use a top-down approach; that is, first explain what problem the code is meant to solve, and at a high level how it solves it, and then go deeper into the details once those concepts are well understood 15 | - For common tasks, show full end-to-end examples of how to complete the task. 16 | 17 | Use pictures, tables, analogies, and other explanatory devices (even embedded video!) wherever they can help the reader understand. Use hyperlinks liberally, both within these docs and to external resources. 18 | 19 | We don't want this detailed documentation to create clutter in the code, and we also don't want to overwhelm the user when they just want a quick summary of what a method does. Therefore, docstrings should generally be limited to a single line. The python standard library is documented this way--for instance, the docstring for `re.compile()` is the single line "*Compile a regular expression pattern, returning a pattern object.*" But the full documentation of the `re` library on the python web site goes into detail about this method, how it's used, and its relation to other parts of the library. 20 | 21 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/docs/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/dataloader.adoc: -------------------------------------------------------------------------------- 1 | = fastai.dataloader 2 | 3 | == Introduction and overview 4 | 5 | *Note:* the fastai DataLoader has a similar API to the PyTorch DataLoader. 
Please see http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader[the PyTorch documentation] for usage and details. The documentation presented here focuses on the differences between the two classes. 6 | 7 | == {{class DataLoader,dataset,batch_size=1,shuffle=False,sampler=None,batch_sampler=None,pad_idx=0,num_workers=None,pin_memory=False,drop_last=False,pre_pad=True,half=False,transpose=False,transpose_y=False}} 8 | 9 | .Used to iterate through a dataset to pass data into a model for training. 10 | 11 | === {{arguments}} 12 | 13 | For information on arguments with no descriptions, please see http://pytorch.org/docs/master/data.html#torch.utils.data.DataLoader[the PyTorch documentation] 14 | 15 | {{arg dataset,Dataset}} 16 | 17 | {{arg batch_size,int,1}} 18 | 19 | {{arg shuffle,bool,False}} 20 | 21 | {{arg sampler,Sampler,None}} 22 | 23 | {{arg batch_sampler,BatchSampler,None}} 24 | 25 | {{arg pad_idx,int,0}} 26 | A padding index representing how many zeros to add to each batch. See: `pre_pad` 27 | 28 | {{arg num_workers,int,None}} 29 | Allows the user to manually set the number of workers. If left as `None`, it will default to the number of CPU cores the system has. If > 0, the dataloader will create `num_workers` number of jobs using `concurrent.futures.ThreadPoolExecutor`. 30 | 31 | {{arg pin_memory,bool,False}} 32 | 33 | {{arg drop_last,bool,False}} 34 | 35 | {{arg pre_pad,bool,True}} 36 | If `pad_idx` is non-zero, this determines if the zeros should go at the beginning of the batch, or at the end. By default, the zeros are added at the beginning of the batch. 37 | 38 | {{arg half,bool,False}} 39 | If `True`, `torch.cuda.HalfTensor()` will be used instead of `torch.FloatTensor()`. 40 | 41 | {{arg transpose,bool,False}} 42 | If `True`, each batch will have its inputs transposed. 43 | 44 | {{arg transpose_y,bool,False}} 45 | If `True`, each batch will have its outputs (labels) transposed. 46 | 47 | === {{methods}} 48 | 49 | {{method jag_stack,b}} 50 | 51 | Helper method for `np_collate()`. Returns a np.array of the batch passed in, with zeros added for any shorter rows inside the batch, plus extra zeros if `self.pad_idx > 0`. If all items inside the batch are the same length, no zero padding is added. 52 | 53 | {{method np_collate,batch}} 54 | 55 | Helper method for `get_batch()`. Based on the input data type, it creates an appropriate np.array, list, or dict. If the method is passed a string or list of strings, it simply returns the parameter without modification. Batches must contain numbers, strings, dicts, or lists, and this method also ensures this is the case. 56 | 57 | {{method get_batch,indices}} 58 | 59 | Helper method for `__iter__()`. When an iterator of the dataloader object is created, `get_batch()` is used to retrieve items from the dataset and apply transposes if needed based on `self.transpose` and `self.transpose_y`. 
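A minimal usage sketch, assuming `train_ds` is any Dataset yielding `(x, y)` pairs (the argument values here are illustrative, not defaults):

```python
from fastai.dataloader import DataLoader

# Arguments follow the signature documented above.
dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=4)
for x, y in dl:  # each batch is collated via np_collate()
    ...
```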
60 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/md_expander.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | 5 | def expand(filename): 6 | 7 | f = open(filename, "r") 8 | contents = f.read() 9 | 10 | regex_inside = r"\{\{(.*?)\}\}" 11 | regex_outside = r"(^|\}\})(.*?)(\{\{|$)" 12 | 13 | within = re.finditer(regex_inside, contents, re.MULTILINE | re.DOTALL) 14 | outside = re.finditer(regex_outside, contents, re.MULTILINE | re.DOTALL) 15 | 16 | for matchNum, match in enumerate(within): 17 | for groupNum in range(0, len(match.groups())): 18 | group = match.group(1) 19 | if group.startswith("class"): 20 | classname = re.search(r" (.*?),", group).groups()[0] 21 | params = re.search(r",(.*)", group).groups()[0] 22 | print('\n\nClass: ' + classname + '(' + params + '\n\n') 23 | 24 | print(match.group(1)) 25 | 26 | # split = re.split(regex_inside, contents) 27 | # 28 | # for i, item in enumerate(split): 29 | 30 | 31 | 32 | if __name__ == '__main__': 33 | 34 | expand(sys.argv[1]) 35 | 36 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/module-decisions.md: -------------------------------------------------------------------------------- 1 | # Module Decisions 2 | 3 | ## Introduction 4 | 5 | There are many ways of doing one thing in programming. Instead of getting into debates about the one right way of doing things, in the `fastai` library we would like to make decisions and then stick with them. This page lists any such decisions. 6 | 7 | ### Image Data 8 | - Coordinates 9 | - Computer vision uses coordinates in format `(x, y)`. e.g. PIL 10 | - Maths uses `(y, x)`. e.g. Numpy, PyTorch 11 | - `fastai` will use `(y, x)` 12 | - Bounding Boxes 13 | - Will use `(coordinates top left, coordinates bottom right)` instead of `(coordinates top left, (height, width))` 14 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/templates.py: -------------------------------------------------------------------------------- 1 | HEADER = ''' 2 | = fastai.{} 3 | 4 | == Introduction and overview 5 | 6 | ``` 7 | ...example... 8 | ``` 9 | 10 | 11 | ''' -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/testing.adoc: -------------------------------------------------------------------------------- 1 | = Notes on fastai testing style 2 | 3 | We chose pytest as a framework since it's more modern, concise, and https://www.slant.co/topics/2621/~python-unit-testing-frameworks[recommended by coders]. 4 | 5 | We also try to follow this suggestion from the http://docs.python-guide.org/en/latest/writing/tests/[python testing guide]: 6 | 7 | ____ 8 | Use long and descriptive names for testing functions. The style guide here is slightly different than that of running code, where short names are often preferred. The reason is testing functions are never called explicitly. square() or even sqr() is ok in running code, but in testing code you would have names such as test_square_of_number_2(), test_square_negative_number(). These function names are displayed when a test fails, and should be as descriptive as possible. 9 | ____ 10 | 11 | More generally, aim to write tests that also explain the code they are testing. A really good test suite can also serve as really good documentation. 12 | 13 | == Testing patterns 14 | 15 | * Do not use mock or fake objects. The library is nice enough that real versions of required objects can be used without prohibitive overhead. 16 | * Keep test methods small and tidy, just like any other code. 17 | * Aim to add a regression test as part of any bug fix PR. 18 | * Add tests before refactoring, so they can help prove correctness. 19 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/transforms-tmpl.adoc: -------------------------------------------------------------------------------- 1 | = fastai.transforms 2 | 3 | == Introduction and overview 4 | 5 | The fastai transforms pipeline for images is designed to convert your independent and dependent variables into a form ready to be batched by your DataLoader and passed to your model.
It is most commonly used like this: 6 | 7 | 8 | ``` 9 | ...example... 10 | ``` 11 | 12 | The most common types of transforms are predefined in ... 13 | 14 | The most likely customizations you might need to do are ... 15 | 16 | You can create custom transform pipelines using an approach like: ... 17 | 18 | If you want to create a custom transform, you will need to: ... 19 | 20 | == {{class Transform,tfm_y=TfmType.NO}} 21 | 22 | .Abstract parent for all transforms. 23 | 24 | Override do_transform to implement transformation of a single object. 25 | 26 | === {{arguments}} 27 | 28 | {{arg tfm_y,TfmType,TfmType.NO}} 29 | Type of transform. For details, see {{xref TfmType}}. 30 | 31 | === {{methods}} 32 | 33 | {{method set_state,}} 34 | 35 | A transform may include a random component. If it does, it will often need to transform `y` using the same random values as `x` (e.g. a horizontal flip in segmentation must be applied to the mask as well). Therefore, this method is used to ensure all random state is calculated in one place. 36 | 37 | **NB:** Transformations are often run in multiple threads. Therefore any state must be stored in thread local storage. The `Transform` class provides a thread local `store` attribute for you to use. See {{xref RandomFlip}} for an example of how to use random state safely in `Transform` subclasses. 38 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/docs/transforms.adoc: -------------------------------------------------------------------------------- 1 | = fastai.transforms 2 | Jeremy Howard and contributors 3 | :toc: 4 | 5 | == Introduction and overview 6 | 7 | The fastai transforms pipeline for images is designed to convert your independent and dependent variables into a form ready to be batched by your DataLoader and passed to your model. It is most commonly used like this: 8 | 9 | 10 | ``` 11 | ...example... 12 | ``` 13 | 14 | The most common types of transforms are predefined in ... 15 | 16 | The most likely customizations you might need to do are ... 17 | 18 | You can create custom transform pipelines using an approach like: ... 19 | 20 | If you want to create a custom transform, you will need to: ... 21 | 22 | [[Transform]] 23 | == Class Transform [.small]#(tfm_y=TfmType.NO)# 24 | 25 | .Abstract parent for all transforms. 26 | 27 | Override do_transform to implement transformation of a single object. 28 | 29 | === Arguments 30 | 31 | tfm_y (type TfmType, default TfmType.NO):: 32 | Type of transform. For details, see xref:TfmType[TfmType] 33 | 34 | === Methods 35 | 36 | set_state:: 37 | A transform may include a random component. If it does, it will often need to transform `y` using the same random values as `x` (e.g. a horizontal flip in segmentation must be applied to the mask as well). Therefore, this method is used to ensure all random state is calculated in one place. 38 | + 39 | **NB:** Transformations are often run in multiple threads. Therefore any state must be stored in thread local storage. The `Transform` class provides a thread local `store` attribute for you to use. See {{xref RandomFlip}} for an example of how to use random state safely in `Transform` subclasses. 40 | 41 | [[TfmType]] 42 | == Class TfmType:IntEnum 43 | 44 | .Type of transformation. 45 | 46 | NO:: the default, y does not get transformed when x is transformed. 47 | PIXEL:: x and y are images and should be transformed in the same way.
_E.g.: image segmentation._ 48 | COORD:: y are coordinates (i.e bounding boxes) 49 | CLASS:: y are class labels (same behaviour as PIXEL, except no normalization) 50 | 51 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/environment-cpu.yml: -------------------------------------------------------------------------------- 1 | name: fastai-cpu 2 | channels: 3 | - fastai 4 | - pytorch 5 | - defaults 6 | - peterjc123 7 | dependencies: 8 | - scipy 9 | - numpy 10 | - pillow 11 | - jpeg 12 | - spacy 13 | - zlib 14 | - freetype 15 | - libtiff 16 | - bleach 17 | - certifi 18 | - cffi 19 | - cycler 20 | - decorator 21 | - entrypoints 22 | - expat 23 | #- fontconfig 24 | #- glib 25 | - html5lib 26 | - icu 27 | - ipykernel 28 | - ipython 29 | - ipython_genutils 30 | - ipywidgets 31 | #- jbig 32 | - jedi 33 | - jinja2 34 | - jsonschema 35 | - jupyter 36 | - jupyter_client 37 | - jupyter_console 38 | - jupyter_core 39 | - conda-forge::jupyter_contrib_nbextensions 40 | #- libffi 41 | #- libgcc 42 | #- libgfortran 43 | - libiconv 44 | - libpng 45 | - libsodium 46 | - libxml2 47 | - markupsafe 48 | - matplotlib 49 | - mistune 50 | - mkl 51 | - nbconvert 52 | - nbformat 53 | - notebook 54 | - numpy 55 | - olefile 56 | - openssl 57 | - pandas 58 | - pandocfilters 59 | - path.py 60 | - patsy 61 | - pcre 62 | - pexpect 63 | - pickleshare 64 | - pillow 65 | - pip 66 | - prompt_toolkit 67 | #- ptyprocess 68 | - pycparser 69 | - pygments 70 | - pyparsing 71 | - pyqt 72 | - python>=3.6.0 73 | - python-dateutil 74 | - pytz 75 | - pyzmq 76 | - qt 77 | - qtconsole 78 | #- readline 79 | - scipy 80 | - seaborn 81 | - setuptools 82 | - simplegeneric 83 | - sip 84 | - six 85 | - sqlite 86 | - statsmodels 87 | #- terminado 88 | - testpath 89 | - tk 90 | - tornado<5 91 | - tqdm 92 | - traitlets 93 | - wcwidth 94 | - wheel 95 | - widgetsnbextension 96 | - xz 97 | - zeromq 98 | - pytorch<0.4 99 | - bcolz 100 | - prompt_toolkit 101 | - pytest 102 | - pip: 103 | - torchvision>=0.1.9 104 | - opencv-python 105 | - isoweek 106 | - pandas_summary 107 | - torchtext 108 | - graphviz 109 | - sklearn_pandas 110 | - feather-format 111 | - plotnine 112 | - kaggle-cli 113 | - ipywidgets 114 | # - git+https://github.com/SauceCat/PDPbox.git 115 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/environment-nopytorch.yml: -------------------------------------------------------------------------------- 1 | name: fastai 2 | channels: 3 | - fastai 4 | #- pytorch 5 | - defaults 6 | - peterjc123 7 | dependencies: 8 | - scipy 9 | #- cuda90 10 | #- cudnn 11 | - numpy 12 | - pillow 13 | - jpeg 14 | - spacy 15 | - zlib 16 | - freetype 17 | - libtiff 18 | - bleach 19 | - certifi 20 | - cffi 21 | - cycler 22 | - decorator 23 | - entrypoints 24 | - expat 25 | #- fontconfig 26 | #- glib 27 | - html5lib 28 | - icu 29 | - ipykernel 30 | - ipython 31 | - ipython_genutils 32 | - ipywidgets 33 | #- jbig 34 | - jedi 35 | - jinja2 36 | - jsonschema 37 | - jupyter 38 | - jupyter_client 39 | - jupyter_console 40 | - jupyter_core 41 | - conda-forge::jupyter_contrib_nbextensions 42 | #- libffi 43 | #- libgcc 44 | #- libgfortran 45 | - libiconv 46 | - libpng 47 | - libsodium 48 | - libxml2 49 | - markupsafe 50 | - matplotlib 51 | - mistune 52 | - mkl 53 | - nbconvert 54 | - nbformat 55 | - notebook 56 | - numpy 57 | - olefile 58 | - openssl 59 | - pandas 60 | - pandocfilters 61 | - path.py 62 | - patsy 63 | - 
pcre 64 | - pexpect 65 | - pickleshare 66 | - pillow 67 | - pip 68 | - prompt_toolkit 69 | #- ptyprocess 70 | - pycparser 71 | - pygments 72 | - pyparsing 73 | - pyqt 74 | - python>=3.6.0 75 | - python-dateutil 76 | - pytz 77 | - pyzmq 78 | - qt 79 | - qtconsole 80 | #- readline 81 | - scipy 82 | - seaborn 83 | - setuptools 84 | - simplegeneric 85 | - sip 86 | - six 87 | - sqlite 88 | - statsmodels 89 | #- terminado 90 | - testpath 91 | - tk 92 | - tornado<5 93 | - tqdm 94 | - traitlets 95 | - wcwidth 96 | - wheel 97 | - widgetsnbextension 98 | - xz 99 | - zeromq 100 | #- pytorch<0.4 101 | - bcolz 102 | - prompt_toolkit 103 | - pip: 104 | #- torchvision>=0.1.9 105 | - opencv-python 106 | - isoweek 107 | - pandas_summary 108 | #- torchtext 109 | - graphviz 110 | - sklearn_pandas 111 | - feather-format 112 | - plotnine 113 | - awscli 114 | - kaggle-cli 115 | - ipywidgets 116 | #- git+https://github.com/SauceCat/PDPbox.git 117 | 118 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/environment-old.yml: -------------------------------------------------------------------------------- 1 | name: fastai 2 | channels: 3 | - fastai 4 | - pytorch 5 | - defaults 6 | dependencies: 7 | - scipy 8 | - cuda90 9 | - numpy 10 | - pillow 11 | - jpeg 12 | - spacy 13 | - zlib 14 | - freetype 15 | - libtiff 16 | - bleach 17 | - certifi 18 | - cffi 19 | - cycler 20 | - decorator 21 | - entrypoints 22 | - expat 23 | - fontconfig 24 | - glib 25 | - html5lib 26 | - icu 27 | - ipykernel 28 | - ipython 29 | - ipython_genutils 30 | - ipywidgets 31 | - jbig 32 | - jedi 33 | - jinja2 34 | - jsonschema 35 | - jupyter 36 | - jupyter_client 37 | - jupyter_console 38 | - jupyter_core 39 | - conda-forge::jupyter_contrib_nbextensions 40 | - libffi 41 | - libgcc 42 | - libgfortran 43 | - libiconv 44 | - libpng 45 | - libsodium 46 | - libxml2 47 | - markupsafe 48 | - matplotlib 49 | - mistune 50 | - mkl 51 | - nbconvert 52 | - nbformat 53 | - notebook 54 | - numpy 55 | - olefile 56 | - openssl 57 | - pandas 58 | - pandocfilters 59 | - path.py 60 | - patsy 61 | - pcre 62 | - pexpect 63 | - pickleshare 64 | - pillow 65 | - pip 66 | - prompt_toolkit 67 | - ptyprocess 68 | - pycparser 69 | - pygments 70 | - pyparsing 71 | - pyqt 72 | - python>=3.6.0 73 | - python-dateutil 74 | - pytz 75 | - pyzmq 76 | - qt 77 | - qtconsole 78 | - readline 79 | - scipy 80 | - seaborn 81 | - setuptools 82 | - simplegeneric 83 | - sip 84 | - six 85 | - sqlite 86 | - statsmodels 87 | - terminado 88 | - testpath 89 | - tk 90 | - tornado<5 91 | - tqdm 92 | - traitlets 93 | - wcwidth 94 | - wheel 95 | - widgetsnbextension 96 | - xz 97 | - zeromq 98 | - pytorch>=0.2.0 99 | - torchvision>=0.1.9 100 | - bcolz 101 | - prompt_toolkit 102 | - pip: 103 | - opencv-python 104 | - isoweek 105 | - pandas_summary 106 | - torchtext 107 | - graphviz 108 | - sklearn_pandas 109 | - feather-format 110 | - plotnine 111 | - awscli 112 | - kaggle-cli 113 | - ipywidgets 114 | - git+https://github.com/SauceCat/PDPbox.git 115 | 116 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/environment.yml: -------------------------------------------------------------------------------- 1 | name: fastai 2 | channels: 3 | - fastai 4 | - pytorch 5 | - defaults 6 | - peterjc123 7 | dependencies: 8 | - scipy 9 | - cuda90 10 | - cudnn 11 | - numpy 12 | - pillow 13 | - jpeg 14 | - spacy 15 | - zlib 16 | - freetype 17 | - libtiff 18 | - 
bleach 19 | - certifi 20 | - cffi 21 | - cycler 22 | - decorator 23 | - entrypoints 24 | - expat 25 | #- fontconfig 26 | #- glib 27 | - html5lib 28 | - icu 29 | - ipykernel 30 | - ipython 31 | - ipython_genutils 32 | - ipywidgets 33 | #- jbig 34 | - jedi 35 | - jinja2 36 | - jsonschema 37 | - jupyter 38 | - jupyter_client 39 | - jupyter_console 40 | - jupyter_core 41 | - conda-forge::jupyter_contrib_nbextensions 42 | #- libffi 43 | #- libgcc 44 | #- libgfortran 45 | - libiconv 46 | - libpng 47 | - libsodium 48 | - libxml2 49 | - markupsafe 50 | - matplotlib 51 | - mistune 52 | - mkl 53 | - nbconvert 54 | - nbformat 55 | - notebook 56 | - numpy 57 | - olefile 58 | - openssl 59 | - pandas 60 | - pandocfilters 61 | - path.py 62 | - patsy 63 | - pcre 64 | - pexpect 65 | - pickleshare 66 | - pillow 67 | - pip 68 | - prompt_toolkit 69 | #- ptyprocess 70 | - pycparser 71 | - pygments 72 | - pyparsing 73 | - pyqt 74 | - python>=3.6.0 75 | - python-dateutil 76 | - pytz 77 | - pyzmq 78 | - qt 79 | - qtconsole 80 | #- readline 81 | - scipy 82 | - seaborn 83 | - setuptools 84 | - simplegeneric 85 | - sip 86 | - six 87 | - sqlite 88 | - statsmodels 89 | #- terminado 90 | - testpath 91 | - tk 92 | - tornado<5 93 | - tqdm 94 | - traitlets 95 | - wcwidth 96 | - wheel 97 | - widgetsnbextension 98 | - xz 99 | - zeromq 100 | - pytorch<0.4 101 | - bcolz 102 | - prompt_toolkit 103 | - pytest 104 | - pip: 105 | - torchvision>=0.1.9 106 | - opencv-python 107 | - isoweek 108 | - pandas_summary 109 | - torchtext 110 | - graphviz 111 | - sklearn_pandas 112 | - feather-format 113 | - plotnine 114 | - kaggle-cli 115 | - ipywidgets 116 | #- git+https://github.com/SauceCat/PDPbox.git 117 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/fastai/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/executors.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import time 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | 7 | class LazyThreadPoolExecutor(ThreadPoolExecutor): 8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None): 9 | """ 10 | Collects iterables lazily, rather than immediately. 11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor 12 | Implementation taken from this PR: https://github.com/python/cpython/pull/707 13 | """ 14 | if timeout is not None: end_time = timeout + time.time() 15 | if prefetch is None: prefetch = self._max_workers 16 | if prefetch < 0: raise ValueError("prefetch count may not be negative") 17 | argsiter = zip(*iterables) 18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch)) 19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator(): 21 | nonlocal argsiter 22 | try: 23 | while fs: 24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time()) 25 | # Got a result, future needn't be cancelled 26 | del fs[0] 27 | # Dispatch next task before yielding to keep pipeline full 28 | if argsiter: 29 | try: 30 | args = next(argsiter) 31 | except StopIteration: 32 | argsiter = None 33 | else: 34 | fs.append(self.submit(fn, *args)) 35 | yield res 36 | finally: 37 | for future in fs: future.cancel() 38 | return result_iterator() -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/fp16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FP16(nn.Module): 6 | def __init__(self, module): 7 | super(FP16, self).__init__() 8 | self.module = batchnorm_to_fp32(module.half()) 9 | 10 | def forward(self, input): 11 | return self.module(input.half()) 12 | 13 | def load_state_dict(self, *inputs, **kwargs): 14 | self.module.load_state_dict(*inputs, **kwargs) 15 | 16 | def state_dict(self, *inputs, **kwargs): 17 | return self.module.state_dict(*inputs, **kwargs) 18 | 19 | def batchnorm_to_fp32(module): 20 | ''' 21 | Converts BatchNorm layers to have parameters in single precision. 22 | Find all layers and convert them back to float. This can't 23 | be done with built in .apply as that function will apply 24 | fn to all modules, parameters, and buffers. Thus we wouldn't 25 | be able to guard the float conversion based on the module type. 26 | ''' 27 | if isinstance(module, nn.modules.batchnorm._BatchNorm): 28 | module.float() 29 | for child in module.children(): 30 | batchnorm_to_fp32(child) 31 | return module 32 | 33 | def copy_model_to_fp32(m, optim): 34 | """ Creates an fp32 copy of model parameters and sets optimizer parameters 35 | """ 36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()] 37 | optim_groups = [group['params'] for group in optim.param_groups] 38 | iter_fp32_params = iter(fp32_params) 39 | for group_params in optim_groups: 40 | for i in range(len(group_params)): 41 | fp32_param = next(iter_fp32_params) 42 | fp32_param.requires_grad = group_params[i].requires_grad 43 | group_params[i] = fp32_param 44 | return fp32_params 45 | 46 | def copy_fp32_to_model(m, fp32_params): 47 | m_params = list(m.parameters()) 48 | for fp32_param, m_param in zip(fp32_params, m_params): 49 | m_param.data.copy_(fp32_param.data) 50 | 51 | def update_fp32_grads(fp32_params, m): 52 | m_params = list(m.parameters()) 53 | for fp32_param, m_param in zip(fp32_params, m_params): 54 | if fp32_param.grad is None: 55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size())) 56 | fp32_param.grad.data.copy_(m_param.grad.data) 57 | 58 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/images/industrial_fishing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/fastai/images/industrial_fishing.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/imports.py:
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | import matplotlib 5 | 6 | 7 | matplotlib.rc('animation', html='html5') 8 | np.set_printoptions(precision=5, linewidth=110, suppress=True) 9 | 10 | from ipykernel.kernelapp import IPKernelApp 11 | def in_notebook(): return IPKernelApp.initialized() 12 | 13 | def in_ipynb(): 14 | try: 15 | cls = get_ipython().__class__.__name__ 16 | return cls == 'ZMQInteractiveShell' 17 | except NameError: 18 | return False 19 | 20 | import tqdm as tq 21 | 22 | 23 | def clear_tqdm(): 24 | inst = getattr(tq.tqdm, '_instances', None) 25 | if not inst: return 26 | try: 27 | for i in range(len(inst)): inst.pop().close() 28 | except Exception: 29 | pass 30 | 31 | if in_notebook(): 32 | def tqdm(*args, **kwargs): 33 | clear_tqdm() 34 | return tq.tqdm(*args, file=sys.stdout, **kwargs) 35 | def trange(*args, **kwargs): 36 | clear_tqdm() 37 | return tq.trange(*args, file=sys.stdout, **kwargs) 38 | else: 39 | from tqdm import tqdm, trange 40 | tnrange=trange 41 | tqdm_notebook=tqdm 42 | 43 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/initializers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn  # needed for the isinstance checks below 2 | 3 | def cond_init(m, init_fn): 4 | if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)): 5 | if hasattr(m, 'weight'): init_fn(m.weight) 6 | if hasattr(m, 'bias'): m.bias.data.fill_(0.) 7 | 8 | def apply_init(m, init_fn): 9 | m.apply(lambda x: cond_init(x, init_fn)) 10 | 11 | 12 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/io.py: -------------------------------------------------------------------------------- 1 | import os  # needed by get_data() below 2 | from urllib.request import urlretrieve 3 | from tqdm import tqdm 4 | 5 | 6 | class TqdmUpTo(tqdm): 7 | def update_to(self, b=1, bsize=1, tsize=None): 8 | if tsize is not None: self.total = tsize 9 | self.update(b * bsize - self.n) 10 | 11 | def get_data(url, filename): 12 | if not os.path.exists(filename): 13 | 14 | dirname = os.path.dirname(filename) 15 | if not os.path.exists(dirname): 16 | os.makedirs(dirname) 17 | 18 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 19 | urlretrieve(url, filename, reporthook=t.update_to) 20 | 21 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/layer_optimizer.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | 3 | def opt_params(parm, lr, wd): 4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd} 5 | 6 | class LayerOptimizer(): 7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None): 8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups] 9 | if not isinstance(lrs, Iterable): lrs=[lrs] 10 | if len(lrs)==1: lrs=lrs*len(layer_groups) 11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds] 13 | if len(wds)==1: wds=wds*len(layer_groups) 14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds 15 | self.opt = opt_fn(self.opt_params()) 16 | 17 | def opt_params(self): 18 | assert(len(self.layer_groups) == len(self.lrs)) 19 | assert(len(self.layer_groups) == len(self.wds)) 20 | params = list(zip(self.layer_groups,self.lrs,self.wds)) 21 | return [opt_params(*p) for p in params] 22 | 23 | @property 24 | def lr(self): return self.lrs[-1] 25 | 26 | @property 27 | def mom(self): 28 | if 'betas' in self.opt.param_groups[0]: 29 | return self.opt.param_groups[0]['betas'][0] 30 | else: 31 | return self.opt.param_groups[0]['momentum'] 32 | 33 | def set_lrs(self, lrs): 34 | if not isinstance(lrs, Iterable): lrs=[lrs] 35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups) 36 | set_lrs(self.opt, lrs) 37 | self.lrs=lrs 38 | 39 | def set_wds(self, wds): 40 | if not isinstance(wds, Iterable): wds=[wds] 41 | if len(wds)==1: wds=wds*len(self.layer_groups) 42 | set_wds(self.opt, wds) 43 | self.wds=wds 44 | 45 | def set_mom(self,momentum): 46 | if 'betas' in self.opt.param_groups[0]: 47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1]) 48 | else: 49 | for pg in self.opt.param_groups: pg['momentum'] = momentum 50 | 51 | def set_beta(self,beta): 52 | if 'betas' in self.opt.param_groups[0]: 53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta) 54 | elif 'alpha' in self.opt.param_groups[0]: 55 | for pg in self.opt.param_groups: pg['alpha'] = beta 56 | 57 | def set_opt_fn(self, opt_fn): 58 | if type(self.opt) != type(opt_fn(self.opt_params())): 59 | self.opt = opt_fn(self.opt_params()) 60 | 61 | def zip_strict_(l, r): 62 | assert(len(l) == len(r)) 63 | return zip(l, r) 64 | 65 | def set_lrs(opt, lrs): 66 | if not isinstance(lrs, Iterable): lrs=[lrs] 67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups) 68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr 69 | 70 | def set_wds(opt, wds): 71 | if not isinstance(wds, Iterable): wds=[wds] 72 | if len(wds)==1: wds=wds*len(opt.param_groups) 73 | assert(len(opt.param_groups) == len(wds)) 74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd 75 | 76 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/layers.py: -------------------------------------------------------------------------------- 1 | class AdaptiveConcatPool2d(nn.Module): 2 | def __init__(self, sz=None): 3 | super().__init__() 4 | sz = sz or (1,1) 5 | self.ap = nn.AdaptiveAvgPool2d(sz) 6 | self.mp = nn.AdaptiveMaxPool2d(sz) 7 | def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1) 8 | 9 | class Lambda(nn.Module): 10 | def __init__(self, f): super().__init__(); self.f=f 11 | def forward(self, x): return self.f(x) 12 | 13 | class Flatten(nn.Module): 14 | def __init__(self): super().__init__() 15 | def forward(self, x): return x.view(x.size(0), -1) 16 | 17 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/losses.py: -------------------------------------------------------------------------------- 1 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9): 2 | y_pred = (y_pred.float() > threshold).float() 3 | y_true = y_true.float() 4 | tp = (y_pred * y_true).sum(dim=1) 5 | precision = tp / (y_pred.sum(dim=1)+eps) 6 | recall = tp / (y_true.sum(dim=1)+eps) 7 | return torch.mean( 8 | 
precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2)) 9 | 10 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/metrics.py: -------------------------------------------------------------------------------- 1 | from .imports import * 2 | 3 | 4 | def accuracy_np(preds, targs): 5 | preds = np.argmax(preds, 1) 6 | return (preds==targs).mean() 7 | 8 | def accuracy(preds, targs): 9 | preds = torch.max(preds, dim=1)[1] 10 | return (preds==targs).float().mean() 11 | 12 | def accuracy_thresh(thresh): 13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh) 14 | 15 | def accuracy_multi(preds, targs, thresh): 16 | return ((preds>thresh).float()==targs).float().mean() 17 | 18 | def accuracy_multi_np(preds, targs, thresh): 19 | return ((preds>thresh)==targs).mean() 20 | 21 | def recall(preds, targs, thresh=0.5): 22 | pred_pos = preds > thresh 23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 24 | return tpos.sum()/targs.sum() 25 | 26 | def precision(preds, targs, thresh=0.5): 27 | pred_pos = preds > thresh 28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 29 | return tpos.sum()/pred_pos.sum() 30 | 31 | def fbeta(preds, targs, beta, thresh=0.5): 32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall). 33 | This is the micro averaged version where the true positives, false negatives and 34 | false positives are calculated globally (as opposed to on a per label basis). 35 | 36 | beta == 1 places equal weight on precision and recall, b < 1 emphasizes precision and 37 | beta > 1 favors recall. 38 | """ 39 | assert beta > 0, 'beta needs to be greater than 0' 40 | beta2 = beta ** 2 41 | rec = recall(preds, targs, thresh) 42 | prec = precision(preds, targs, thresh) 43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec) 44 | 45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh) 46 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/models/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.tar 3 | checkpoint* 4 | log* 5 | wgts/ 6 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/models/cifar10/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py --lr=0.1 4 | python main.py --resume --lr=0.01 5 | python main.py --resume --lr=0.001 6 | 7 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/models/cifar10/wideresnet.py: -------------------------------------------------------------------------------- 1 | # Cifar10 Wideresnet for Dawn Submission 2 | 3 | from ...layers import * 4 | 5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False) 6 | 7 | def bn(ni, init_zero=False): 8 | m = nn.BatchNorm2d(ni) 9 | m.weight.data.fill_(0 if init_zero else 1) 10 | m.bias.data.zero_() 11 | return m 12 | 13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False): 14 | bn_initzero = bn(ni, init_zero=init_zero) 15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride)) 16 | 17 | def noop(x): return x 18 | 19 | class BasicBlock(nn.Module): 20 | def __init__(self, ni, nf, 
stride, drop_p=0.0): 21 | super().__init__() 22 | self.bn = nn.BatchNorm2d(ni) 23 | self.conv1 = conv_2d(ni, nf, 3, stride) 24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1) 25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None 26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop 27 | 28 | def forward(self, x): 29 | x2 = F.relu(self.bn(x), inplace=True) 30 | r = self.shortcut(x2) 31 | x = self.conv1(x2) 32 | if self.drop: x = self.drop(x) 33 | x = self.conv2(x) * 0.2 34 | return x.add_(r) 35 | 36 | 37 | def _make_group(N, ni, nf, block, stride, drop_p): 38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)] 39 | 40 | class WideResNet(nn.Module): 41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16): 42 | super().__init__() 43 | n_channels = [start_nf] 44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k) 45 | 46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1 47 | for i in range(num_groups): 48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p) 49 | 50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1), 51 | Flatten(), nn.Linear(n_channels[3], num_classes)] 52 | self.features = nn.Sequential(*layers) 53 | 54 | def forward(self, x): return self.features(x) 55 | 56 | 57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.) 58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.) 59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.) 60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2) 61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.) 62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.) 
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2) 64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2) 65 | 66 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/models/darknet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .layers import * 4 | from .layers import * 5 | 6 | 7 | class ConvBN(nn.Module): 8 | "convolutional layer then batchnorm" 9 | 10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0): 11 | super().__init__() 12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01) 14 | self.relu = nn.LeakyReLU(0.1, inplace=True) 15 | 16 | def forward(self, x): return self.relu(self.bn(self.conv(x))) 17 | 18 | class DarknetBlock(nn.Module): 19 | def __init__(self, ch_in): 20 | super().__init__() 21 | ch_hid = ch_in//2 22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0) 23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x): return self.conv2(self.conv1(x)) + x 26 | 27 | class Darknet(nn.Module): 28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)" 29 | 30 | def make_group_layer(self, ch_in, num_blocks, stride=1): 31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)] 32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2)) 33 | return layers 34 | 35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32): 36 | super().__init__() 37 | nf = start_nf 38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)] 39 | for i,nb in enumerate(num_blocks): 40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2)) 41 | nf *= 2 42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)] 43 | self.layers = nn.Sequential(*layers) 44 | 45 | def forward(self, x): return self.layers(x) 46 | 47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes) 48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes) 49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24) 50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16) 51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes) 52 | 53 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/rnn_train.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/set_spawn.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import set_start_method 2 | set_start_method('spawn') 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/fastai/transforms_pil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image. 
7 | 8 | Args: 9 | n_holes (int): Number of patches to cut out of each image. 10 | length (int): The length (in pixels) of each square patch. 11 | """ 12 | def __init__(self, n_holes, length): 13 | self.n_holes = n_holes 14 | self.length = length 15 | 16 | def __call__(self, img): 17 | """ 18 | Args: 19 | img (Tensor): Tensor image of size (C, H, W). 20 | Returns: 21 | Tensor: Image with n_holes of dimension length x length cut out of it. 22 | """ 23 | h = img.size(1) 24 | w = img.size(2) 25 | 26 | mask = np.ones((h, w), np.float32) 27 | 28 | for n in range(self.n_holes): 29 | y = np.random.randint(h) 30 | x = np.random.randint(w) 31 | 32 | y1 = int(np.clip(y - self.length // 2, 0, h))  # integer bounds: numpy slicing rejects floats 33 | y2 = int(np.clip(y + self.length // 2, 0, h)) 34 | x1 = int(np.clip(x - self.length // 2, 0, w)) 35 | x2 = int(np.clip(x + self.length // 2, 0, w)) 36 | 37 | mask[y1: y2, x1: x2] = 0. 38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = .git courses -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/requirements.txt: -------------------------------------------------------------------------------- 1 | bcolz>=1.1.2 2 | bleach>=2.0.0 3 | certifi>=2016.2.28 4 | cycler>=0.10.0 5 | decorator>=4.1.2 6 | entrypoints>=0.2.3 7 | graphviz>=0.8.2 8 | html5lib>=0.999999999 9 | ipykernel>=4.6.1 10 | ipython>=6.2.0 11 | ipython-genutils>=0.2.0 12 | ipywidgets>=7.0.1 13 | isoweek>=1.3.3 14 | jedi>=0.10.2 15 | Jinja2>=2.9.6 16 | jsonschema>=2.6.0 17 | jupyter>=1.0.0 18 | jupyter-client>=5.1.0 19 | jupyter-console>=5.2.0 20 | jupyter-core>=4.3.0 21 | MarkupSafe>=1.0 22 | matplotlib>=2.0.2 23 | mistune>=0.7.4 24 | nbconvert>=5.3.1 25 | nbformat>=4.4.0 26 | notebook>=5.1.0 27 | numpy>=1.13.1 28 | olefile>=0.44 29 | opencv-python>=3.3.0.10 30 | pandas>=0.20.3 31 | pandas_summary>=0.0.41 32 | pandocfilters>=1.4.2 33 | pexpect>=4.2.1 34 | pickleshare>=0.7.4 35 | Pillow>=4.2.1 36 | prompt-toolkit>=1.0.15 37 | ptyprocess>=0.5.2 38 | Pygments>=2.2.0 39 | pyparsing>=2.2.0 40 | pytest>=3.5.0 41 | python-dateutil>=2.6.1 42 | pytz>=2017.2 43 | PyYAML>=3.12 44 | pyzmq>=16.0.2 45 | qtconsole>=4.3.1 46 | scipy>=0.19.1 47 | seaborn>=0.8.1 48 | simplegeneric>=0.8.1 49 | six>=1.11.0 50 | sklearn_pandas>=1.6.0 51 | terminado>=0.6 52 | testpath>=0.3.1 53 | torch<0.4 54 | torchtext>=0.2.3 55 | torchvision>=0.2.0 56 | tornado>=4.5.2,<5 57 | tqdm>=4.15.0 58 | traitlets>=4.3.2 59 | wcwidth>=0.1.7 60 | webencodings>=0.5.1 61 | widgetsnbextension>=3.0.3 62 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/setup.py: -------------------------------------------------------------------------------- 1 | 2 | # coding: utf-8 3 | 4 | """ Setup script for installing fastai """ 5 | 6 | #from distutils.core import setup 7 | from setuptools import setup 8 | 9 | setup( 10 | name = "fastai", 11 | packages = ['fastai', 'fastai/models',
'fastai/models/cifar10'], 12 | version = '0.7.0', 13 | description = "The fastai deep learning and machine learning library.", 14 | author = "Jeremy Howard and contributors", 15 | author_email = "info@fast.ai", 16 | license = "Apache License 2.0", 17 | url = "https://github.com/fastai/fastai", 18 | download_url = 'https://github.com/fastai/fastai/archive/0.7.0.tar.gz', 19 | install_requires = 20 | ['bcolz', 'bleach', 'certifi', 'cycler', 'decorator', 'entrypoints', 'feather-format', 'graphviz', 'html5lib', 21 | 'ipykernel', 'ipython', 'ipython-genutils', 'ipywidgets', 'isoweek', 'jedi', 'Jinja2', 'jsonschema', 'jupyter', 22 | 'MarkupSafe', 'matplotlib', 'numpy', 'opencv-python', 'pandas', 23 | 'pandas_summary', 'pickleshare', 'Pillow', 'plotnine', 24 | 'ptyprocess', 'Pygments', 'pyparsing', 'python-dateutil', 'pytz', 'PyYAML', 'pyzmq', 'scipy', 25 | 'seaborn', 'simplegeneric', 'sklearn_pandas', 'testpath', 'torch<0.4', 'torchtext', 'torchvision', 'tornado', 'tqdm', 26 | 'traitlets', 'wcwidth', 'webencodings', 'widgetsnbextension'], 27 | keywords = ['deeplearning', 'pytorch', 'machinelearning'], 28 | classifiers = ['Development Status :: 3 - Alpha', 29 | 'Programming Language :: Python', 30 | 'Programming Language :: Python :: 3.6', 31 | 'Topic :: Scientific/Engineering :: Artificial Intelligence'] 32 | ) 33 | 34 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('agg') 3 | 4 | # the above imports are fixing the TLS issue: 5 | # ```ImportError: dlopen: cannot load any more object with static TLS``` 6 | # they were set after experimenting with the test sets on ubuntu 16.04 7 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tests/test_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastai.core import partition 3 | 4 | 5 | def test_partition_functionality(): 6 | sz = 2 7 | a = [1,2,3,4,5] 8 | ex = [[1,2],[3,4],[5]] 9 | result = partition(a, sz) 10 | assert len(result) == len(ex) 11 | assert all([a == b for a, b in zip(result, ex)]) 12 | 13 | sz = 3 14 | ex = [[1,2,3],[4,5]] 15 | result = partition(a, sz) 16 | assert len(result) == len(ex) 17 | assert all([a == b for a,b in zip(result, ex)]) 18 | 19 | sz = 1 20 | ex = [[1],[2],[3],[4],[5]] 21 | result = partition(a, sz) 22 | assert len(result) == len(ex) 23 | assert all([a == b for a,b in zip(result, ex)]) 24 | 25 | sz = 6 26 | ex = [[1,2,3,4,5]] 27 | result = partition(a, sz) 28 | assert len(result) == len(ex) 29 | assert all([a == b for a,b in zip(result, ex)]) 30 | 31 | sz = 3 32 | a = [] 33 | result = partition(a, sz) 34 | assert len(result) == 0 35 | 36 | def test_partition_error_handling(): 37 | sz = 0 38 | a = [1,2,3,4,5] 39 | with pytest.raises(ValueError): 40 | partition(a, sz) 41 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tests/test_lsuv_initializer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | import pytest 6 | import torch 7 | import torch.nn as nn 8 | import torchvision.models as models 9 | from fastai.core import VV 10 | from fastai.lsuv_initializer import apply_lsuv_init 11 | 12 | 13 | 
@pytest.fixture 14 | def image_data(): 15 | images_to_process = [] 16 | for img_fname in os.listdir('fastai/images'): 17 | img = cv2.imread(os.path.join('fastai/images', img_fname)) 18 | images_to_process.append(np.transpose(cv2.resize(img, (224,224)), (2,0,1))) 19 | data = np.array(images_to_process).astype(np.float32) 20 | return VV(torch.from_numpy(data)) 21 | 22 | 23 | def add_hooks(m, fn): 24 | hooks = [] 25 | def add_hook(m): 26 | if (isinstance(m, nn.Conv2d)) or (isinstance(m, nn.Linear)): 27 | hooks.append(m.register_forward_hook(fn)) 28 | m.apply(add_hook) 29 | return hooks 30 | def remove_hooks(hooks): [h.remove() for h in hooks] 31 | 32 | def run_with_capture(m, data): 33 | activation_variances = [] 34 | def capture_hook(self, input, output): 35 | activation_variances.append(np.var(output.data.cpu().numpy())) 36 | hooks = add_hooks(m, capture_hook) 37 | m(data) 38 | remove_hooks(hooks) 39 | return activation_variances 40 | 41 | def test_fast_initialization_without_orthonormal(image_data): 42 | alexnet = models.alexnet(pretrained=False) 43 | pre_init_var = run_with_capture(alexnet, image_data) 44 | assert pre_init_var[0] >= 1000 # the first few pre-init variances are huge, 45 | assert pre_init_var[1] >= 100 # even larger than these conservative tests. 46 | 47 | tol = 0.1 48 | alexnet = apply_lsuv_init(alexnet, image_data, std_tol=tol, do_orthonorm=False, cuda=False) 49 | *post_init_var, final_var = run_with_capture(alexnet, image_data) 50 | for var in post_init_var: 51 | assert 2 <= var <= 4 52 | assert final_var == pytest.approx(1, tol**2) 53 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tests/test_samplers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from fastai.text import SortSampler, SortishSampler 4 | 5 | 6 | def test_sort_sampler_sorts_all_descending(): 7 | bs = 4 8 | n = bs*100 9 | data = 2 * np.arange(n) 10 | samp = list(SortSampler(data, lambda i: data[i])) 11 | 12 | # The sample is a permutation of the indices. 13 | assert sorted(samp) == list(range(n)) 14 | # And that "permutation" is for descending data order. 15 | assert all(s1 > s2 for s1, s2 in zip(samp, samp[1:])) 16 | 17 | 18 | def test_sortish_sampler_sorts_each_batch_descending(): 19 | bs = 4 20 | n = bs*100 21 | data = 2 * np.arange(n) 22 | samp = list(SortishSampler(data, lambda i: data[i], bs)) 23 | 24 | # The sample is a permutation of the indices. 25 | assert sorted(samp) == list(range(n)) 26 | # And that permutation is kind of reverse sorted. 
27 | assert all( 28 | s1 > s2 or (i+1) % bs == 0 # don't check batch boundaries 29 | for i, (s1, s2) in enumerate(zip(samp, samp[1:])) 30 | ) 31 | assert samp[0] == max(samp) 32 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '../') 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/.gitignore: -------------------------------------------------------------------------------- 1 | weights/ 2 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/fastai/__init__.py -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/executors.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import itertools 3 | import time 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | 7 | class LazyThreadPoolExecutor(ThreadPoolExecutor): 8 | def map(self, fn, *iterables, timeout=None, chunksize=1, prefetch=None): 9 | """ 10 | Collects iterables lazily, rather than immediately. 11 | Docstring same as parent: https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.Executor 12 | Implementation taken from this PR: https://github.com/python/cpython/pull/707 13 | """ 14 | if timeout is not None: end_time = timeout + time.time() 15 | if prefetch is None: prefetch = self._max_workers 16 | if prefetch < 0: raise ValueError("prefetch count may not be negative") 17 | argsiter = zip(*iterables) 18 | fs = collections.deque(self.submit(fn, *args) for args in itertools.islice(argsiter, self._max_workers+prefetch)) 19 | # Yield must be hidden in closure so that the futures are submitted before the first iterator value is required.
20 | def result_iterator(): 21 | nonlocal argsiter 22 | try: 23 | while fs: 24 | res = fs[0].result() if timeout is None else fs[0].result(end_time-time.time()) 25 | # Got a result, future needn't be cancelled 26 | del fs[0] 27 | # Dispatch next task before yielding to keep pipeline full 28 | if argsiter: 29 | try: 30 | args = next(argsiter) 31 | except StopIteration: 32 | argsiter = None 33 | else: 34 | fs.append(self.submit(fn, *args)) 35 | yield res 36 | finally: 37 | for future in fs: future.cancel() 38 | return result_iterator() -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/fp16.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class FP16(nn.Module): 6 | def __init__(self, module): 7 | super(FP16, self).__init__() 8 | self.module = batchnorm_to_fp32(module.half()) 9 | 10 | def forward(self, input): 11 | return self.module(input.half()) 12 | 13 | def load_state_dict(self, *inputs, **kwargs): 14 | self.module.load_state_dict(*inputs, **kwargs) 15 | 16 | def state_dict(self, *inputs, **kwargs): 17 | return self.module.state_dict(*inputs, **kwargs) 18 | 19 | def batchnorm_to_fp32(module): 20 | ''' 21 | BatchNorm layers need to keep their parameters in single precision. 22 | Find all such layers and convert them back to float. This can't 23 | be done with the built-in .apply, as that function would apply 24 | fn to all modules, parameters, and buffers. Thus we wouldn't 25 | be able to guard the float conversion based on the module type. 26 | ''' 27 | if isinstance(module, nn.modules.batchnorm._BatchNorm): 28 | module.float() 29 | for child in module.children(): 30 | batchnorm_to_fp32(child) 31 | return module 32 | 33 | def copy_model_to_fp32(m, optim): 34 | """ Creates an fp32 copy of the model parameters and points the optimizer at it. 35 | """ 36 | fp32_params = [m_param.clone().type(torch.cuda.FloatTensor).detach() for m_param in m.parameters()] 37 | optim_groups = [group['params'] for group in optim.param_groups] 38 | iter_fp32_params = iter(fp32_params) 39 | for group_params in optim_groups: 40 | for i in range(len(group_params)): 41 | fp32_param = next(iter_fp32_params) 42 | fp32_param.requires_grad = group_params[i].requires_grad 43 | group_params[i] = fp32_param 44 | return fp32_params 45 | 46 | def copy_fp32_to_model(m, fp32_params): 47 | m_params = list(m.parameters()) 48 | for fp32_param, m_param in zip(fp32_params, m_params): 49 | m_param.data.copy_(fp32_param.data) 50 | 51 | def update_fp32_grads(fp32_params, m): 52 | m_params = list(m.parameters()) 53 | for fp32_param, m_param in zip(fp32_params, m_params): 54 | if fp32_param.grad is None: 55 | fp32_param.grad = nn.Parameter(fp32_param.data.new().resize_(*fp32_param.data.size())) 56 | fp32_param.grad.data.copy_(m_param.grad.data) 57 | 58 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/images/industrial_fishing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/fastai/images/industrial_fishing.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/imports.py:
-------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import matplotlib 4 | import numpy as np 5 | 6 | matplotlib.rc('animation', html='html5') 7 | np.set_printoptions(precision=5, linewidth=110, suppress=True) 8 | 9 | from ipykernel.kernelapp import IPKernelApp 10 | def in_notebook(): return IPKernelApp.initialized() 11 | 12 | def in_ipynb(): 13 | try: 14 | cls = get_ipython().__class__.__name__ 15 | return cls == 'ZMQInteractiveShell' 16 | except NameError: 17 | return False 18 | 19 | import tqdm as tq 20 | 21 | 22 | def clear_tqdm(): 23 | inst = getattr(tq.tqdm, '_instances', None) 24 | if not inst: return 25 | try: 26 | for i in range(len(inst)): inst.pop().close() 27 | except Exception: 28 | pass 29 | 30 | if in_notebook(): 31 | def tqdm(*args, **kwargs): 32 | clear_tqdm() 33 | return tq.tqdm(*args, file=sys.stdout, **kwargs) 34 | def trange(*args, **kwargs): 35 | clear_tqdm() 36 | return tq.trange(*args, file=sys.stdout, **kwargs) 37 | else: 38 | from tqdm import tqdm, trange 39 | tnrange=trange 40 | tqdm_notebook=tqdm 41 | 42 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/initializers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def cond_init(m, init_fn): 4 | if not isinstance(m, (nn.BatchNorm1d,nn.BatchNorm2d,nn.BatchNorm3d)): 5 | if hasattr(m, 'weight'): init_fn(m.weight) 6 | if hasattr(m, 'bias'): m.bias.data.fill_(0.) 7 | 8 | def apply_init(m, init_fn): 9 | m.apply(lambda x: cond_init(x, init_fn)) 10 | 11 | 12 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.request import urlretrieve 3 | 4 | from tqdm import tqdm 5 | 6 | 7 | class TqdmUpTo(tqdm): 8 | def update_to(self, b=1, bsize=1, tsize=None): 9 | if tsize is not None: self.total = tsize 10 | self.update(b * bsize - self.n) 11 | 12 | def get_data(url, filename): 13 | if not os.path.exists(filename): 14 | 15 | dirname = os.path.dirname(filename) 16 | if not os.path.exists(dirname): 17 | os.makedirs(dirname) 18 | 19 | with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: 20 | urlretrieve(url, filename, reporthook=t.update_to) 21 | 22 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/layer_optimizer.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | 3 | def opt_params(parm, lr, wd): 4 | return {'params': chain_params(parm), 'lr':lr, 'weight_decay':wd} 5 | 6 | class LayerOptimizer(): 7 | def __init__(self, opt_fn, layer_groups, lrs, wds=None): 8 | if not isinstance(layer_groups, (list,tuple)): layer_groups=[layer_groups] 9 | if not isinstance(lrs, Iterable): lrs=[lrs] 10 | if len(lrs)==1: lrs=lrs*len(layer_groups) 11 | if wds is None: wds=0.
12 | if not isinstance(wds, Iterable): wds=[wds] 13 | if len(wds)==1: wds=wds*len(layer_groups) 14 | self.layer_groups,self.lrs,self.wds = layer_groups,lrs,wds 15 | self.opt = opt_fn(self.opt_params()) 16 | 17 | def opt_params(self): 18 | assert(len(self.layer_groups) == len(self.lrs)) 19 | assert(len(self.layer_groups) == len(self.wds)) 20 | params = list(zip(self.layer_groups,self.lrs,self.wds)) 21 | return [opt_params(*p) for p in params] 22 | 23 | @property 24 | def lr(self): return self.lrs[-1] 25 | 26 | @property 27 | def mom(self): 28 | if 'betas' in self.opt.param_groups[0]: 29 | return self.opt.param_groups[0]['betas'][0] 30 | else: 31 | return self.opt.param_groups[0]['momentum'] 32 | 33 | def set_lrs(self, lrs): 34 | if not isinstance(lrs, Iterable): lrs=[lrs] 35 | if len(lrs)==1: lrs=lrs*len(self.layer_groups) 36 | set_lrs(self.opt, lrs) 37 | self.lrs=lrs 38 | 39 | def set_wds(self, wds): 40 | if not isinstance(wds, Iterable): wds=[wds] 41 | if len(wds)==1: wds=wds*len(self.layer_groups) 42 | set_wds(self.opt, wds) 43 | self.wds=wds 44 | 45 | def set_mom(self,momentum): 46 | if 'betas' in self.opt.param_groups[0]: 47 | for pg in self.opt.param_groups: pg['betas'] = (momentum, pg['betas'][1]) 48 | else: 49 | for pg in self.opt.param_groups: pg['momentum'] = momentum 50 | 51 | def set_beta(self,beta): 52 | if 'betas' in self.opt.param_groups[0]: 53 | for pg in self.opt.param_groups: pg['betas'] = (pg['betas'][0],beta) 54 | elif 'alpha' in self.opt.param_groups[0]: 55 | for pg in self.opt.param_groups: pg['alpha'] = beta 56 | 57 | def set_opt_fn(self, opt_fn): 58 | if type(self.opt) != type(opt_fn(self.opt_params())): 59 | self.opt = opt_fn(self.opt_params()) 60 | 61 | def zip_strict_(l, r): 62 | assert(len(l) == len(r)) 63 | return zip(l, r) 64 | 65 | def set_lrs(opt, lrs): 66 | if not isinstance(lrs, Iterable): lrs=[lrs] 67 | if len(lrs)==1: lrs=lrs*len(opt.param_groups) 68 | for pg,lr in zip_strict_(opt.param_groups,lrs): pg['lr'] = lr 69 | 70 | def set_wds(opt, wds): 71 | if not isinstance(wds, Iterable): wds=[wds] 72 | if len(wds)==1: wds=wds*len(opt.param_groups) 73 | assert(len(opt.param_groups) == len(wds)) 74 | for pg,wd in zip_strict_(opt.param_groups,wds): pg['weight_decay'] = wd 75 | 76 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/layers.py: -------------------------------------------------------------------------------- 1 | class AdaptiveConcatPool2d(nn.Module): 2 | def __init__(self, sz=None): 3 | super().__init__() 4 | sz = sz or (1,1) 5 | self.ap = nn.AdaptiveAvgPool2d(sz) 6 | self.mp = nn.AdaptiveMaxPool2d(sz) 7 | def forward(self, x): return torch.cat([self.mp(x), self.ap(x)], 1) 8 | 9 | class Lambda(nn.Module): 10 | def __init__(self, f): super().__init__(); self.f=f 11 | def forward(self, x): return self.f(x) 12 | 13 | class Flatten(nn.Module): 14 | def __init__(self): super().__init__() 15 | def forward(self, x): return x.view(x.size(0), -1) 16 | 17 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/losses.py: -------------------------------------------------------------------------------- 1 | def fbeta_torch(y_true, y_pred, beta, threshold, eps=1e-9): 2 | y_pred = (y_pred.float() > threshold).float() 3 | y_true = y_true.float() 4 | tp = (y_pred * y_true).sum(dim=1) 5 | precision = tp / (y_pred.sum(dim=1)+eps) 6 | recall = tp / (y_true.sum(dim=1)+eps) 7 | return 
torch.mean( 8 | precision*recall / (precision*(beta**2)+recall+eps) * (1+beta**2)) 9 | 10 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/metrics.py: -------------------------------------------------------------------------------- 1 | from .imports import * 2 | 3 | 4 | def accuracy_np(preds, targs): 5 | preds = np.argmax(preds, 1) 6 | return (preds==targs).mean() 7 | 8 | def accuracy(preds, targs): 9 | preds = torch.max(preds, dim=1)[1] 10 | return (preds==targs).float().mean() 11 | 12 | def accuracy_thresh(thresh): 13 | return lambda preds,targs: accuracy_multi(preds, targs, thresh) 14 | 15 | def accuracy_multi(preds, targs, thresh): 16 | return ((preds>thresh).float()==targs).float().mean() 17 | 18 | def accuracy_multi_np(preds, targs, thresh): 19 | return ((preds>thresh)==targs).mean() 20 | 21 | def recall(preds, targs, thresh=0.5): 22 | pred_pos = preds > thresh 23 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 24 | return tpos.sum()/targs.sum() 25 | 26 | def precision(preds, targs, thresh=0.5): 27 | pred_pos = preds > thresh 28 | tpos = torch.mul((targs.byte() == pred_pos), targs.byte()) 29 | return tpos.sum()/pred_pos.sum() 30 | 31 | def fbeta(preds, targs, beta, thresh=0.5): 32 | """Calculates the F-beta score (the weighted harmonic mean of precision and recall). 33 | This is the micro averaged version where the true positives, false negatives and 34 | false positives are calculated globally (as opposed to on a per label basis). 35 | 36 | beta == 1 places equal weight on precision and recall, b < 1 emphasizes precision and 37 | beta > 1 favors recall. 38 | """ 39 | assert beta > 0, 'beta needs to be greater than 0' 40 | beta2 = beta ** 2 41 | rec = recall(preds, targs, thresh) 42 | prec = precision(preds, targs, thresh) 43 | return (1 + beta2) * prec * rec / (beta2 * prec + rec) 44 | 45 | def f1(preds, targs, thresh=0.5): return fbeta(preds, targs, 1, thresh) 46 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/models/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.tar 3 | checkpoint* 4 | log* 5 | wgts/ 6 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/models/cifar10/main.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python main.py --lr=0.1 4 | python main.py --resume --lr=0.01 5 | python main.py --resume --lr=0.001 6 | 7 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/models/cifar10/wideresnet.py: -------------------------------------------------------------------------------- 1 | # Cifar10 Wideresnet for Dawn Submission 2 | 3 | from ...layers import * 4 | 5 | def conv_2d(ni, nf, ks, stride): return nn.Conv2d(ni, nf, kernel_size=ks, stride=stride, padding=ks//2, bias=False) 6 | 7 | def bn(ni, init_zero=False): 8 | m = nn.BatchNorm2d(ni) 9 | m.weight.data.fill_(0 if init_zero else 1) 10 | m.bias.data.zero_() 11 | return m 12 | 13 | def bn_relu_conv(ni, nf, ks, stride, init_zero=False): 14 | bn_initzero = bn(ni, init_zero=init_zero) 15 | return nn.Sequential(bn_initzero, nn.ReLU(inplace=True), conv_2d(ni, nf, ks, stride)) 16 | 17 | def noop(x): return x 18 | 19 | class 
BasicBlock(nn.Module): 20 | def __init__(self, ni, nf, stride, drop_p=0.0): 21 | super().__init__() 22 | self.bn = nn.BatchNorm2d(ni) 23 | self.conv1 = conv_2d(ni, nf, 3, stride) 24 | self.conv2 = bn_relu_conv(nf, nf, 3, 1) 25 | self.drop = nn.Dropout(drop_p, inplace=True) if drop_p else None 26 | self.shortcut = conv_2d(ni, nf, 1, stride) if ni != nf else noop 27 | 28 | def forward(self, x): 29 | x2 = F.relu(self.bn(x), inplace=True) 30 | r = self.shortcut(x2) 31 | x = self.conv1(x2) 32 | if self.drop: x = self.drop(x) 33 | x = self.conv2(x) * 0.2 34 | return x.add_(r) 35 | 36 | 37 | def _make_group(N, ni, nf, block, stride, drop_p): 38 | return [block(ni if i == 0 else nf, nf, stride if i == 0 else 1, drop_p) for i in range(N)] 39 | 40 | class WideResNet(nn.Module): 41 | def __init__(self, num_groups, N, num_classes, k=1, drop_p=0.0, start_nf=16): 42 | super().__init__() 43 | n_channels = [start_nf] 44 | for i in range(num_groups): n_channels.append(start_nf*(2**i)*k) 45 | 46 | layers = [conv_2d(3, n_channels[0], 3, 1)] # conv1 47 | for i in range(num_groups): 48 | layers += _make_group(N, n_channels[i], n_channels[i+1], BasicBlock, (1 if i==0 else 2), drop_p) 49 | 50 | layers += [nn.BatchNorm2d(n_channels[3]), nn.ReLU(inplace=True), nn.AdaptiveAvgPool2d(1), 51 | Flatten(), nn.Linear(n_channels[3], num_classes)] 52 | self.features = nn.Sequential(*layers) 53 | 54 | def forward(self, x): return self.features(x) 55 | 56 | 57 | def wrn_22(): return WideResNet(num_groups=3, N=3, num_classes=10, k=6, drop_p=0.) 58 | def wrn_22_k8(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.) 59 | def wrn_22_k10(): return WideResNet(num_groups=3, N=3, num_classes=10, k=10, drop_p=0.) 60 | def wrn_22_k8_p2(): return WideResNet(num_groups=3, N=3, num_classes=10, k=8, drop_p=0.2) 61 | def wrn_28(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.) 62 | def wrn_28_k8(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.) 
63 | def wrn_28_k8_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=8, drop_p=0.2) 64 | def wrn_28_p2(): return WideResNet(num_groups=3, N=4, num_classes=10, k=6, drop_p=0.2) 65 | 66 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/models/darknet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .layers import * 4 | from .layers import * 5 | 6 | 7 | class ConvBN(nn.Module): 8 | "convolutional layer then batchnorm" 9 | 10 | def __init__(self, ch_in, ch_out, kernel_size = 3, stride=1, padding=0): 11 | super().__init__() 12 | self.conv = nn.Conv2d(ch_in, ch_out, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 13 | self.bn = nn.BatchNorm2d(ch_out, momentum=0.01) 14 | self.relu = nn.LeakyReLU(0.1, inplace=True) 15 | 16 | def forward(self, x): return self.relu(self.bn(self.conv(x))) 17 | 18 | class DarknetBlock(nn.Module): 19 | def __init__(self, ch_in): 20 | super().__init__() 21 | ch_hid = ch_in//2 22 | self.conv1 = ConvBN(ch_in, ch_hid, kernel_size=1, stride=1, padding=0) 23 | self.conv2 = ConvBN(ch_hid, ch_in, kernel_size=3, stride=1, padding=1) 24 | 25 | def forward(self, x): return self.conv2(self.conv1(x)) + x 26 | 27 | class Darknet(nn.Module): 28 | "Replicates the darknet classifier from the YOLOv3 paper (table 1)" 29 | 30 | def make_group_layer(self, ch_in, num_blocks, stride=1): 31 | layers = [ConvBN(ch_in,ch_in*2,stride=stride)] 32 | for i in range(num_blocks): layers.append(DarknetBlock(ch_in*2)) 33 | return layers 34 | 35 | def __init__(self, num_blocks, num_classes=1000, start_nf=32): 36 | super().__init__() 37 | nf = start_nf 38 | layers = [ConvBN(3, nf, kernel_size=3, stride=1, padding=1)] 39 | for i,nb in enumerate(num_blocks): 40 | layers += self.make_group_layer(nf, nb, stride=(1 if i==1 else 2)) 41 | nf *= 2 42 | layers += [nn.AdaptiveAvgPool2d(1), Flatten(), nn.Linear(nf, num_classes)] 43 | self.layers = nn.Sequential(*layers) 44 | 45 | def forward(self, x): return self.layers(x) 46 | 47 | def darknet_53(num_classes=1000): return Darknet([1,2,8,8,4], num_classes) 48 | def darknet_small(num_classes=1000): return Darknet([1,2,4,8,4], num_classes) 49 | def darknet_mini(num_classes=1000): return Darknet([1,2,4,4,2], num_classes, start_nf=24) 50 | def darknet_mini2(num_classes=1000): return Darknet([1,2,8,8,4], num_classes, start_nf=16) 51 | def darknet_mini3(num_classes=1000): return Darknet([1,2,4,4], num_classes) 52 | 53 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/rnn_train.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/set_spawn.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import set_start_method 2 | set_start_method('spawn') 3 | 4 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/fastai/transforms_pil.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image. 
7 | 8 | Args: 9 | n_holes (int): Number of patches to cut out of each image. 10 | length (int): The length (in pixels) of each square patch. 11 | """ 12 | def __init__(self, n_holes, length): 13 | self.n_holes = n_holes 14 | self.length = length 15 | 16 | def __call__(self, img): 17 | """ 18 | Args: 19 | img (Tensor): Tensor image of size (C, H, W). 20 | Returns: 21 | Tensor: Image with n_holes of dimension length x length cut out of it. 22 | """ 23 | h = img.size(1) 24 | w = img.size(2) 25 | 26 | mask = np.ones((h, w), np.float32) 27 | 28 | for n in range(self.n_holes): 29 | y = np.random.randint(h) 30 | x = np.random.randint(w) 31 | 32 | y1 = int(np.clip(y - self.length // 2, 0, h))  # integer bounds: numpy slicing rejects floats 33 | y2 = int(np.clip(y + self.length // 2, 0, h)) 34 | x1 = int(np.clip(x - self.length // 2, 0, w)) 35 | x2 = int(np.clip(x + self.length // 2, 0, w)) 36 | 37 | mask[y1: y2, x1: x2] = 0. 38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/cifar10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/cifar10.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/demba_combustion_engine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/demba_combustion_engine.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/digit.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/digit.gif -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/fashion-mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/fashion-mnist.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/markov_health.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/markov_health.jpg -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/mnist.png
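
The Cutout transform defined in transforms_pil.py above operates on (C, H, W) tensors, so it belongs after ToTensor() in a preprocessing pipeline. A minimal usage sketch, assuming torchvision is available; the import path and parameter values here are illustrative, not something the repository itself ships:

```python
# Illustrative sketch only: compose Cutout (from transforms_pil.py above)
# into a torchvision pipeline. Cutout masks (C, H, W) tensors, so it must
# run after ToTensor(). The import path below is an assumption.
import torchvision.transforms as T
from fastai.transforms_pil import Cutout  # assumed import path

train_tfms = T.Compose([
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    Cutout(n_holes=1, length=16),  # zero out one random 16x16 patch per image
])
```
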
-------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/normal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/normal.jpg -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/overfitting.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/overfitting2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/overfitting2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/sgd2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/sgd2.gif -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/shop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/shop.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/what_is_pytorch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/what_is_pytorch.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/zeiler1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler1.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/zeiler2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler2.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/zeiler3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler3.png -------------------------------------------------------------------------------- /code_summarization_transfer_learning/fastai/tutorials/images/zeiler4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/code_summarization_transfer_learning/fastai/tutorials/images/zeiler4.png -------------------------------------------------------------------------------- /pytorch_model/README.md: -------------------------------------------------------------------------------- 1 | ## Deep Code Search 2 | 3 | In this code base, we demonstrate a joint embedding model built from a code embedding network and a description embedding network, largely based on the work at https://github.com/guxd/deep-code-search. 4 | 5 | 6 | Our contribution is mostly in ironing out bugs, trying out newer and more complicated models, and setting the pipeline up to work with Python. 7 | 8 | ### Usage 9 | To run the codebase, first install the requirements: 10 | 11 | ```bash 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | ### Train 16 | 17 | ```bash 18 | python codesearcher.py --mode train --language java|python 19 | ``` 20 | 21 | ### Code Embedding 22 | 23 | ```bash 24 | python codesearcher.py --mode repr_code --language java|python 25 | ``` 26 | 27 | ### Search 28 | 29 | ```bash 30 | python codesearcher.py --mode search --language java|python 31 | ``` -------------------------------------------------------------------------------- /pytorch_model/java/test.apiseq.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850 3 | size 259877 4 | -------------------------------------------------------------------------------- /pytorch_model/java/test.desc.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:212336e49778a8b2a665c9995cc6016fb2fdf9fae5e9f1e6c743bfd879939e62 3 | size 290579 4 | -------------------------------------------------------------------------------- /pytorch_model/java/test.methname.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c 3 | size 177624 4 | -------------------------------------------------------------------------------- /pytorch_model/java/test.rawcode.txt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e708058d9933fc0e6c79d0dfbc6656a98394b5949626022b4fd6a6db89899b1d 3 | size 3387172 4 | -------------------------------------------------------------------------------- /pytorch_model/java/test.tokens.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9 3 | size 304017 4 |
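
The `--mode search` command in the README above embeds a natural-language query with the trained description encoder and ranks code snippets by cosine similarity against the precomputed, L2-normalized code vectors (`use.codevecs.normalized.h5`). A minimal sketch of that retrieval step follows; the `embed_query` helper and the HDF5 dataset key `'vecs'` are assumptions made for illustration, not the repository's actual API:

```python
# Hedged sketch of the retrieval step behind `--mode search`; not the
# repository's actual implementation. `embed_query` and the 'vecs' dataset
# key are assumptions for illustration only.
import h5py
import numpy as np

def search(query, n_results=5):
    with h5py.File('java/use.codevecs.normalized.h5', 'r') as f:
        codevecs = f['vecs'][:]       # assumed dataset key
    qvec = embed_query(query)         # hypothetical description-encoder helper
    qvec = qvec / np.linalg.norm(qvec)
    sims = codevecs @ qvec            # vectors are L2-normalized, so dot == cosine
    top = np.argsort(-sims)[:n_results]
    with open('java/use.rawcode.txt') as f:
        snippets = f.readlines()
    return [(snippets[i].strip(), float(sims[i])) for i in top]
```

Since the code vectors are stored already normalized, the dot product above is exactly cosine similarity, which keeps search a single matrix-vector multiply.
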
-------------------------------------------------------------------------------- /pytorch_model/java/train.apiseq.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850 3 | size 259877 4 | -------------------------------------------------------------------------------- /pytorch_model/java/train.desc.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:212336e49778a8b2a665c9995cc6016fb2fdf9fae5e9f1e6c743bfd879939e62 3 | size 290579 4 | -------------------------------------------------------------------------------- /pytorch_model/java/train.methname.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c 3 | size 177624 4 | -------------------------------------------------------------------------------- /pytorch_model/java/train.tokens.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9 3 | size 304017 4 | -------------------------------------------------------------------------------- /pytorch_model/java/use.apiseq.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:78cb34396e2f1396e38cc8cad3dbb2ffaa4e0fb9e2a88d62306f87a09d455850 3 | size 259877 4 | -------------------------------------------------------------------------------- /pytorch_model/java/use.codevecs.normalized.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a24d9df51b46934b5e4d050645e02f1cfad1e4ff656bead2174d7fe290c61d3c 3 | size 10101368 4 | -------------------------------------------------------------------------------- /pytorch_model/java/use.methname.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e6e25dd09303501caac7d7d9ee85eac2553bfc4711b51c3428dac69d13ff769c 3 | size 177624 4 | -------------------------------------------------------------------------------- /pytorch_model/java/use.rawcode.txt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e708058d9933fc0e6c79d0dfbc6656a98394b5949626022b4fd6a6db89899b1d 3 | size 3387172 4 | -------------------------------------------------------------------------------- /pytorch_model/java/use.tokens.h5: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eb23480f047db40d536d0cfcc58dd8b1ffcfb0d24457928a9242abf2043408c9 3 | size 304017 4 | -------------------------------------------------------------------------------- /pytorch_model/java/vocab.apiseq.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3c75e10d3f8d4911846941edd1293bed9a25296addd7f049b2ddb1e03007ffe2 3 | size 225989 4 | -------------------------------------------------------------------------------- 
/pytorch_model/java/vocab.desc.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:59310289b8fd4e43fbf44d9f9d01c283dce498cea8dca088484fdf36bad6c0e2 3 | size 167955 4 | -------------------------------------------------------------------------------- /pytorch_model/java/vocab.methname.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7ebfe7b692b1a43318ed584150c931bcd59e69bd826aca88e9624f25a99f2f8f 3 | size 165351 4 | -------------------------------------------------------------------------------- /pytorch_model/java/vocab.tokens.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c8666768e9c11fb5adf4fad54a245720b48b32bf5c028d4b3424a0c5ed4e42f6 3 | size 160199 4 | -------------------------------------------------------------------------------- /pytorch_model/python/small.rawcode.txt: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:eebc835b00f4378ed28c39c6da969eb1018d0941f9060290b392a3c6edda7a8b 3 | size 6342472 4 | -------------------------------------------------------------------------------- /pytorch_model/python/small.test.apiseq.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:3562437b8ce684e476fb63d565d5fa816af7e28c87ecd04ad38f04c7a3e6ad68 3 | size 1800128 4 | -------------------------------------------------------------------------------- /pytorch_model/python/small.test.desc.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:7dd7e2fb3e43bcebc481f5ea58e2cdaff2b1178341d64be00e72647ec1eb5b1f 3 | size 600128 4 | -------------------------------------------------------------------------------- /pytorch_model/python/small.test.methname.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:dc075bb3140324f931da4d387b68fca720cd18f5580ebb6c59d3400c114f9a5c 3 | size 200128 4 | -------------------------------------------------------------------------------- /pytorch_model/python/small.test.tokens.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:202b4708e7caec66550dbb33f335ca43b189109299e866784969dfd8cf88940c 3 | size 2200128 4 | -------------------------------------------------------------------------------- /pytorch_model/python/test.apiseq.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d311ccac692f7b3d9ba98e6e2c023a5111bb45491fffd0ac1f3fe5b1ac403bcb 3 | size 32264948 4 | -------------------------------------------------------------------------------- /pytorch_model/python/test.desc.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:fa9e15d9e05f1049080ff909d978e918106b1b8720f476165d3fe76cae4168eb 3 | size 10755068 4 | -------------------------------------------------------------------------------- /pytorch_model/python/test.methname.npy: 
-------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d21e1d5504a448e313322eb37c481b9ffe0d585011ab6d4aa06d7b0682ea0e5d 3 | size 3585108 4 | -------------------------------------------------------------------------------- /pytorch_model/python/test.tokens.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:f9c8cf49f61ec5b549aed004962ada3399b2564a4bd159f65250afc4c5213a9b 3 | size 39434908 4 | -------------------------------------------------------------------------------- /pytorch_model/python/train.apiseq.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8eab78f2253a13ba95eea301d8e22448996c889bd232a17754e36cb4c00e7a69 3 | size 220083788 4 | -------------------------------------------------------------------------------- /pytorch_model/python/train.desc.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8f32233953f3e720638692f510c1d32f0a7a81270427713addaba39ee95a7757 3 | size 73361348 4 | -------------------------------------------------------------------------------- /pytorch_model/python/train.methname.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:e08163a86474d697ce6b6b00d9afc484c4dbb374d6c6276648716b6e54c81f04 3 | size 24453868 4 | -------------------------------------------------------------------------------- /pytorch_model/python/train.tokens.npy: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:67ac9dc30fbc09e2ff285f1e271a2f425a695274bdb86e668e02c1623d85805a 3 | size 268991268 4 | -------------------------------------------------------------------------------- /pytorch_model/python/vocab.apiseq.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:cfd1a06ffef46b1c97ad104eeb33581ba699815d8a840ff320eb1bd603134c7c 3 | size 187631 4 | -------------------------------------------------------------------------------- /pytorch_model/python/vocab.desc.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:a415c720fe7f100dfb662e39d91961d123ff5aa4468c17cbd94c1a1f53b0f6e9 3 | size 195427 4 | -------------------------------------------------------------------------------- /pytorch_model/python/vocab.methname.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:49622af8642d024b5a610679e7a48b96c1c542d6887d598ebd8df167040e124d 3 | size 192772 4 | -------------------------------------------------------------------------------- /pytorch_model/python/vocab.tokens.pkl: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:db21920d44053464edc8e847fa3ee6b79d8b0a03d7109f30a24d3d3a17d540b2 3 | size 190334 4 | -------------------------------------------------------------------------------- /pytorch_model/requirements.txt: 
-------------------------------------------------------------------------------- 1 | torch==0.4.0 2 | torchvision==0.2.1 3 | tables 4 | numpy 5 | scipy 6 | tqdm 7 | tensorboardx 8 | tensorboard -------------------------------------------------------------------------------- /pytorch_model/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import numpy as np 5 | import torch 6 | 7 | 8 | def cos_np(data1, data2): 9 | """row-wise cosine similarity between two matrices, in numpy""" 10 | dotted = np.dot(data1, np.transpose(data2)) 11 | norm1 = np.linalg.norm(data1, axis=1) 12 | norm2 = np.linalg.norm(data2, axis=1) 13 | matrix_vector_norms = np.multiply(norm1, norm2) 14 | neighbors = np.divide(dotted, matrix_vector_norms) 15 | return neighbors 16 | 17 | 18 | def normalize(data): 19 | """normalize a matrix to unit-length rows""" 20 | normalized_data = data / np.linalg.norm(data, axis=1).reshape((data.shape[0], 1)) 21 | return normalized_data 22 | 23 | 24 | def dot_np(data1, data2): 25 | """dot product; equals cosine similarity when both inputs are row-normalized""" 26 | return np.dot(data1, np.transpose(data2)) 27 | 28 | 29 | ####################################################################### 30 | 31 | def asMinutes(s): 32 | m = math.floor(s / 60) 33 | s -= m * 60 34 | return '%d:%02d' % (m, s)  # zero-pad seconds so 185s renders as 3:05, not 3:5 35 | 36 | 37 | def timeSince(since, percent): 38 | now = time.time() 39 | s = now - since  # elapsed seconds 40 | es = s / percent  # estimated total duration 41 | rs = es - s  # estimated time remaining 42 | return '%s<%s' % (asMinutes(s), asMinutes(rs))  # elapsed<remaining 43 | 44 | 45 | ####################################################################### 46 | 47 | def sent2indexes(sentence, vocab): 48 | '''sentence: a whitespace-separated string whose words must all be in vocab 49 | return: a numpy array of word indices 50 | ''' 51 | return np.array([vocab[word] for word in sentence.strip().split(' ')]) 52 | 53 | 54 | ######################################################################## 55 | 56 | use_cuda = torch.cuda.is_available() 57 | 58 | 59 | def gVar(data):  # wrap numpy arrays/tensors, moving them to the GPU when one is available 60 | tensor = data 61 | if isinstance(data, np.ndarray): 62 | tensor = torch.from_numpy(data) 63 | if use_cuda: 64 | tensor = tensor.cuda() 65 | return tensor 66 | -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chnsh/deep-semantic-code-search/57cf12b90b5ec3a49bd6c04cf2b68888162558b3/screenshot.png --------------------------------------------------------------------------------
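A closing sketch (not part of the repository) of how the similarity helpers in `pytorch_model/utils.py` relate: row-normalizing once with `normalize` and then scoring with `dot_np` reproduces `cos_np` on the raw vectors, which is presumably why precomputed normalized code vectors (`use.codevecs.normalized.h5`) are shipped for search. The random matrices below merely stand in for real code and description embeddings.

```python
# Sketch: dot_np() on normalize()-d inputs matches cos_np() on raw vectors.
import numpy as np

from utils import cos_np, dot_np, normalize  # pytorch_model/utils.py

rng = np.random.RandomState(0)
code_vecs = rng.rand(5, 8)  # stand-ins for code embeddings (5 snippets, dim 8)
query_vec = rng.rand(1, 8)  # stand-in for one description embedding

direct = cos_np(query_vec, code_vecs)  # cosine computed from raw vectors
cached = dot_np(normalize(query_vec), normalize(code_vecs))  # dot of pre-normalized vectors

assert np.allclose(direct, cached)
print(direct.argsort()[0][::-1])  # snippet indices ranked best-first
```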