├── .gitignore ├── Data └── SAMPLE_500.smi ├── Datasets ├── data_splits │ ├── BBBP │ │ ├── test.pkl │ │ └── train.pkl │ ├── ClinTox │ │ ├── test.pkl │ │ └── train.pkl │ ├── ESOL │ │ ├── test.pkl │ │ └── train.pkl │ ├── FreeSolv │ │ ├── test.pkl │ │ └── train.pkl │ ├── HIV │ │ ├── test.pkl │ │ └── train.pkl │ └── Lipophilicity │ │ ├── test.pkl │ │ └── train.pkl ├── download_datasets.py └── extract_smiles.py ├── Featurizers ├── BBBP_featurizer.py ├── Clintox_featurizer.py ├── HIV_featurizer.py ├── rdkit_featurizer.py └── tox21_featurizer.py ├── LICENSE ├── Model ├── config.json ├── download_models.py └── model.py ├── PCA-Kmeans.ipynb ├── Predictors ├── pred_ESOL.py ├── pred_FreeSolv.py ├── pred_Lipophilicity.py ├── pred_siamese_BBBP.py ├── pred_siamese_Clintox.py └── pred_siamese_HIV.py ├── README.md ├── Tokenizer ├── MFBERT_Tokenizer.py └── Model │ ├── dict.txt │ ├── sentencepiece.unigram.model │ ├── special_tokens_map.json │ └── tokenizer_config.json ├── calculate_bulk_tanimoto_similarity.py ├── classification_heads.ipynb ├── fine_tune_BBBP.py ├── fine_tune_ESOL.py ├── fine_tune_HIV.py ├── fine_tune_Lipophilicity.py ├── fine_tune_clintox.py ├── fine_tune_freesolv.py ├── fine_tune_siamese-BBBP.py ├── fine_tune_siamese-ClinTox.py ├── fine_tune_siamese-HIV.py ├── fine_tune_tox21.py ├── main.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/.gitignore -------------------------------------------------------------------------------- /Data/SAMPLE_500.smi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Data/SAMPLE_500.smi -------------------------------------------------------------------------------- /Datasets/data_splits/BBBP/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/BBBP/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/BBBP/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/BBBP/train.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/ClinTox/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/ClinTox/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/ClinTox/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/ClinTox/train.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/ESOL/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/ESOL/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/ESOL/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/ESOL/train.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/FreeSolv/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/FreeSolv/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/FreeSolv/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/FreeSolv/train.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/HIV/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/HIV/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/HIV/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/HIV/train.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/Lipophilicity/test.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/Lipophilicity/test.pkl -------------------------------------------------------------------------------- /Datasets/data_splits/Lipophilicity/train.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/data_splits/Lipophilicity/train.pkl -------------------------------------------------------------------------------- /Datasets/download_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/download_datasets.py -------------------------------------------------------------------------------- /Datasets/extract_smiles.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Datasets/extract_smiles.py -------------------------------------------------------------------------------- /Featurizers/BBBP_featurizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Featurizers/BBBP_featurizer.py -------------------------------------------------------------------------------- /Featurizers/Clintox_featurizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Featurizers/Clintox_featurizer.py -------------------------------------------------------------------------------- /Featurizers/HIV_featurizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Featurizers/HIV_featurizer.py -------------------------------------------------------------------------------- /Featurizers/rdkit_featurizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Featurizers/rdkit_featurizer.py -------------------------------------------------------------------------------- /Featurizers/tox21_featurizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Featurizers/tox21_featurizer.py -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/LICENSE -------------------------------------------------------------------------------- /Model/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Model/config.json -------------------------------------------------------------------------------- /Model/download_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Model/download_models.py -------------------------------------------------------------------------------- /Model/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Model/model.py -------------------------------------------------------------------------------- /PCA-Kmeans.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/PCA-Kmeans.ipynb -------------------------------------------------------------------------------- /Predictors/pred_ESOL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_ESOL.py -------------------------------------------------------------------------------- /Predictors/pred_FreeSolv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_FreeSolv.py -------------------------------------------------------------------------------- /Predictors/pred_Lipophilicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_Lipophilicity.py -------------------------------------------------------------------------------- /Predictors/pred_siamese_BBBP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_siamese_BBBP.py -------------------------------------------------------------------------------- /Predictors/pred_siamese_Clintox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_siamese_Clintox.py -------------------------------------------------------------------------------- /Predictors/pred_siamese_HIV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Predictors/pred_siamese_HIV.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/README.md -------------------------------------------------------------------------------- /Tokenizer/MFBERT_Tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Tokenizer/MFBERT_Tokenizer.py -------------------------------------------------------------------------------- /Tokenizer/Model/dict.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Tokenizer/Model/dict.txt -------------------------------------------------------------------------------- /Tokenizer/Model/sentencepiece.unigram.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Tokenizer/Model/sentencepiece.unigram.model -------------------------------------------------------------------------------- /Tokenizer/Model/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Tokenizer/Model/special_tokens_map.json -------------------------------------------------------------------------------- /Tokenizer/Model/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/Tokenizer/Model/tokenizer_config.json -------------------------------------------------------------------------------- /calculate_bulk_tanimoto_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/calculate_bulk_tanimoto_similarity.py -------------------------------------------------------------------------------- /classification_heads.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/classification_heads.ipynb -------------------------------------------------------------------------------- /fine_tune_BBBP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_BBBP.py -------------------------------------------------------------------------------- /fine_tune_ESOL.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_ESOL.py -------------------------------------------------------------------------------- /fine_tune_HIV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_HIV.py -------------------------------------------------------------------------------- /fine_tune_Lipophilicity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_Lipophilicity.py -------------------------------------------------------------------------------- /fine_tune_clintox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_clintox.py -------------------------------------------------------------------------------- /fine_tune_freesolv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_freesolv.py -------------------------------------------------------------------------------- /fine_tune_siamese-BBBP.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_siamese-BBBP.py -------------------------------------------------------------------------------- /fine_tune_siamese-ClinTox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_siamese-ClinTox.py -------------------------------------------------------------------------------- /fine_tune_siamese-HIV.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_siamese-HIV.py -------------------------------------------------------------------------------- /fine_tune_tox21.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/fine_tune_tox21.py -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/main.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GouldGroup/MFBERT/HEAD/requirements.txt --------------------------------------------------------------------------------