├── README.md ├── adversarial_robustness ├── README.md └── code │ ├── common │ ├── Makefile │ ├── _ext │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ ├── custom_kernel.d │ │ ├── custom_kernel.o │ │ └── my_lib │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ │ ├── _my_lib.so │ │ │ └── ffiex.py │ ├── build.py │ ├── cmd_args.py │ ├── cmd_args.pyc │ ├── dnn.py │ ├── dnn.pyc │ ├── functions │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── custom_func.cpython-37.pyc │ │ ├── custom_func.py │ │ └── custom_func.pyc │ ├── graph_embedding.py │ ├── graph_embedding.pyc │ ├── modules │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── custom_mod.cpython-37.pyc │ │ ├── custom_mod.py │ │ └── custom_mod.pyc │ ├── src │ │ ├── custom_kernel.cu │ │ ├── custom_kernel.h │ │ ├── my_lib.c │ │ ├── my_lib.h │ │ ├── my_lib_cuda.c │ │ └── my_lib_cuda.h │ └── test.py │ ├── data_generator │ ├── data_util.py │ ├── data_util.pyc │ ├── gen_er_components.py │ └── pkl_dump.sh │ ├── graph_attack │ ├── collect_rl_results.py │ ├── dqn.py │ ├── er_trivial_attack.py │ ├── genetic_algorithm.py │ ├── grad_attack.py │ ├── nstep_replay_mem.py │ ├── nstep_replay_mem.pyc │ ├── plot_dqn.py │ ├── plot_dqn.sh │ ├── q_net.py │ ├── q_net.pyc │ ├── rl_common.py │ ├── run_dqn.sh │ ├── run_ga.sh │ ├── run_grad.sh │ └── run_trivial.sh │ └── graph_classification │ ├── er_components.py │ ├── graph_common.py │ ├── run_er_components.sh │ └── test_er_comp.sh ├── semisupervised_TU ├── README.md ├── environment.yml ├── finetuning │ ├── datasets.py │ ├── feature_expansion.py │ ├── gcn_conv.py │ ├── image_dataset.py │ ├── main.py │ ├── main_cl.py │ ├── net_cl.py │ ├── net_gae.py │ ├── net_infomax.py │ ├── train_eval.py │ ├── tu_dataset.py │ └── utils.py └── pre-training │ ├── datasets.py │ ├── feature_expansion.py │ ├── gcn_conv.py │ ├── main.py │ ├── res_gcn.py │ ├── 
train_eval.py │ ├── tu_dataset.py │ └── utils.py ├── simgrace.png ├── transfer_learning ├── README.md ├── bio │ ├── batch.py │ ├── dataloader.py │ ├── finetune.py │ ├── finetune.sh │ ├── finetune_tune.sh │ ├── loader.py │ ├── model.py │ ├── models_simgrace │ │ ├── simgrace_100.pth │ │ ├── simgrace_20.pth │ │ ├── simgrace_40.pth │ │ ├── simgrace_60.pth │ │ └── simgrace_80.pth │ ├── pretrain_contextpred.py │ ├── pretrain_deepgraphinfomax.py │ ├── pretrain_edgepred.py │ ├── pretrain_masking.py │ ├── pretrain_simgrace.py │ ├── pretrain_supervised.py │ ├── result_analysis.py │ ├── splitters.py │ └── util.py └── chem │ ├── batch.py │ ├── dataloader.py │ ├── finetune.py │ ├── finetune.sh │ ├── finetune_mutag_ptc.py │ ├── finetune_tune.sh │ ├── loader.py │ ├── model.py │ ├── models_simgrace │ ├── simgrace_100.pth │ ├── simgrace_20.pth │ ├── simgrace_40.pth │ ├── simgrace_60.pth │ └── simgrace_80.pth │ ├── parse_result.py │ ├── pretrain_contextpred.py │ ├── pretrain_deepgraphinfomax.py │ ├── pretrain_edgepred.py │ ├── pretrain_masking.py │ ├── pretrain_simgrace.py │ ├── pretrain_supervised.py │ ├── run.sh │ ├── splitters.py │ └── util.py └── unsupervised_TU ├── Accuracy.txt ├── __pycache__ ├── arguments.cpython-37.pyc ├── aug.cpython-37.pyc ├── evaluate_embedding.cpython-37.pyc ├── gin.cpython-37.pyc ├── losses.cpython-37.pyc └── model.cpython-37.pyc ├── arguments.py ├── aug.py ├── cortex_DIM ├── configs │ ├── convnets.py │ └── resnets.py ├── functions │ ├── __pycache__ │ │ ├── gan_losses.cpython-37.pyc │ │ └── misc.cpython-37.pyc │ ├── dim_losses.py │ ├── gan_losses.py │ └── misc.py └── nn_modules │ ├── __pycache__ │ ├── mi_networks.cpython-37.pyc │ └── misc.cpython-37.pyc │ ├── convnet.py │ ├── encoder.py │ ├── mi_networks.py │ ├── misc.py │ └── resnet.py ├── data └── NCI1 │ └── NCI1 │ ├── processed │ ├── data.pt │ ├── pre_filter.pt │ └── pre_transform.pt │ └── raw │ ├── NCI1_A.txt │ ├── NCI1_graph_indicator.txt │ ├── NCI1_graph_labels.txt │ ├── NCI1_node_labels.txt │ └── 
README.txt ├── deepinfomax.py ├── evaluate_embedding.py ├── gin.py ├── go.sh ├── losses.py ├── model.py ├── readme.md ├── simgrace.py └── test.py /README.md: -------------------------------------------------------------------------------- 1 | # SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation (WWW 2022) 2 | PyTorch implementation for [SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation](https://arxiv.org/pdf/2202.03104.pdf) accepted by The Web Conference 2022 (WWW 2022). 3 | ## Overview 4 | In this repository, we provide the code of SimGRACE to evaluate its performance in terms of generalizability (unsupervised & semi-supervised learning), transferability (transfer learning) and robustness (adversarial robustness). 5 | ![](./simgrace.png) 6 | ## Dataset download 7 | * Semi-supervised learning & Unsupervised representation learning [TU Datasets](https://chrsmrrs.github.io/datasets/docs/datasets/) (social and biochemical graphs) 8 | * Transfer learning [chem data](http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip) (2.5GB); [bio data](http://snap.stanford.edu/gnn-pretrain/data/bio_dataset.zip) (2GB) 9 | * Adversarial robustness [synthetic data](https://www.dropbox.com/sh/mu8odkd36x54rl3/AABg8ABiMqwcMEM5qKIY97nla?dl=0) 10 | 11 | ## Citation 12 | ``` 13 | @inproceedings{10.1145/3485447.3512156, 14 | author = {Xia, Jun and Wu, Lirong and Chen, Jintao and Hu, Bozhen and Li, Stan Z.}, 15 | title = {SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation}, 16 | year = {2022}, 17 | isbn = {9781450390965}, 18 | publisher = {Association for Computing Machinery}, 19 | address = {New York, NY, USA}, 20 | url = {https://doi.org/10.1145/3485447.3512156}, 21 | doi = {10.1145/3485447.3512156}, 22 | booktitle = {Proceedings of the ACM Web Conference 2022}, 23 | pages = {1070–1079}, 24 | numpages = {10}, 25 | keywords = {graph representation learning, contrastive learning, 
Graph neural networks, robustness, graph self-supervised learning}, 26 | location = {Virtual Event, Lyon, France}, 27 | series = {WWW '22} 28 | } 29 | ``` 30 | ## Useful resources for Pretrained Graph Models (PGMs) 31 | * The first comprehensive survey of PGMs: [A Survey of Pretraining on Graphs: Taxonomy, Methods, and Applications](https://arxiv.org/abs/2202.07893v1) 32 | * [A curated list of must-read papers, open-source pretrained models and pretraining datasets.](https://github.com/junxia97/awesome-pretrain-on-graphs) 33 | 34 | ## Reference 35 | 1. [Graph Contrastive Learning Automated (ICML 2021)](https://github.com/Shen-Lab/GraphCL_Automated) 36 | 2. [Graph Contrastive Learning with Augmentations (NeurIPS 2020)](https://github.com/Shen-Lab/GraphCL) 37 | 3. [Strategies for Pre-training Graph Neural Networks (ICLR 2020)](https://github.com/snap-stanford/pretrain-gnns/) 38 | 4. [Adversarial Attack on Graph Structured Data (ICML 2018)](https://github.com/Hanjun-Dai/graph_adversarial_attack) 39 | -------------------------------------------------------------------------------- /adversarial_robustness/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies & Dataset 2 | 3 | Please refer to https://github.com/Hanjun-Dai/graph_adversarial_attack for environment setup and to download the dataset. 4 | After the configuration, you should have three directories: ```./code/```, ```./dropbox/``` and ```./pytorch_structure2vec/```. 
5 | 6 | ## Training & Evaluation 7 | ### Pre-training + finetuning: ### 8 | ``` 9 | cd ./code/graph_classification 10 | ./run_er_components.sh 15 20 0.15 2 -phase train 11 | ./run_er_components.sh 15 20 0.15 3 -phase train 12 | ./run_er_components.sh 15 20 0.15 4 -phase train 13 | 14 | ./run_er_components.sh 40 50 0.05 2 -phase train 15 | ./run_er_components.sh 40 50 0.05 3 -phase train 16 | ./run_er_components.sh 40 50 0.05 4 -phase train 17 | 18 | ./run_er_components.sh 90 100 0.02 2 -phase train 19 | ./run_er_components.sh 90 100 0.02 3 -phase train 20 | ./run_er_components.sh 90 100 0.02 4 -phase train 21 | ``` 22 | 23 | ### Adversarial attacks: ### 24 | ``` 25 | cd ./code/graph_attack 26 | ./run_trivial.sh 15 20 0.15 2 -phase train 27 | ./run_trivial.sh 15 20 0.15 3 -phase train 28 | ./run_trivial.sh 15 20 0.15 4 -phase train 29 | ./run_grad.sh 15 20 0.15 2 -phase train 30 | ./run_grad.sh 15 20 0.15 3 -phase train 31 | ./run_grad.sh 15 20 0.15 4 -phase train 32 | ./run_dqn.sh 15 20 0.15 2 -phase train 33 | ./run_dqn.sh 15 20 0.15 3 -phase train 34 | ./run_dqn.sh 15 20 0.15 4 -phase train 35 | 36 | ./run_trivial.sh 40 50 0.05 2 -phase train 37 | ./run_trivial.sh 40 50 0.05 3 -phase train 38 | ./run_trivial.sh 40 50 0.05 4 -phase train 39 | ./run_grad.sh 40 50 0.05 2 -phase train 40 | ./run_grad.sh 40 50 0.05 3 -phase train 41 | ./run_grad.sh 40 50 0.05 4 -phase train 42 | ./run_dqn.sh 40 50 0.05 2 -phase train 43 | ./run_dqn.sh 40 50 0.05 3 -phase train 44 | ./run_dqn.sh 40 50 0.05 4 -phase train 45 | 46 | ./run_trivial.sh 90 100 0.02 2 -phase train 47 | ./run_trivial.sh 90 100 0.02 3 -phase train 48 | ./run_trivial.sh 90 100 0.02 4 -phase train 49 | ./run_grad.sh 90 100 0.02 2 -phase train 50 | ./run_grad.sh 90 100 0.02 3 -phase train 51 | ./run_grad.sh 90 100 0.02 4 -phase train 52 | ./run_dqn.sh 90 100 0.02 2 -phase train 53 | ./run_dqn.sh 90 100 0.02 3 -phase train 54 | ./run_dqn.sh 90 100 0.02 4 -phase train 55 | ``` 56 | 57 | ## Acknowledgements 58 | * 
https://github.com/Shen-Lab/GraphCL/tree/master/adversarialRobustness_Component 59 | * https://github.com/Hanjun-Dai/graph_adversarial_attack. 60 | -------------------------------------------------------------------------------- /adversarial_robustness/code/common/Makefile: -------------------------------------------------------------------------------- 1 | dir_guard = @mkdir -p $(@D) 2 | 3 | #INTEL_ROOT := /opt/intel 4 | MKL_ROOT = $(INTEL_ROOT)/mkl 5 | TBB_ROOT = $(INTEL_ROOT)/tbb 6 | 7 | FIND := find 8 | CXX := g++ 9 | CXXFLAGS += -Wall -O3 -std=c++11 10 | LDFLAGS += -lm -lmkl_rt -ltbb 11 | 12 | CUDA_HOME := /usr/local/cuda-9.0 13 | NVCC := $(CUDA_HOME)/bin/nvcc 14 | NVCCFLAGS += --default-stream per-thread 15 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand -lcusparse 16 | 17 | CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \ 18 | -gencode arch=compute_35,code=sm_35 \ 19 | -gencode arch=compute_50,code=sm_50 \ 20 | -gencode arch=compute_50,code=compute_50 21 | 22 | build_root = _ext 23 | obj_build_root = $(build_root) 24 | 25 | include_dirs = $(CUDA_HOME)/include $(MKL_ROOT)/include $(TBB_ROOT)/include include 26 | CXXFLAGS += $(addprefix -I,$(include_dirs)) 27 | CXXFLAGS += -fPIC 28 | 29 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 30 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 31 | cu_files = $(shell $(FIND) src/ -name "*.cu" -printf "%P\n") 32 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 33 | objs = $(addprefix $(obj_build_root)/,$(cu_obj_files)) 34 | 35 | DEPS = ${objs:.o=.d} 36 | mylib = _ext/my_lib/_my_lib.so 37 | 38 | all: $(objs) $(mylib) 39 | 40 | $(obj_build_root)/%.o: src/%.cu 41 | $(dir_guard) 42 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 43 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 44 | 45 | $(mylib): src/*.c src/*.h src/*.cu 46 | python build.py 47 | 48 | clean: 49 | rm -f $(obj_build_root)/*.o 50 | rm -f $(obj_build_root)/*.d 51 | rm -rf _ext 52 | rm -f functions/*.pyc 53 | 
rm -f modules/*.pyc 54 | -include $(DEPS) 55 | -------------------------------------------------------------------------------- /adversarial_robustness/code/common/_ext/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__init__.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/_ext/custom_kernel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/custom_kernel.o -------------------------------------------------------------------------------- /adversarial_robustness/code/common/_ext/my_lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__init__.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc 
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/_ext/my_lib/_my_lib.so:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/_my_lib.so
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/build.py:
# --------------------------------------------------------------------------------
"""Build script for the `_ext.my_lib` extension (jagged softmax/argmax/max kernels).

Running this file as a script compiles the C sources listed below (plus the
CUDA wrappers when a GPU is visible) through ``torch.utils.ffi``.
"""
import os
import torch
from torch.utils.ffi import create_extension

# CPU sources are always compiled; the CUDA wrappers are appended below only
# when torch reports an available device.
sources = ['src/my_lib.c']
headers = ['src/my_lib.h']
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/my_lib_cuda.c']
    headers += ['src/my_lib_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

# Absolute directory of this script, used to locate the pre-built kernel object.
# (The original assigned `this_file` twice; the first value was never read.)
this_file = os.path.dirname(os.path.realpath(__file__))
# NOTE(review): the kernel object is linked unconditionally, even when
# `with_cuda` is False -- confirm that a CPU-only build is actually supported.
extra_objects = ['_ext/custom_kernel.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

ffi = create_extension(
    '_ext.my_lib',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects,
    extra_compile_args=['-fopenmp'],
    extra_link_args=['-lgomp']
)

if __name__ == '__main__':
    ffi.build()

# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/cmd_args.py:
# --------------------------------------------------------------------------------
# Shared command-line options. Importing this module parses the process
# arguments immediately, prints the resulting namespace and exposes it as
# `cmd_args`; unknown options are ignored (parse_known_args).
import argparse
# `pickle` is imported under the alias `cp` (the name previously used for cPickle).
import pickle as cp

cmd_opt = argparse.ArgumentParser(description='Argparser for molecule vae')
cmd_opt.add_argument('-data_folder', type=str, default=None, help='data folder')
cmd_opt.add_argument('-saved_model', type=str, default=None, help='saved model')
cmd_opt.add_argument('-save_dir', type=str, default=None, help='save folder')
cmd_opt.add_argument('-ctx', type=str, default='cpu', help='cpu/gpu')
cmd_opt.add_argument('-phase', type=str, default='test', help='train/test')
cmd_opt.add_argument('-logfile', type=str, default=None, help='log')

cmd_opt.add_argument('-batch_size', type=int, default=50, help='minibatch size')
cmd_opt.add_argument('-seed', type=int, default=1, help='seed')
cmd_opt.add_argument('-min_n', type=int, default=0, help='min #nodes')
cmd_opt.add_argument('-max_n', type=int, default=0, help='max #nodes')
cmd_opt.add_argument('-min_c', type=int, default=0, help='min #components')
cmd_opt.add_argument('-max_c', type=int, default=0, help='max #components')
cmd_opt.add_argument('-er_p', type=float, default=0, help='parameter of er graphs')
cmd_opt.add_argument('-n_graphs', type=int, default=0, help='number of graphs')
cmd_opt.add_argument('-gm', default='mean_field', help='mean_field/loopy_bp/gcn')
cmd_opt.add_argument('-latent_dim', type=int, default=64, help='dimension of latent layers')
cmd_opt.add_argument('-out_dim', type=int, default=0, help='s2v output size')
cmd_opt.add_argument('-hidden', type=int, default=32, help='dimension of classification')
cmd_opt.add_argument('-max_lv', type=int, default=2, help='max rounds of message passing')

cmd_opt.add_argument('-num_epochs', type=int, default=1000, help='number of epochs')
cmd_opt.add_argument('-learning_rate', type=float, default=0.001, help='init learning_rate')
cmd_opt.add_argument('-weight_decay', type=float, default=5e-4, help='weight_decay')
cmd_opt.add_argument('-dropout', type=float, default=0.5, help='dropout rate')

# for node classification
cmd_opt.add_argument('-dataset', type=str, default=None, help='citeseer/cora/pubmed')
cmd_opt.add_argument('-feature_dim', type=int, default=None, help='node feature dim')
cmd_opt.add_argument('-num_class', type=int, default=None, help='# classes')
cmd_opt.add_argument('-adj_norm', type=int, default=1, help='normalize the adj or not')

# for bio graph classification
cmd_opt.add_argument('-feat_dim', type=int, default=0, help='dimension of node feature')
cmd_opt.add_argument('-fold', type=int, default=1, help='fold (1..10)')

# for AT-SimGRACE
cmd_opt.add_argument('-lr_inner', type=float, default=0.001, help='lr of inner opt')
# NOTE(review): '-epison' looks like a misspelling of "epsilon"; the flag name is
# kept as-is because callers and shell scripts may already pass it.
cmd_opt.add_argument('-epison', type=float, default=0.01, help='radius of perturbation ball')
# NOTE(review): this is the only option declared with a double dash.
cmd_opt.add_argument('--clip_norm', type=int, default=50, help='Maximum norm of parameter gradient.')

# for attack
cmd_opt.add_argument('-idx_start', type=int, default=None, help='id of graph or node index')
cmd_opt.add_argument('-num_instances', type=int, default=None, help='num of samples for attack, in genetic algorithm')
cmd_opt.add_argument('-num_steps', type=int, default=100000, help='rl training steps')
cmd_opt.add_argument('-targeted', type=int, default=0, help='0/1 target attack or not')
cmd_opt.add_argument('-frac_meta', type=float, default=0, help='fraction for meta rl learning')
cmd_opt.add_argument('-meta_test', type=int, default=0, help='for meta rl learning')
cmd_opt.add_argument('-rand_att_type', type=str, default=None, help='random/exhaust')
cmd_opt.add_argument('-reward_type', type=str, default=None, help='binary/nll')
cmd_opt.add_argument('-base_model_dump', type=str, default=None, help='saved base model')
cmd_opt.add_argument('-num_mod', type=int, default=1, help='number of modifications allowed')

# for genetic algorithm
cmd_opt.add_argument('-population_size', type=int, default=100, help='population size')
cmd_opt.add_argument('-cross_rate', type=float, default=0.1, help='cross_rate')
cmd_opt.add_argument('-mutate_rate', type=float, default=0.2, help='mutate rate')
cmd_opt.add_argument('-rounds', type=int, default=10, help='rounds of evolution')

# for node attack
cmd_opt.add_argument('-bilin_q', type=int, default=0, help='bilinear q or not')
cmd_opt.add_argument('-mlp_hidden', type=int, default=64, help='mlp hidden layer size')
cmd_opt.add_argument('-n_hops', type=int, default=2, help='attack range')

# for defence
cmd_opt.add_argument('-del_rate', type=float, default=0, help='rate of deleting edge')

# Unrecognised options are tolerated so that scripts can share one command line.
cmd_args, _ = cmd_opt.parse_known_args()

print(cmd_args)


def build_kwargs(keys, arg_dict):
    """Return the concatenation of ``'<key>-<value>'`` for every key in *keys*.

    Values are looked up in *arg_dict* and converted with ``str``. The pairs
    are joined with no separator between them (e.g. ``'a-1b-2'``); an empty
    *keys* yields ``''``. Raises ``KeyError`` if a key is missing.
    """
    # join() gives the same string as the original repeated `+=`, in one pass.
    return ''.join('%s-%s' % (key, str(arg_dict[key])) for key in keys)


def save_args(fout, args):
    """Pickle *args* to the file path *fout* using the highest pickle protocol."""
    with open(fout, 'wb') as f:
        cp.dump(args, f, cp.HIGHEST_PROTOCOL)

# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/cmd_args.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/cmd_args.pyc
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/dnn.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/dnn.pyc
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/functions/__init__.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__init__.pyc
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc
# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/functions/custom_func.py:
# --------------------------------------------------------------------------------
"""Autograd wrappers around the compiled `my_lib` jagged-array kernels.

A "jagged" input is a flat tensor split into consecutive segments. The
segments are described by ``prefix_sum``: segment ``i`` covers the elements
``[prefix_sum[i - 1], prefix_sum[i])`` (with 0 as the start of segment 0).
"""
import torch
from torch.autograd import Function
from _ext import my_lib
import sys


class JaggedLogSoftmax(Function):
    """Log-softmax computed independently inside every segment."""

    def forward(self, logits, prefix_sum):
        """Return a tensor shaped like *logits* holding per-segment log-softmax.

        *prefix_sum* must be 1-D. The CPU or CUDA kernel is chosen from
        ``logits.is_cuda``; the kernel resizes and fills ``output``.
        """
        assert len(prefix_sum.size()) == 1
        output = logits.new()
        if not logits.is_cuda:
            my_lib.jagged_log_softmax_forward(logits, prefix_sum, output)
        else:
            my_lib.jagged_log_softmax_forward_cuda(logits, prefix_sum, output)

        # Saved exactly once, after `output` exists. The original also called
        # save_for_backward(prefix_sum) before the kernel, which this call
        # simply overrode.
        self.save_for_backward(prefix_sum, output)
        return output

    def backward(self, grad_output):
        """Return ``(grad_logits, None)``; *prefix_sum* receives no gradient."""
        prefix_sum, output = self.saved_variables
        grad_input = grad_output.new()
        if not grad_output.is_cuda:
            my_lib.jagged_log_softmax_backward(output.data, grad_output, prefix_sum.data, grad_input)
        else:
            my_lib.jagged_log_softmax_backward_cuda(output.data, grad_output, prefix_sum.data, grad_input)
        return grad_input, None


class JaggedArgmax(Function):
    """Per-segment argmax; each index is relative to the start of its segment."""

    def forward(self, values, prefix_sum):
        """Return one index per segment, in a tensor of *prefix_sum*'s type."""
        assert len(prefix_sum.size()) == 1
        output = prefix_sum.new()
        if not values.is_cuda:
            my_lib.jagged_argmax_forward(values, prefix_sum, output)
        else:
            my_lib.jagged_argmax_forward_cuda(values, prefix_sum, output)

        return output

    def backward(self, grad_output):
        # An explicit raise instead of `assert False`: asserts are stripped
        # under `python -O`, which would silently return None as the gradient.
        raise NotImplementedError('JaggedArgmax is not differentiable')


class JaggedMax(Function):
    """Per-segment maximum together with its within-segment index."""

    def forward(self, values, prefix_sum):
        """Return ``(vmax, idxes)``, each with one entry per segment."""
        assert len(prefix_sum.size()) == 1
        idxes = prefix_sum.new()
        vmax = values.new()
        if not values.is_cuda:
            my_lib.jagged_max_forward(values, prefix_sum, vmax, idxes)
        else:
            my_lib.jagged_max_forward_cuda(values, prefix_sum, vmax, idxes)

        return vmax, idxes

    def backward(self, grad_output):
        # See JaggedArgmax.backward for why this is not an assert.
        raise NotImplementedError('JaggedMax is not differentiable')


def _row_sums(raw_adj):
    """Return ``raw_adj @ ones`` as an ``(n, 1)`` tensor, where n = raw_adj.size()[0]."""
    ones = torch.ones(raw_adj.size()[0], 1)
    if raw_adj.is_cuda:
        ones = ones.cuda()
    return torch.mm(raw_adj, ones)


def GraphLaplacianNorm(raw_adj):
    """Divide every stored value of sparse *raw_adj* by ``sqrt(deg[row]) * sqrt(deg[col])``.

    ``deg`` is the vector of row sums. The kernel receives the tensor returned
    by ``raw_adj._values()`` and divides it element-wise; nothing is returned.
    """
    norm = _row_sums(raw_adj) ** 0.5
    indices = raw_adj._indices()
    values = raw_adj._values()
    if not values.is_cuda:
        my_lib.graph_laplacian_norm(indices, values, norm)
    else:
        my_lib.graph_laplacian_norm_cuda(indices, values, norm)


def GraphDegreeNorm(raw_adj):
    """Divide every stored value of sparse *raw_adj* by the row sum of its row.

    The kernel receives the tensor returned by ``raw_adj._values()`` and
    divides it element-wise; nothing is returned.
    """
    norm = _row_sums(raw_adj)
    indices = raw_adj._indices()
    values = raw_adj._values()
    if not values.is_cuda:
        my_lib.graph_degree_norm(indices, values, norm)
    else:
        my_lib.graph_degree_norm_cuda(indices, values, norm)

# --------------------------------------------------------------------------------
# /adversarial_robustness/code/common/functions/custom_func.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/custom_func.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/graph_embedding.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/graph_embedding.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/modules/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__init__.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/modules/custom_mod.py: 
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from functions.custom_func import JaggedLogSoftmax, JaggedArgmax, JaggedMax 3 | import networkx as nx 4 | import numpy as np 5 | 6 | class JaggedLogSoftmaxModule(Module): 7 | def forward(self, logits, prefix_sum): 8 | return JaggedLogSoftmax()(logits, prefix_sum) 9 | 10 | class JaggedArgmaxModule(Module): 11 | def forward(self, values, prefix_sum): 12 | return JaggedArgmax()(values, prefix_sum) 13 | 14 | class JaggedMaxModule(Module): 15 | def forward(self, values, prefix_sum): 16 | return JaggedMax()(values, prefix_sum) -------------------------------------------------------------------------------- /adversarial_robustness/code/common/modules/custom_mod.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/custom_mod.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/common/src/custom_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef JAGGED_SOFTMAX_KERNEL_H 2 | #define JAGGED_SOFTMAX_KERNEL_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void HostSoftMaxForward(cudaStream_t stream, float *input, float *output, long* ps, int bsize); 9 | 10 | void HostSoftMaxBackward(cudaStream_t stream, float *gradOutput, float *gradInput, float *output, long* ps, int bsize); 11 | 12 | void HostArgmaxForward(cudaStream_t stream, float *input, long *output, long* ps, int bsize); 13 | 14 | void HostMaxForward(cudaStream_t stream, float *input, float* vmax, long *idxes, long* ps, int bsize); 15 | 16 | void HostGLapNorm(cudaStream_t stream, long* row_indices, long* col_indices, float* p_v, float* p_norm, int nnz); 17 | 18 | void HostGDegreeNorm(cudaStream_t 
stream, long* row_indices, float* p_v, float* p_norm, int nnz); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /adversarial_robustness/code/common/src/my_lib.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output) 5 | { 6 | values = THFloatTensor_newContiguous(values); 7 | THLongTensor_resizeAs(output, prefix_sum); 8 | 9 | float *input_data_base = values->storage->data + values->storageOffset;; 10 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset; 11 | long *p_out = output->storage->data + output->storageOffset; 12 | long bsize = (long)prefix_sum->size[0]; 13 | long i, d; 14 | 15 | #pragma omp parallel for private(i, d) 16 | for (i = 0; i < bsize; i++) 17 | { 18 | long offset = (i == 0) ? 0 : ps[i - 1]; 19 | long n_ele = ps[i] - offset; 20 | 21 | float* input_data = input_data_base + offset; 22 | 23 | float max_input = -FLT_MAX; 24 | long max_id = -1; 25 | for (d = 0; d < n_ele; d++) 26 | if (input_data[d] > max_input) 27 | { 28 | max_input = input_data[d]; 29 | max_id = d; 30 | } 31 | assert(max_id >= 0); 32 | p_out[i] = max_id; 33 | } 34 | 35 | THFloatTensor_free(values); 36 | return 1; 37 | } 38 | 39 | int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes) 40 | { 41 | int64_t inputsize = prefix_sum->size[0]; 42 | 43 | values = THFloatTensor_newContiguous(values); 44 | THLongTensor_resize1d(idxes, inputsize); 45 | THFloatTensor_resize1d(vmax, inputsize); 46 | 47 | float *input_data_base = values->storage->data + values->storageOffset; 48 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset; 49 | float *p_maxv = vmax->storage->data + vmax->storageOffset; 50 | long *p_i = idxes->storage->data + idxes->storageOffset; 51 
| 52 | long bsize = (long)prefix_sum->size[0]; 53 | long i, d; 54 | 55 | #pragma omp parallel for private(i, d) 56 | for (i = 0; i < bsize; i++) 57 | { 58 | long offset = (i == 0) ? 0 : ps[i - 1]; 59 | long n_ele = ps[i] - offset; 60 | 61 | float* input_data = input_data_base + offset; 62 | 63 | float max_input = -FLT_MAX; 64 | long max_id = -1; 65 | for (d = 0; d < n_ele; d++) 66 | if (input_data[d] > max_input) 67 | { 68 | max_input = input_data[d]; 69 | max_id = d; 70 | } 71 | assert(max_id >= 0); 72 | p_i[i] = max_id; 73 | p_maxv[i] = max_input; 74 | } 75 | 76 | THFloatTensor_free(values); 77 | return 1; 78 | } 79 | 80 | int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output) 81 | { 82 | logits = THFloatTensor_newContiguous(logits); 83 | THFloatTensor_resizeAs(output, logits); 84 | float *input_data_base = logits->storage->data + logits->storageOffset;// THTensor_(data)(logits); 85 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset; 86 | float *output_data_base = output->storage->data + output->storageOffset; 87 | uint64_t bsize = (uint64_t)prefix_sum->size[0]; 88 | uint64_t i, d; 89 | 90 | #pragma omp parallel for private(i, d) 91 | for (i = 0; i < bsize; i++) 92 | { 93 | long offset = (i == 0) ? 
0 : ps[i - 1]; 94 | 95 | float* input_data = input_data_base + offset; 96 | float* output_data = output_data_base + offset; 97 | 98 | long n_ele = ps[i] - offset; 99 | float max_input = -FLT_MAX; 100 | for (d = 0; d < n_ele; d++) 101 | max_input = THMax(max_input, input_data[d]); 102 | 103 | double logsum = 0; 104 | for (d = 0; d < n_ele; d++) 105 | logsum += exp(input_data[d] - max_input); 106 | logsum = max_input + log(logsum); 107 | 108 | for (d = 0; d < n_ele; d++) 109 | output_data[d] = input_data[d] - logsum; 110 | } 111 | 112 | THFloatTensor_free(logits); 113 | return 1; 114 | } 115 | 116 | int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input) 117 | { 118 | grad_output = THFloatTensor_newContiguous(grad_output); 119 | output = THFloatTensor_newContiguous(output); 120 | THFloatTensor_resizeAs(grad_input, grad_output); 121 | 122 | float *output_data_base = output->storage->data + output->storageOffset; 123 | float *gradOutput_data_base = grad_output->storage->data + grad_output->storageOffset; 124 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset; 125 | float *gradInput_data_base = grad_input->storage->data + grad_input->storageOffset; 126 | 127 | uint64_t bsize = (uint64_t)prefix_sum->size[0]; 128 | uint64_t i, d; 129 | #pragma omp parallel for private(i, d) 130 | for (i = 0; i < bsize; i++) 131 | { 132 | long offset = (i == 0) ? 
0 : ps[i - 1]; 133 | float *gradInput_data = gradInput_data_base + offset; 134 | float *output_data = output_data_base + offset; 135 | float *gradOutput_data = gradOutput_data_base + offset; 136 | 137 | double sum = 0; 138 | long n_ele = ps[i] - offset; 139 | for (d = 0; d < n_ele; d++) 140 | sum += gradOutput_data[d]; 141 | 142 | for (d = 0; d < n_ele; d++) 143 | gradInput_data[d] = gradOutput_data[d] - exp(output_data[d]) * sum; 144 | } 145 | 146 | THFloatTensor_free(grad_output); 147 | THFloatTensor_free(output); 148 | return 1; 149 | } 150 | 151 | int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm) 152 | { 153 | uint64_t nnz = (uint64_t)values->size[0]; 154 | long *row_indices = indices->storage->data + indices->storageOffset; 155 | long *col_indices = row_indices + indices->stride[0]; 156 | float *p_v = values->storage->data + values->storageOffset; 157 | float *p_norm = norm->storage->data + norm->storageOffset; 158 | 159 | uint64_t i; 160 | #pragma omp parallel for private(i) 161 | for (i = 0; i < nnz; i++) 162 | { 163 | float norm = p_norm[ row_indices[i] ] * p_norm[ col_indices[i] ]; 164 | p_v[i] /= norm; 165 | } 166 | 167 | return 1; 168 | } 169 | 170 | int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm) 171 | { 172 | uint64_t nnz = (uint64_t)values->size[0]; 173 | long *row_indices = indices->storage->data + indices->storageOffset; 174 | float *p_v = values->storage->data + values->storageOffset; 175 | float *p_norm = norm->storage->data + norm->storageOffset; 176 | 177 | uint64_t i; 178 | #pragma omp parallel for private(i) 179 | for (i = 0; i < nnz; i++) 180 | { 181 | float norm = p_norm[ row_indices[i] ]; 182 | p_v[i] /= norm; 183 | } 184 | 185 | return 1; 186 | } -------------------------------------------------------------------------------- /adversarial_robustness/code/common/src/my_lib.h: -------------------------------------------------------------------------------- 1 | 
int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output); 2 | 3 | int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input); 4 | 5 | int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output); 6 | 7 | int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes); 8 | 9 | int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm); 10 | 11 | int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm); -------------------------------------------------------------------------------- /adversarial_robustness/code/common/src/my_lib_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "custom_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output) 9 | { 10 | logits = THCudaTensor_newContiguous(state, logits); 11 | THCudaTensor_resizeAs(state, output, logits); 12 | 13 | float *input_data_base = THCudaTensor_data(state, logits); 14 | long* ps = THCudaLongTensor_data(state, prefix_sum); 15 | float *output_data_base = THCudaTensor_data(state, output); 16 | 17 | int bsize = (int)prefix_sum->size[0]; 18 | cudaStream_t stream = THCState_getCurrentStream(state); 19 | HostSoftMaxForward(stream, input_data_base, output_data_base, ps, bsize); 20 | 21 | THCudaTensor_free(state, logits); 22 | return 1; 23 | } 24 | 25 | int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input) 26 | { 27 | output = THCudaTensor_newContiguous(state, output); 28 | grad_output = THCudaTensor_newContiguous(state, 
grad_output); 29 | 30 | THCudaTensor_resizeAs(state, grad_input, grad_output); 31 | float *output_data_base = THCudaTensor_data(state, output); 32 | float *gradOutput_data_base = THCudaTensor_data(state, grad_output); 33 | long* ps = THCudaLongTensor_data(state, prefix_sum); 34 | float *gradInput_data_base = THCudaTensor_data(state, grad_input); 35 | 36 | int bsize = (int)prefix_sum->size[0]; 37 | cudaStream_t stream = THCState_getCurrentStream(state); 38 | HostSoftMaxBackward(stream, gradOutput_data_base, gradInput_data_base, output_data_base, ps, bsize); 39 | THCudaTensor_free(state, grad_output); 40 | THCudaTensor_free(state, output); 41 | return 1; 42 | } 43 | 44 | int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output) 45 | { 46 | values = THCudaTensor_newContiguous(state, values); 47 | THCudaLongTensor_resizeAs(state, output, prefix_sum); 48 | 49 | float *input_data_base = THCudaTensor_data(state, values); 50 | long* ps = THCudaLongTensor_data(state, prefix_sum); 51 | long *output_data_base = THCudaLongTensor_data(state, output); 52 | 53 | int bsize = (int)prefix_sum->size[0]; 54 | cudaStream_t stream = THCState_getCurrentStream(state); 55 | HostArgmaxForward(stream, input_data_base, output_data_base, ps, bsize); 56 | 57 | THCudaTensor_free(state, values); 58 | return 1; 59 | } 60 | 61 | int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes) 62 | { 63 | int64_t inputsize = prefix_sum->size[0]; 64 | values = THCudaTensor_newContiguous(state, values); 65 | THCudaLongTensor_resize1d(state, idxes, inputsize); 66 | THCudaTensor_resize1d(state, vmax, inputsize); 67 | 68 | float *input_data_base = THCudaTensor_data(state, values); 69 | long* ps = THCudaLongTensor_data(state, prefix_sum); 70 | long *p_i = THCudaLongTensor_data(state, idxes); 71 | float *p_maxv = THCudaTensor_data(state, vmax); 72 | 73 | int bsize = (int)prefix_sum->size[0]; 74 | 
cudaStream_t stream = THCState_getCurrentStream(state); 75 | HostMaxForward(stream, input_data_base, p_maxv, p_i, ps, bsize); 76 | 77 | THCudaTensor_free(state, values); 78 | return 1; 79 | } 80 | 81 | int graph_laplacian_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm) 82 | { 83 | uint64_t nnz = (uint64_t)values->size[0]; 84 | long *row_indices = THCudaLongTensor_data(state, indices); 85 | long *col_indices = row_indices + THCudaLongTensor_stride(state, indices, 0); 86 | float *p_v = THCudaTensor_data(state, values); 87 | float *p_norm = THCudaTensor_data(state, norm); 88 | 89 | cudaStream_t stream = THCState_getCurrentStream(state); 90 | HostGLapNorm(stream, row_indices, col_indices, p_v, p_norm, nnz); 91 | return 1; 92 | } 93 | 94 | int graph_degree_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm) 95 | { 96 | uint64_t nnz = (uint64_t)values->size[0]; 97 | long *row_indices = THCudaLongTensor_data(state, indices); 98 | float *p_v = THCudaTensor_data(state, values); 99 | float *p_norm = THCudaTensor_data(state, norm); 100 | 101 | cudaStream_t stream = THCState_getCurrentStream(state); 102 | HostGDegreeNorm(stream, row_indices, p_v, p_norm, nnz); 103 | return 1; 104 | } -------------------------------------------------------------------------------- /adversarial_robustness/code/common/src/my_lib_cuda.h: -------------------------------------------------------------------------------- 1 | int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output); 2 | 3 | int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input); 4 | 5 | int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output); 6 | 7 | int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes); 8 | 9 | int 
def cpu_test():
    """Compare JaggedLogSoftmaxModule against F.log_softmax on the CPU.

    Prints the summed absolute difference of the forward outputs for ten
    random inputs, then the summed absolute difference of the input gradients
    for one more random input. Every row is treated as one segment: the
    prefix-sum vector holds (row + 1) * n_cols.
    """
    jagged_mod = JaggedLogSoftmaxModule()

    # Forward comparison on ten random batches.
    for _ in range(10):
        logits = torch.rand(10000, 10)
        n_rows = logits.size()[0]
        n_cols = int(logits.size()[1])
        row_ends = torch.from_numpy(np.array([(r + 1) * n_cols for r in range(n_rows)]))
        jagged_out = jagged_mod(Variable(logits), Variable(row_ends))
        dense_out = F.log_softmax(Variable(logits), dim=1)
        print(torch.sum(torch.abs(jagged_out - dense_out)))

    # Gradient comparison: same data, two independent leaf variables.
    logits = torch.rand(100, 30)
    row_ends = torch.from_numpy(np.array([(r + 1) * 30 for r in range(100)]))

    jagged_in = Variable(logits, requires_grad=True)
    jagged_loss = F.torch.mean(jagged_mod(jagged_in, Variable(row_ends)))
    jagged_loss.backward()
    jagged_grad = jagged_in.grad

    dense_in = Variable(logits, requires_grad=True)
    dense_loss = F.torch.mean(F.log_softmax(dense_in, dim=1))
    dense_loss.backward()
    dense_grad = dense_in.grad

    print(torch.sum(torch.abs(jagged_grad - dense_grad)))
def argmax():
    """Print the outputs of the jagged argmax and jagged max modules on CUDA inputs.

    Each module is run twice after seeding the RNG with 1: once on a 10 x 4
    matrix where every row is one segment, and once on a length-10 vector
    split at the prefix sums [2, 5, 9, 10].
    """
    def row_prefix_sum(mat):
        # One segment per row: segment r ends at (r + 1) * n_cols.
        n_cols = int(mat.size()[1])
        ends = [(r + 1) * n_cols for r in range(mat.size()[0])]
        return torch.from_numpy(np.array(ends)).cuda()

    torch.manual_seed(1)
    argmax_mod = JaggedArgmaxModule()

    matrix = torch.rand(10, 4).cuda()
    print(matrix)
    picked = argmax_mod(Variable(matrix), Variable(row_prefix_sum(matrix)))
    print(picked)

    vector = torch.randn(10).cuda()
    print(vector)
    ragged_ends = torch.LongTensor([2, 5, 9, 10]).cuda()
    picked = argmax_mod(Variable(vector), Variable(ragged_ends))
    print(picked)

    torch.manual_seed(1)
    max_mod = JaggedMaxModule()

    matrix = torch.rand(10, 4).cuda()
    print(matrix)
    first, second = max_mod(Variable(matrix), Variable(row_prefix_sum(matrix)))
    print(first)
    print(second)

    vector = torch.randn(10).cuda()
    print(vector)
    ragged_ends = torch.LongTensor([2, 5, 9, 10]).cuda()
    result = max_mod(Variable(vector), Variable(ragged_ends))
    print(result[0], result[1])
def g2txt(g, label, fid):
    """Write graph *g* to the open text file *fid* in adjacency-list format.

    The first line is ``<num_nodes> <label>``. It is followed by one line per
    node ``i`` in ``range(len(g))`` holding the neighbor count and then the
    neighbor ids, all separated by single spaces.

    Args:
        g: graph object supporting ``len(g)`` and ``g.neighbors(i)``
            (e.g. a networkx graph whose nodes are 0..len(g)-1).
        label: integer label written on the header line.
        fid: writable text file object.
    """
    fid.write('%d %d\n' % (len(g), label))
    for i in range(len(g)):
        # networkx >= 2.0 returns an iterator from neighbors(); materialize it
        # so it can be both counted and traversed (a list in 1.x is unaffected).
        neighbors = list(g.neighbors(i))
        fid.write('%d' % len(neighbors))
        for j in neighbors:
            fid.write(' %d' % j)
        fid.write('\n')
sys.argv[i] == '-p': 42 | p = float(sys.argv[i + 1]) 43 | if sys.argv[i] == '-n_comp': 44 | n_comp = int(sys.argv[i + 1]) 45 | 46 | assert save_dir is not None 47 | assert max_n is not None 48 | assert min_n is not None 49 | assert num_graph is not None 50 | assert p is not None 51 | assert n_comp is not None 52 | 53 | fout_name = '%s/ncomp-%d-nrange-%d-%d-n_graph-%d-p-%.2f.pkl' % (save_dir, n_comp, min_n, max_n, num_graph, p) 54 | print('Final Output: ' + fout_name) 55 | print("Generating graphs...") 56 | min_n = min_n // n_comp 57 | max_n = max_n // n_comp 58 | 59 | for i in tqdm(range(num_graph)): 60 | 61 | for j in range(n_comp): 62 | g = get_component() 63 | 64 | if j == 0: 65 | g_all = g 66 | else: 67 | g_all = nx.disjoint_union(g_all, g) 68 | assert nx.number_connected_components(g_all) == n_comp 69 | 70 | with open(fout_name, 'ab') as fout: 71 | cp.dump(g_all, fout, cp.HIGHEST_PROTOCOL) 72 | -------------------------------------------------------------------------------- /adversarial_robustness/code/data_generator/pkl_dump.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | min_n=90 4 | max_n=100 5 | p=0.02 6 | output_root=../../dropbox/data/components 7 | 8 | if [ ! 
if __name__ == '__main__':
    # For every experiment folder under result_root, scan its config
    # sub-folders, read the number on the first line of each
    # <config>/epoch-best.txt and report the config with the smallest one.
    result_root = '../../dropbox/scratch/results/graph_classification/components'
    for fname in sorted(os.listdir(result_root)):
        if fname[0] == '.':
            continue
        best_num = 100
        best_config = None

        for config in os.listdir(result_root + '/' + fname):
            # Skip hidden entries and the base model's own epoch-best dumps.
            if config[0] == '.' or 'epoch-best' in config:
                continue
            if '0.1' in config:
                continue
            result = result_root + '/' + fname + '/' + config + '/epoch-best.txt'
            with open(result, 'r') as f:
                num = float(f.readline().strip())
            if num < best_num:
                best_config = config
                best_num = num
        # A formatted string prints the same text under Python 2 and Python 3;
        # the former `print a, b, c` statement is a syntax error on Python 3.
        print('%s %s %s' % (fname, best_config, best_num))
def propose_attack(model, s2v_g, num_added=1):
    """Propose an attacked copy of *s2v_g* by adding edges inside components.

    Candidate edges are all node pairs ``(i, j)`` with ``i < j`` that lie in
    the same connected component, so the number of components is unchanged.

    * ``cmd_args.rand_att_type == 'random'``: add ``num_added`` candidates
      drawn uniformly (with replacement) and return the new graph.
    * ``cmd_args.rand_att_type == 'exhaust'``: try every single candidate edge
      and return the first graph whose accuracy entry from ``model`` is
      below 1, or the first candidate graph if none is.

    Args:
        model: classifier; called on a list of graphs, its third return value
            is used as per-graph accuracy.
        s2v_g: graph to attack; must provide ``to_networkx()`` and ``label``.
        num_added: number of edges drawn in 'random' mode.

    Returns:
        An ``S2VGraph`` with the same label, or *s2v_g* itself when there is
        no candidate edge at all.

    Raises:
        NotImplementedError: for any other ``cmd_args.rand_att_type``.
    """
    if cmd_args.rand_att_type not in ('random', 'exhaust'):
        raise NotImplementedError

    g = s2v_g.to_networkx()
    # connected_components exists in both networkx 1.x and 2.x, unlike the
    # removed connected_component_subgraphs; only node membership is needed.
    set_id = {}
    for comp_idx, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_idx

    n_nodes = len(g)
    cand = [(i, j)
            for i in range(n_nodes - 1)
            for j in range(i + 1, n_nodes)
            if set_id[i] == set_id[j]]
    if not cand:
        # Nothing can be added without merging components: leave the graph
        # untouched instead of crashing on an empty candidate list.
        return s2v_g

    if cmd_args.rand_att_type == 'random':
        picked = np.random.choice(len(cand), num_added)
        g.add_edges_from([cand[k] for k in picked])
        return S2VGraph(g, s2v_g.label)

    # 'exhaust': evaluate every single-edge modification in one batch.
    g_list = []
    for x, y in cand:
        g2 = g.copy()
        g2.add_edge(x, y)
        g_list.append(S2VGraph(g2, s2v_g.label))
    _, _, acc = model(g_list)
    # .cpu() keeps this working when the classifier runs on the GPU.
    acc = acc.cpu().numpy()
    for i, attacked in enumerate(g_list):
        if acc[i] < 1:
            return attacked
    return g_list[0]
def propose_attack(model, s2v_g, num_added=1):
    """Propose an attacked copy of *s2v_g* using gradients w.r.t. the adjacency.

    For every label other than the graph's own, the loss towards that label is
    back-propagated to the ``'n2n'`` sparse matrix returned by ``model.s2v``.
    Node pairs are visited in ascending gradient order; pairs that lie in
    different components, are self loops, or have a positive gradient are
    skipped, and the first ``num_added`` remaining pairs are added as edges.
    All resulting graphs are classified together and the first one whose
    accuracy entry is below 1 is returned.

    Args:
        model: classifier exposing ``PrepareFeatureLabel``, ``s2v``, ``mlp``,
            ``label_map`` and ``zero_grad``; callable on a list of graphs.
        s2v_g: graph to attack; must provide ``to_networkx()``, ``label`` and
            ``num_nodes``.
        num_added: maximum number of edges added per target label.

    Returns:
        The first misclassified candidate, or *s2v_g* if every candidate
        (including the original graph, which is candidate 0) is still
        classified correctly.
    """
    g = s2v_g.to_networkx()
    # connected_components exists in both networkx 1.x and 2.x, unlike the
    # removed connected_component_subgraphs; only node membership is needed.
    set_id = {}
    for comp_idx, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_idx

    node_feat, edge_feat, labels = model.PrepareFeatureLabel([s2v_g])
    if cmd_args.ctx == 'gpu':
        node_feat = node_feat.cuda()
        labels = labels.cuda()

    n_nodes = s2v_g.num_nodes
    cand_list = [s2v_g]
    for target_label in range(len(model.label_map)):
        if target_label == s2v_g.label:
            continue
        labels[0] = target_label
        model.zero_grad()
        (_, embed), sp_dict = model.s2v([s2v_g], node_feat, edge_feat, pool_global=True, n2n_grad=True)
        _, loss, _ = model.mlp(embed, labels)
        loss.backward()
        # .cpu() is required when the model runs on the GPU (ctx == 'gpu').
        grad = sp_dict['n2n'].grad.data.cpu().numpy().flatten()

        added = []
        for p in np.argsort(grad):
            # Flat index -> (row, col) of the n_nodes x n_nodes matrix.
            x = int(p // n_nodes)
            y = int(p % n_nodes)
            if set_id[x] != set_id[y] or x == y or grad[p] > 0:
                continue
            added.append((x, y))
            if len(added) >= num_added:
                break
        if not added:
            continue
        g2 = g.copy()
        g2.add_edges_from(added)
        cand_list.append(S2VGraph(g2, s2v_g.label))

    _, _, acc = model(cand_list)
    acc = acc.double().cpu().numpy()
    for i, candidate in enumerate(cand_list):
        if acc[i] < 1.0:
            return candidate
    return cand_list[0]
def hash_state_action(s_t, a_t):
    """Return an integer hash of a (state, action) pair.

    The state is a triple: ``s_t[0]`` is an integer, ``s_t[1]`` is an object
    with a ``directed_edges`` iterable of integer pairs, and ``s_t[2]`` is an
    integer or ``None``. All of these, followed by the action ``a_t``, are
    folded into a polynomial rolling hash.

    The previous version multiplied by the modulus itself before reducing
    (``(key * base + x) % base``), which erased everything accumulated so far
    and made the result equal to ``a_t % base`` regardless of the state.

    Returns:
        An int in ``[0, 179424673)``.
    """
    modulus = 179424673
    # The multiplier must differ from (and be coprime to) the modulus,
    # otherwise each step discards the running key.
    multiplier = 1000003

    key = s_t[0]
    for e in s_t[1].directed_edges:
        key = (key * multiplier + e[0]) % modulus
        key = (key * multiplier + e[1]) % modulus
    # Shift by one so that "no node picked" (None) differs from node 0.
    picked = 0 if s_t[2] is None else s_t[2] + 1
    key = (key * multiplier + picked) % modulus

    key = (key * multiplier + a_t) % modulus
    return key
class NstepReplayMem(object):
    """Replay memory for fixed-length episodes, one memory cell per time step.

    Only the cell of the last step receives the ``balance_sample`` flag; the
    cells of all earlier steps are created with balancing disabled.
    """

    def __init__(self, memory_size, n_steps, balance_sample = False):
        cells = [NstepReplayMemCell(memory_size, False) for _ in range(n_steps - 1)]
        cells.append(NstepReplayMemCell(memory_size, balance_sample))
        self.mem_cells = cells

        self.n_steps = n_steps
        self.memory_size = memory_size

    def add(self, s_t, a_t, r_t, s_prime, terminal, t):
        """Store one transition observed at step ``t``.

        Asserts that ``t`` is a valid step and that ``terminal`` is truthy
        exactly on the last step.
        """
        assert 0 <= t < self.n_steps
        is_final_step = (t == self.n_steps - 1)
        if is_final_step:
            assert terminal
        else:
            assert not terminal
        cell = self.mem_cells[t]
        cell.add(s_t, a_t, r_t, s_prime, terminal)

    def add_list(self, list_st, list_at, list_rt, list_sp, list_term, t):
        """Store a batch of step-``t`` transitions.

        When ``list_sp`` is None every next state is ``(None, None, None)``.
        """
        for idx, state in enumerate(list_st):
            next_state = (None, None, None) if list_sp is None else list_sp[idx]
            self.add(state, list_at[idx], list_rt[idx], next_state, list_term[idx], t)

    def sample(self, batch_size, t = None):
        """Sample ``batch_size`` transitions from step ``t``.

        A step is drawn uniformly with numpy when ``t`` is None. Returns
        ``(t, states, actions, rewards, next_states, terminals)``.
        """
        if t is None:
            t = np.random.randint(self.n_steps)
        states, actions, rewards, next_states, terminals = self.mem_cells[t].sample(batch_size)
        return t, states, actions, rewards, next_states, terminals
class Agent(object):
    """Two-step Q-learning agent that adds edges through a graph environment.

    All environment access goes through ``self.env``. Earlier revisions of
    ``run_simulation`` and ``eval`` read a module-level ``env`` instead, which
    only worked when the script's global happened to be the same object.
    """

    def __init__(self, g_list, test_g_list, env):
        """Set up networks, replay memory and the shuffled sampling order.

        Args:
            g_list: graphs used for simulation.
            test_g_list: graphs used by ``eval``; falls back to ``g_list``
                when None.
            env: environment the agent acts in.
        """
        self.g_list = g_list
        self.test_g_list = g_list if test_g_list is None else test_g_list
        self.mem_pool = NstepReplayMem(memory_size=50000, n_steps=2)
        self.env = env
        self.net = NStepQNet(2)
        self.old_net = NStepQNet(2)
        if cmd_args.ctx == 'gpu':
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()
        # With eps_start == eps_end == 1.0 the non-greedy policy is always
        # uniformly random (see make_actions).
        self.eps_start = 1.0
        self.eps_end = 1.0
        self.eps_step = 10000
        self.burn_in = 100
        self.step = 0

        self.best_eval = None
        self.pos = 0
        self.sample_idxes = list(range(len(g_list)))
        random.shuffle(self.sample_idxes)
        self.take_snapshot()

    def take_snapshot(self):
        """Copy the current network weights into the target network."""
        self.old_net.load_state_dict(self.net.state_dict())

    def make_actions(self, time_t, greedy=False):
        """Return one action per graph in the environment for step ``time_t``.

        Uses uniformly random actions with probability ``self.eps`` (linearly
        annealed from ``eps_start`` to ``eps_end`` over ``eps_step`` steps)
        unless ``greedy`` is True; otherwise takes the network's greedy actions.
        """
        self.eps = self.eps_end + max(0., (self.eps_start - self.eps_end)
                * (self.eps_step - max(0., self.step)) / self.eps_step)

        if random.random() < self.eps and not greedy:
            actions = self.env.uniformRandActions()
        else:
            cur_state = self.env.getStateRef()
            actions, _, _ = self.net(time_t, cur_state, None, greedy_acts=True)
            actions = list(actions.cpu().numpy())

        return actions

    def run_simulation(self):
        """Play one batch of episodes and store the transitions in the replay memory."""
        batch_size = cmd_args.batch_size
        if (self.pos + 1) * batch_size > len(self.sample_idxes):
            # Epoch finished: restart from a fresh permutation.
            self.pos = 0
            random.shuffle(self.sample_idxes)

        selected_idx = self.sample_idxes[self.pos * batch_size : (self.pos + 1) * batch_size]
        self.pos += 1
        self.env.setup([self.g_list[idx] for idx in selected_idx])

        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t)
            list_st = self.env.cloneState()
            self.env.step(list_at)

            is_terminal = self.env.isTerminal()
            # Rewards must be available exactly when the episode has ended.
            assert (self.env.rewards is not None) == is_terminal
            if is_terminal:
                rewards = self.env.rewards
                s_prime = None
            else:
                rewards = np.zeros(len(list_at), dtype=np.float32)
                s_prime = self.env.cloneState()

            self.mem_pool.add_list(list_st, list_at, rewards, s_prime, [is_terminal] * len(list_at), t)
            t += 1

    def eval(self):
        """Attack the test graphs greedily and report the classifier's result.

        Writes one line per test graph to ``<save_dir>/edge_added.txt``
        (label, prediction + 1, and the two values of ``added_edges[i]``).

        Returns:
            ``(mean reward, test_loss[1])`` where ``test_loss`` comes from
            ``loop_dataset`` and its second entry is printed as accuracy.
        """
        env = self.env
        env.setup(deepcopy(self.test_g_list))
        t = 0
        while not env.isTerminal():
            list_at = self.make_actions(t, greedy=True)
            env.step(list_at)
            t += 1
        test_loss = loop_dataset(env.g_list, env.classifier, list(range(len(env.g_list))), epoch=101)
        print('\033[93m average test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))
        with open('%s/edge_added.txt' % cmd_args.save_dir, 'w') as f:
            for i in range(len(self.test_g_list)):
                f.write('%d %d ' % (self.test_g_list[i].label, env.pred[i] + 1))
                f.write('%d %d\n' % env.added_edges[i])
        reward = np.mean(env.rewards)
        print(reward)
        return reward, test_loss[1]
data_folder=$dropbox/data/components 12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv} 13 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best 14 | 15 | lr=0.001 16 | max_lv=5 17 | frac_meta=0 18 | 19 | output_base=$dropbox/scratch/results/graph_classification/components/$save_fold 20 | 21 | output_root=$output_base/lv-${max_lv}-frac-${frac_meta} 22 | 23 | python plot_dqn.py \ 24 | -data_folder $data_folder \ 25 | -save_dir $output_root \ 26 | -max_n $max_n \ 27 | -min_n $min_n \ 28 | -max_lv $max_lv \ 29 | -frac_meta $frac_meta \ 30 | -min_c $min_c \ 31 | -max_c $max_c \ 32 | -n_graphs 5000 \ 33 | -er_p $p \ 34 | -learning_rate $lr \ 35 | -base_model_dump $base_model_dump \ 36 | -logfile $output_root/log.txt \ 37 | $@ 38 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/q_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | import numpy as np 5 | import torch 6 | import networkx as nx 7 | import random 8 | from torch.autograd import Variable 9 | from torch.nn.parameter import Parameter 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from tqdm import tqdm 14 | sys.path.append('%s/../../pytorch_structure2vec/s2v_lib' % os.path.dirname(os.path.realpath(__file__))) 15 | from pytorch_util import weights_init 16 | 17 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__))) 18 | from graph_embedding import EmbedMeanField, EmbedLoopyBP 19 | from cmd_args import cmd_args 20 | from modules.custom_mod import JaggedMaxModule 21 | from rl_common import local_args 22 | 23 | def greedy_actions(q_values, v_p, banned_list): 24 | actions = [] 25 | offset = 0 26 | banned_acts = [] 27 | prefix_sum = v_p.data.cpu().numpy() 28 | for i in 
class QNet(nn.Module):
    """Q-value head on top of a structure2vec graph embedding.

    Every node of every graph in a batch is a candidate action.  The node
    embedding is concatenated with the embedding of the graph it belongs to
    and mapped to a single scalar by one or two linear layers.
    """

    def __init__(self, s2v_module = None):
        """Create the scoring layers and, unless one is supplied, the embedder.

        Args:
            s2v_module: an existing embedding module to reuse; when ``None``
                a fresh one is built from ``cmd_args``.
        """
        super(QNet, self).__init__()

        embedders = {'mean_field': EmbedMeanField, 'loopy_bp': EmbedLoopyBP}
        if cmd_args.gm not in embedders:
            print('unknown gm %s' % cmd_args.gm)
            sys.exit()
        model = embedders[cmd_args.gm]

        embed_dim = cmd_args.latent_dim if cmd_args.out_dim == 0 else cmd_args.out_dim

        # The head input is [node embedding ; graph embedding], hence 2 * embed_dim.
        if local_args.mlp_hidden:
            self.linear_1 = nn.Linear(embed_dim * 2, local_args.mlp_hidden)
            head_in = local_args.mlp_hidden
        else:
            head_in = embed_dim * 2
        self.linear_out = nn.Linear(head_in, 1)

        # Runs before self.s2v is attached, so only the layers above are touched.
        weights_init(self)

        if s2v_module is not None:
            self.s2v = s2v_module
        else:
            self.s2v = model(latent_dim=cmd_args.latent_dim,
                             output_dim=cmd_args.out_dim,
                             num_node_feats=2,
                             num_edge_feats=0,
                             max_lv=cmd_args.max_lv)

    def PrepareFeatures(self, batch_graph, picked_nodes):
        """Build the two-column node feature matrix for a batch of graphs.

        Column 0 is 1 for ordinary nodes, column 1 is 1 for the node already
        picked in its graph (if any).

        Args:
            batch_graph: sequence of graphs exposing ``num_nodes``.
            picked_nodes: ``None`` or a per-graph sequence whose entries are
                ``None`` or a local node index.

        Returns:
            ``(node_feat, prefix_sum)`` where ``node_feat`` has shape
            ``(total_nodes, 2)`` and ``prefix_sum`` is a ``LongTensor`` of
            cumulative node counts, one entry per graph.
        """
        total_nodes = 0
        cumulative = []
        marked_rows = []
        for graph_id, graph in enumerate(batch_graph):
            if picked_nodes is not None and picked_nodes[graph_id] is not None:
                local_idx = picked_nodes[graph_id]
                assert local_idx >= 0 and local_idx < graph.num_nodes
                marked_rows.append(total_nodes + local_idx)
            total_nodes += graph.num_nodes
            cumulative.append(total_nodes)

        node_feat = torch.zeros(total_nodes, 2)
        node_feat[:, 0] = 1.0

        if len(marked_rows):
            # The numpy array shares storage with the tensor, so these writes
            # update node_feat in place.
            feat_view = node_feat.numpy()
            feat_view[marked_rows, 1] = 1.0
            feat_view[marked_rows, 0] = 0.0

        return node_feat, torch.LongTensor(cumulative)

    def add_offset(self, actions, v_p):
        """Convert per-graph node indices into row indices of the batched node matrix.

        Args:
            actions: per-graph local node indices.
            v_p: cumulative node counts, one entry per graph.

        Returns:
            A list with ``actions[i]`` shifted by the number of nodes in the
            graphs preceding graph ``i``.
        """
        prefix_sum = v_p.data.cpu().numpy()
        # Graph i starts where graph i - 1 ended; the first graph starts at 0.
        starts = [0] + list(prefix_sum[:-1])
        return [actions[i] + starts[i] for i in range(len(prefix_sum))]

    def rep_global_embed(self, graph_embed, v_p):
        """Repeat each graph embedding once per node of that graph.

        Args:
            graph_embed: one embedding row per graph.
            v_p: cumulative node counts, one entry per graph.

        Returns:
            ``graph_embed`` rows gathered so that row ``r`` is the embedding
            of the graph that node ``r`` belongs to.
        """
        prefix_sum = v_p.data.cpu().numpy()

        rep_idx = []
        begin = 0
        for graph_id, end in enumerate(prefix_sum):
            rep_idx += [graph_id] * (end - begin)
            begin = end

        rep_idx = Variable(torch.LongTensor(rep_idx))
        if cmd_args.ctx == 'gpu':
            rep_idx = rep_idx.cuda()
        return torch.index_select(graph_embed, 0, rep_idx)

    def forward(self, time_t, states, actions, greedy_acts = False):
        """Score actions for a batch of states.

        Args:
            time_t: step index; not used by this module itself.
            states: iterable of ``(graph, picked_node, banned_nodes)`` triples.
            actions: ``None`` to score every node of every graph, otherwise
                one local node index per graph to score just those.
            greedy_acts: when true, ``actions`` is replaced by the output of
                ``greedy_actions`` on the predicted values.

        Returns:
            ``(actions, raw_pred, prefix_sum)``.
        """
        batch_graph, picked_nodes, banned_list = zip(*states)

        node_feat, prefix_sum = self.PrepareFeatures(batch_graph, picked_nodes)
        if cmd_args.ctx == 'gpu':
            node_feat, prefix_sum = node_feat.cuda(), prefix_sum.cuda()
        prefix_sum = Variable(prefix_sum)

        embed, graph_embed = self.s2v(batch_graph, node_feat, None, pool_global=True)

        if actions is not None:
            # One row per graph: keep only the embedding of the chosen node.
            embed = embed[self.add_offset(actions, prefix_sum), :]
        else:
            # One row per node: broadcast the graph embedding to its nodes.
            graph_embed = self.rep_global_embed(graph_embed, prefix_sum)

        embed_s_a = torch.cat((embed, graph_embed), dim=1)
        if local_args.mlp_hidden:
            embed_s_a = F.relu(self.linear_1(embed_s_a))
        raw_pred = self.linear_out(embed_s_a)

        if greedy_acts:
            actions, _ = greedy_actions(raw_pred, prefix_sum, banned_list)

        return actions, raw_pred, prefix_sum
super(NStepQNet, self).__init__() 158 | 159 | list_mod = [QNet(s2v_module)] 160 | 161 | for i in range(1, num_steps): 162 | list_mod.append(QNet(list_mod[0].s2v)) 163 | 164 | self.list_mod = nn.ModuleList(list_mod) 165 | 166 | self.num_steps = num_steps 167 | 168 | def forward(self, time_t, states, actions, greedy_acts = False): 169 | assert time_t >= 0 and time_t < self.num_steps 170 | 171 | return self.list_mod[time_t](time_t, states, actions, greedy_acts) 172 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/q_net.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/graph_attack/q_net.pyc -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/run_dqn.sh: -------------------------------------------------------------------------------- 1 | dropbox=../../dropbox 2 | 3 | min_n=$1 4 | max_n=$2 5 | p=$3 6 | min_c=1 7 | max_c=3 8 | base_lv=$4 9 | data_folder=$dropbox/data/components 10 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv} 11 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best 12 | 13 | lr=0.001 14 | max_lv=5 15 | frac_meta=0 16 | 17 | output_base=$dropbox/scratch/results/graph_classification/components/$save_fold 18 | 19 | output_root=$output_base/lv-${max_lv}-frac-${frac_meta} 20 | 21 | if [ ! 
-e $output_root ]; 22 | then 23 | mkdir -p $output_root 24 | fi 25 | 26 | python dqn.py \ 27 | -data_folder $data_folder \ 28 | -save_dir $output_root \ 29 | -max_n $max_n \ 30 | -min_n $min_n \ 31 | -max_lv $max_lv \ 32 | -frac_meta $frac_meta \ 33 | -min_c $min_c \ 34 | -max_c $max_c \ 35 | -n_graphs 5000 \ 36 | -er_p $p \ 37 | -learning_rate $lr \ 38 | -base_model_dump $base_model_dump \ 39 | -logfile $output_root/log.txt \ 40 | $@ 41 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/run_ga.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dropbox=../../dropbox 4 | 5 | min_n=$1 6 | max_n=$2 7 | p=$3 8 | min_c=1 9 | max_c=3 10 | base_lv=$4 11 | data_folder=$dropbox/data/components 12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv} 13 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best 14 | 15 | idx_start=0 16 | num=2000 17 | pop=50 18 | cross=0.1 19 | mutate=0.2 20 | rounds=10 21 | 22 | output_base=$HOME/scratch/results/graph_classification/components/$save_fold 23 | output_root=$output_base/ga-p-${pop}-c-${cross}-m-${mutate}-r-${rounds} 24 | 25 | if [ ! 
-e $output_root ]; 26 | then 27 | mkdir -p $output_root 28 | fi 29 | 30 | python genetic_algorithm.py \ 31 | -data_folder $data_folder \ 32 | -save_dir $output_root \ 33 | -idx_start $idx_start \ 34 | -population_size $pop \ 35 | -cross_rate $cross \ 36 | -mutate_rate $mutate \ 37 | -rounds $rounds \ 38 | -num_instances $num \ 39 | -max_n $max_n \ 40 | -min_n $min_n \ 41 | -min_c $min_c \ 42 | -max_c $max_c \ 43 | -n_graphs 5000 \ 44 | -er_p $p \ 45 | -base_model_dump $base_model_dump \ 46 | $@ 47 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/run_grad.sh: -------------------------------------------------------------------------------- 1 | min_n=$1 2 | max_n=$2 3 | p=$3 4 | dropbox=../../dropbox/ 5 | data_folder=$dropbox/data/components 6 | min_c=1 7 | max_c=3 8 | max_lv=$4 9 | rand=random 10 | 11 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv} 12 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best 13 | 14 | output_root=./saved 15 | 16 | if [ ! 
-e $output_root ]; 17 | then 18 | mkdir -p $output_root 19 | fi 20 | 21 | python grad_attack.py \ 22 | -data_folder $data_folder \ 23 | -save_dir $output_root \ 24 | -max_n $max_n \ 25 | -min_n $min_n \ 26 | -rand_att_type $rand \ 27 | -min_c $min_c \ 28 | -max_c $max_c \ 29 | -base_model_dump $base_model_dump \ 30 | -n_graphs 5000 \ 31 | -er_p $p \ 32 | $@ 33 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_attack/run_trivial.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | min_n=$1 4 | max_n=$2 5 | p=$3 6 | dropbox=../../dropbox/ 7 | data_folder=$dropbox/data/components 8 | min_c=1 9 | max_c=3 10 | max_lv=$4 11 | # rand=exhaust 12 | rand=random 13 | 14 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv} 15 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best 16 | 17 | output_root=./saved 18 | 19 | if [ ! 
-e $output_root ]; 20 | then 21 | mkdir -p $output_root 22 | fi 23 | 24 | python er_trivial_attack.py \ 25 | -data_folder $data_folder \ 26 | -save_dir $output_root \ 27 | -max_n $max_n \ 28 | -min_n $min_n \ 29 | -max_lv $max_lv \ 30 | -rand_att_type $rand \ 31 | -min_c $min_c \ 32 | -max_c $max_c \ 33 | -base_model_dump $base_model_dump \ 34 | -n_graphs 5000 \ 35 | -er_p $p \ 36 | $@ 37 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_classification/er_components.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | import torch 7 | import random 8 | from torch.autograd import Variable 9 | from torch.nn.parameter import Parameter 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import torch.optim as optim 13 | from tqdm import tqdm 14 | # import cPickle as cp 15 | import pickle as cp 16 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__))) 17 | from cmd_args import cmd_args, save_args 18 | from dnn import GraphClassifier 19 | from graph_embedding import S2VGraph 20 | 21 | sys.path.append('%s/../data_generator' % os.path.dirname(os.path.realpath(__file__))) 22 | from data_util import load_pkl 23 | 24 | from graph_common import loop_dataset, load_er_data 25 | 26 | if __name__ == '__main__': 27 | random.seed(cmd_args.seed) 28 | np.random.seed(cmd_args.seed) 29 | torch.manual_seed(cmd_args.seed) 30 | 31 | label_map, train_glist, test_glist = load_er_data() 32 | 33 | if cmd_args.saved_model is not None and cmd_args.saved_model != '': 34 | print('loading model from %s' % cmd_args.saved_model) 35 | with open('%s-args.pkl' % cmd_args.saved_model, 'rb') as f: 36 | base_args = cp.load(f) 37 | classifier = GraphClassifier(label_map, **vars(base_args)) 38 | classifier.load_state_dict(torch.load(cmd_args.saved_model + '.model')) 39 
| else: 40 | classifier = GraphClassifier(label_map, **vars(cmd_args)) 41 | 42 | if cmd_args.ctx == 'gpu': 43 | classifier = classifier.cuda() 44 | if cmd_args.phase == 'test': 45 | test_loss = loop_dataset(test_glist, classifier, list(range(len(test_glist))), epoch=101) 46 | print('\033[93maverage test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1])) 47 | 48 | if cmd_args.phase == 'train': 49 | optimizer = optim.Adam(classifier.parameters(), lr=cmd_args.learning_rate) 50 | 51 | train_idxes = list(range(len(train_glist))) 52 | best_loss = None 53 | for epoch in range(cmd_args.num_epochs): 54 | random.shuffle(train_idxes) 55 | avg_loss = loop_dataset(train_glist, classifier, train_idxes, optimizer=optimizer, epoch=epoch) 56 | print('\033[92maverage training of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, avg_loss[0], avg_loss[1])) 57 | 58 | test_loss = loop_dataset(test_glist, classifier, list(range(len(test_glist))), epoch=epoch) 59 | print('\033[93maverage test of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, test_loss[0], test_loss[1])) 60 | 61 | if best_loss is None or test_loss[0] < best_loss: 62 | best_loss = test_loss[0] 63 | print('----saving to best model since this is the best valid loss so far.----') 64 | torch.save(classifier.state_dict(), cmd_args.save_dir + '/epoch-best.model') 65 | save_args(cmd_args.save_dir + '/epoch-best-args.pkl', cmd_args) -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_classification/graph_common.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | import numpy as np 6 | import torch 7 | import networkx as nx 8 | import random 9 | from torch.autograd import Variable 10 | from torch.nn.parameter import Parameter 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | from tqdm import tqdm 15 | 
def loop_dataset(g_list, classifier, sample_idxes, optimizer=None, bsize=cmd_args.batch_size, epoch=0, cl_epochs=150):
    """Run one pass over ``sample_idxes`` and return the average loss and accuracy.

    For ``epoch <= cl_epochs`` each batch is scored with the classifier's
    contrastive objective (``forward_cl`` / ``loss_cl``) against the output of
    ``gen_adv_output``; accuracy is reported as 0 in that regime.  For later
    epochs the classifier's own ``(_, loss, acc)`` output is used.

    Args:
        g_list: indexable collection of graphs.
        classifier: model providing ``forward_cl``, ``loss_cl`` and ``__call__``.
        sample_idxes: indices into ``g_list`` to visit, in order.
        optimizer: when given, a gradient step is taken after every batch;
            when ``None`` the pass is evaluation only.
        bsize: batch size.
        epoch: current epoch number, used only to pick the loss regime.
        cl_epochs: last epoch (inclusive) that uses the contrastive loss.

    Returns:
        ``numpy`` array ``[avg_loss, avg_acc]``, averaged over samples.
    """
    # Evaluation rounds up so the trailing partial batch is included;
    # training rounds down and drops it.
    total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
    pbar = tqdm(range(total_iters), unit='batch')

    total_loss = []
    n_samples = 0
    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize : (pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]

        if epoch <= cl_epochs:
            x1 = classifier.forward_cl(batch_graph)
            x2 = gen_adv_output(batch_graph, classifier, x1)
            # The perturbed view is a fixed target: no gradient flows through it.
            x2 = Variable(x2.detach().data, requires_grad=False)
            loss = classifier.loss_cl(x1, x2)
            # Plain float (not a 1-element tensor) so np.array([loss, acc])
            # below is a regular length-2 float array.
            acc = 0.0
        else:
            _, loss, acc = classifier(batch_graph)
            acc = acc.sum().item() / float(acc.size()[0])

        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        loss = float(loss.data.cpu().numpy())
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))

        # Weight by batch size so the final division yields a per-sample mean.
        total_loss.append(np.array([loss, acc]) * len(selected_idx))
        n_samples += len(selected_idx)

    if optimizer is None:
        assert n_samples == len(sample_idxes)

    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples
    return avg_loss
save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv} 14 | output_root=../../dropbox/scratch/results/graph_classification/components/$save_fold 15 | 16 | if [ ! -e $output_root ]; 17 | then 18 | mkdir -p $output_root 19 | fi 20 | 21 | python er_components.py \ 22 | -data_folder $data_folder \ 23 | -save_dir $output_root \ 24 | -max_n $max_n \ 25 | -min_n $min_n \ 26 | -max_lv $max_lv \ 27 | -min_c $min_c \ 28 | -max_c $max_c \ 29 | -n_graphs 5000 \ 30 | -er_p $p \ 31 | $@ 32 | -------------------------------------------------------------------------------- /adversarial_robustness/code/graph_classification/test_er_comp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | min_n=40 4 | max_n=50 5 | p=0.05 6 | dropbox=../../dropbox/ 7 | data_folder=$dropbox/data/components 8 | min_c=1 9 | max_c=3 10 | max_lv=4 11 | 12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv} 13 | output_root=$HOME/scratch/results/graph_classification/components/$save_fold 14 | saved_model=$output_root/epoch-best 15 | 16 | if [ ! 
-e $output_root ]; 17 | then 18 | mkdir -p $output_root 19 | fi 20 | 21 | python er_components.py \ 22 | -data_folder $data_folder \ 23 | -save_dir $output_root \ 24 | -max_n $max_n \ 25 | -min_n $min_n \ 26 | -max_lv $max_lv \ 27 | -min_c $min_c \ 28 | -max_c $max_c \ 29 | -saved_model $saved_model \ 30 | -n_graphs 5000 \ 31 | -er_p $p \ 32 | $@ 33 | -------------------------------------------------------------------------------- /semisupervised_TU/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | 3 | You can create a conda environment named simgrace with the command: 4 | ``` 5 | conda env create -f environment.yml 6 | conda activate simgrace 7 | ``` 8 | 9 | Then, you need to create the following directories for pre-trained models and finetuned results to avoid errors: 10 | 11 | ``` 12 | cd ./pre-training 13 | mkdir models 14 | mkdir logs 15 | cd .. 16 | cd ./finetuning 17 | mkdir logs 18 | cd .. 19 | ``` 20 | 21 | ## SimGRACE with Perturbations of Various Magnitudes 22 | 23 | Take NCI1 as an example: 24 | 25 | ### Pre-training: ### 26 | 27 | ``` 28 | cd ./pre-training 29 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 0 30 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 1 31 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 2 32 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 3 33 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 4 34 | ``` 35 | 36 | ### Finetuning: ### 37 | 38 | ``` 39 | cd ./finetuning 40 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 0 41 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 1 42 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py 
--dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 2 43 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 3 44 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 4 45 | ``` 46 | 47 | Five suffixes stand for five runs (with mean & std reported), and eta could be tuned among ```0.1, 1.0, 10.0, 100.0```. 48 | ```lr``` in pre-training should be tuned from {0.01, 0.001, 0.0001} and ```model_epoch``` in finetuning (this means the epoch checkpoint loaded from pre-trained model) from {20, 40, 60, 80, 100}. 49 | 50 | ## Acknowledgements 51 | * https://github.com/Shen-Lab/GraphCL/tree/master/semisupervised_TU 52 | * https://github.com/chentingpc/gfn. 53 | -------------------------------------------------------------------------------- /semisupervised_TU/environment.yml: -------------------------------------------------------------------------------- 1 | name: simgrace 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - ca-certificates=2020.6.24=0 7 | - certifi=2020.6.20=py36_0 8 | - ld_impl_linux-64=2.33.1=h53a641e_7 9 | - libedit=3.1.20191231=h7b6447c_0 10 | - libffi=3.3=he6710b0_1 11 | - libgcc-ng=9.1.0=hdf63c60_0 12 | - libstdcxx-ng=9.1.0=hdf63c60_0 13 | - ncurses=6.2=he6710b0_1 14 | - openssl=1.1.1g=h7b6447c_0 15 | - pip=20.1.1=py36_1 16 | - python=3.6.10=h7579374_2 17 | - readline=8.0=h7b6447c_0 18 | - setuptools=47.3.1=py36_0 19 | - sqlite=3.32.3=h62c20be_0 20 | - tk=8.6.10=hbc83047_0 21 | - wheel=0.34.2=py36_0 22 | - xz=5.2.5=h7b6447c_0 23 | - zlib=1.2.11=h7b6447c_3 24 | - pip: 25 | - decorator==4.4.2 26 | - future==0.18.2 27 | - isodate==0.6.0 28 | - joblib==0.16.0 29 | - networkx==2.4 30 | - numpy==1.19.0 31 | - pandas==1.0.5 32 | - pillow==7.2.0 33 | - plyfile==0.7.2 34 | - pyparsing==2.4.7 35 | - python-dateutil==2.8.1 36 | - pytz==2020.1 37 | - rdflib==5.0.0 38 | - 
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None):
    """Load a dataset with node features expanded according to ``feat_str``.

    ``feat_str`` is scanned for feature tokens; each one that is found sets
    the matching ``FeatureExpander`` option:

    * ``deg``            -> ``degree=True``
    * ``odeg<N>``        -> ``onehot_maxdeg=N``
    * ``ak<N>``/``ank<N>`` -> ``AK=N``
    * ``groupd<N>``      -> ``group_degree=N``
    * ``re<word>``       -> ``remove_edges=<word>`` (default ``'none'``)
    * ``randa<x>``       -> ``edge_noises_add=x``
    * ``randd<x>``       -> ``edge_noises_delete=x``
    * ``cent``           -> ``centrality=True``
    * ``coord``          -> pixel coordinates for ``ImageDataset``

    Args:
        name: dataset name; names containing ``MNIST`` or ``CIFAR`` are
            loaded as image datasets, everything else via ``TUDatasetExt``.
        sparse: accepted for interface compatibility; not used here.
        feat_str: feature specification string (see above).
        root: base data directory; ``None`` or ``''`` falls back to
            ``~/pyG_data``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for image datasets,
        otherwise a single dataset whose ``edge_attr`` has been cleared.
    """
    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    def _first_match(pattern, convert, default):
        # Value of the first capture of `pattern` in feat_str, or `default`.
        found = re.findall(pattern, feat_str)
        return convert(found[0]) if found else default

    # Named use_degree (not degree) so the imported
    # torch_geometric.utils.degree is not shadowed.
    use_degree = "deg" in feat_str
    onehot_maxdeg = _first_match(r"odeg(\d+)", int, None)
    k = _first_match(r"an{0,1}k(\d+)", int, 0)
    groupd = _first_match(r"groupd(\d+)", int, 0)
    remove_edges = _first_match(r"re(\w+)", str, 'none')
    edge_noises_add = _first_match(r"randa([\d\.]+)", float, 0)
    edge_noises_delete = _first_match(r"randd([\d\.]+)", float, 0)
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str)

        # Only the TU branch yields a single dataset object with `.data`;
        # the image branch returns a tuple.
        dataset.data.edge_attr = None

    return dataset
math:: 15 | \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}} 16 | \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta}, 17 | 18 | where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the 19 | adjacency matrix with inserted self-loops and 20 | :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix. 21 | 22 | Args: 23 | in_channels (int): Size of each input sample. 24 | out_channels (int): Size of each output sample. 25 | improved (bool, optional): If set to :obj:`True`, the layer computes 26 | :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`. 27 | (default: :obj:`False`) 28 | cached (bool, optional): If set to :obj:`True`, the layer will cache 29 | the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2} 30 | \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}`. 31 | (default: :obj:`False`) 32 | bias (bool, optional): If set to :obj:`False`, the layer will not learn 33 | an additive bias. (default: :obj:`True`) 34 | edge_norm (bool, optional): whether or not to normalize adj matrix. 35 | (default: :obj:`True`) 36 | gfn (bool, optional): If `True`, only linear transform (1x1 conv) is 37 | applied to every nodes. 
(default: :obj:`False`) 38 | """ 39 | 40 | def __init__(self, 41 | in_channels, 42 | out_channels, 43 | improved=False, 44 | cached=False, 45 | bias=True, 46 | edge_norm=True, 47 | gfn=False): 48 | super(GCNConv, self).__init__('add') 49 | 50 | self.in_channels = in_channels 51 | self.out_channels = out_channels 52 | self.improved = improved 53 | self.cached = cached 54 | self.cached_result = None 55 | self.edge_norm = edge_norm 56 | self.gfn = gfn 57 | 58 | self.weight = Parameter(torch.Tensor(in_channels, out_channels)) 59 | 60 | if bias: 61 | self.bias = Parameter(torch.Tensor(out_channels)) 62 | else: 63 | self.register_parameter('bias', None) 64 | 65 | self.reset_parameters() 66 | 67 | def reset_parameters(self): 68 | glorot(self.weight) 69 | zeros(self.bias) 70 | self.cached_result = None 71 | 72 | @staticmethod 73 | def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None): 74 | if edge_weight is None: 75 | edge_weight = torch.ones((edge_index.size(1), ), 76 | dtype=dtype, 77 | device=edge_index.device) 78 | edge_weight = edge_weight.view(-1) 79 | assert edge_weight.size(0) == edge_index.size(1) 80 | 81 | edge_index, edge_weight = remove_self_loops(edge_index, edge_weight) 82 | edge_index = add_self_loops(edge_index, num_nodes=num_nodes) 83 | # Add edge_weight for loop edges. 
84 | loop_weight = torch.full((num_nodes, ), 85 | 1 if not improved else 2, 86 | dtype=edge_weight.dtype, 87 | device=edge_weight.device) 88 | edge_weight = torch.cat([edge_weight, loop_weight], dim=0) 89 | 90 | row, col = edge_index 91 | deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes) 92 | deg_inv_sqrt = deg.pow(-0.5) 93 | deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0 94 | 95 | return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] 96 | 97 | def forward(self, x, edge_index, edge_weight=None): 98 | """""" 99 | x = torch.matmul(x, self.weight) 100 | if self.gfn: 101 | return x 102 | 103 | if not self.cached or self.cached_result is None: 104 | if self.edge_norm: 105 | edge_index, norm = GCNConv.norm( 106 | edge_index, x.size(0), edge_weight, self.improved, x.dtype) 107 | else: 108 | norm = None 109 | self.cached_result = edge_index, norm 110 | 111 | edge_index, norm = self.cached_result 112 | return self.propagate(edge_index, x=x, norm=norm) 113 | 114 | def message(self, x_j, norm): 115 | if self.edge_norm: 116 | return norm.view(-1, 1) * x_j 117 | else: 118 | return x_j 119 | 120 | def update(self, aggr_out): 121 | if self.bias is not None: 122 | aggr_out = aggr_out + self.bias 123 | return aggr_out 124 | 125 | def __repr__(self): 126 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, 127 | self.out_channels) 128 | -------------------------------------------------------------------------------- /semisupervised_TU/finetuning/image_dataset.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | 4 | import torch 5 | import torchvision 6 | from torchvision import datasets, transforms 7 | from torch_geometric.data import InMemoryDataset, Data 8 | 9 | 10 | class ImageDataset(InMemoryDataset): 11 | def __init__(self, 12 | root, 13 | name, 14 | train=True, 15 | transform=None, 16 | pre_transform=None, 17 | pre_filter=None, 18 | coord=False, 19 | 
def process(self):
    """Convert every image into a pixel-grid graph and save both splits.

    Each pixel is a node connected to itself and to its up-to-8 grid
    neighbours; the edge attribute comes from ``self.weight``.  The same
    ``edge_index`` / ``edge_attr`` tensors are shared by all images.  The
    collated training and test lists are written to
    ``self.processed_paths[0]`` and ``self.processed_paths[1]``.

    Raises:
        ValueError: if ``self.name`` is neither ``'MNIST'`` nor ``'CIFAR10'``.
    """
    # traindata/testdata start as None and are only set by download();
    # make sure they exist before wrapping them in loaders.
    if self.traindata is None or self.testdata is None:
        self.download()

    trainLoader = torch.utils.data.DataLoader(self.traindata)
    testLoader = torch.utils.data.DataLoader(self.testdata)

    if self.name == 'MNIST':
        num_row, num_col = 28, 28
    elif self.name == 'CIFAR10':
        num_row, num_col = 32, 32
    else:
        raise ValueError('dataset error')

    # Per axis, the clipped 3-wide windows contain 3 * size - 2 index pairs
    # in total, so the 2-D count is the product of the two.
    num_edges = (3 * num_row - 2) * (3 * num_col - 2)
    edge_index_array = np.zeros(shape=[2, num_edges])
    edge_attr_array = np.zeros(shape=[1, num_edges])
    curt = 0
    for j in range(num_row):
        for k in range(num_col):
            for m in range(max(j-1, 0), min(j+1, num_row-1)+1):
                for n in range(max(k-1, 0), min(k+1, num_col-1)+1):
                    # Row-major flat index: the row stride is the number of
                    # columns, matching the (num_row * num_col, C) view of
                    # the pixels below.
                    edge_index_array[0][curt] = j * num_col + k
                    edge_index_array[1][curt] = m * num_col + n
                    edge_attr_array[0][curt] = self.weight(j, k, m, n)
                    curt += 1
    edge_index = torch.from_numpy(edge_index_array).to(torch.int64)
    edge_attr = torch.from_numpy(edge_attr_array).to(torch.float)

    def transform_data(data_loader, edge_index, edge_attr):
        # Turn every (image, label) pair of the loader into a Data object.
        data_list = []
        _, num_row, num_col = data_loader.dataset[0][0].size()
        if self.coord:
            # Mean-centred column / row coordinate of every pixel, in the
            # same row-major order as the flattened pixels.
            x = torch.arange(num_col, dtype=torch.float)
            x = x.view((1, -1)).repeat(num_row, 1).view((-1, 1)) - x.mean()
            y = torch.arange(num_row, dtype=torch.float)
            y = y.view((-1, 1)).repeat(1, num_col).view((-1, 1)) - y.mean()
            coord = torch.cat([x, y], -1)

        for image, label in iter(data_loader):
            # (C, H, W) -> (H, W, C) -> (H * W, C): one feature row per pixel.
            x = image[0].permute([1,2,0]).view(
                num_row * num_col, image[0].size()[0])
            if self.coord:
                x = torch.cat([x, coord], -1)
            data = Data(
                edge_index=edge_index, edge_attr=edge_attr, x=x, y=label)
            # NOTE(review): pre_filter's return value replaces `data` here,
            # i.e. it is treated as a transform rather than a keep/drop
            # predicate -- confirm this is what callers expect.
            if self.pre_filter is not None:
                data = self.pre_filter(data)
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)
        return data_list

    train_data_list = transform_data(trainLoader, edge_index, edge_attr)
    torch.save(self.collate(train_data_list), self.processed_paths[0])

    test_data_list = transform_data(testLoader, edge_index, edge_attr)
    torch.save(self.collate(test_data_list), self.processed_paths[1])
pos_y, pos_x_new, pos_y_new): 126 | dist = (pos_x - pos_x_new) ** 2 + (pos_y - pos_y_new) ** 2 127 | return math.exp(-dist) 128 | 129 | def __repr__(self): 130 | return '{}({})'.format(self.name, len(self)) 131 | 132 | -------------------------------------------------------------------------------- /semisupervised_TU/finetuning/tu_dataset.py: -------------------------------------------------------------------------------- 1 | from torch_geometric.datasets import TUDataset 2 | import torch 3 | from itertools import repeat, product 4 | from copy import deepcopy 5 | 6 | 7 | 8 | class TUDatasetExt(TUDataset): 9 | r"""A variety of graph kernel benchmark datasets, *.e.g.* "IMDB-BINARY", 10 | "REDDIT-BINARY" or "PROTEINS", collected from the `TU Dortmund University 11 | `_. 12 | 13 | Args: 14 | root (string): Root directory where the dataset should be saved. 15 | name (string): The `name `_ of 16 | the dataset. 17 | transform (callable, optional): A function/transform that takes in an 18 | :obj:`torch_geometric.data.Data` object and returns a transformed 19 | version. The data object will be transformed before every access. 20 | (default: :obj:`None`) 21 | pre_transform (callable, optional): A function/transform that takes in 22 | an :obj:`torch_geometric.data.Data` object and returns a 23 | transformed version. The data object will be transformed before 24 | being saved to disk. (default: :obj:`None`) 25 | pre_filter (callable, optional): A function that takes in an 26 | :obj:`torch_geometric.data.Data` object and returns a boolean 27 | value, indicating whether the data object should be included in the 28 | final dataset. (default: :obj:`None`) 29 | use_node_attr (bool, optional): If :obj:`True`, the dataset will 30 | contain additional continuous node features (if present). 
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None, aug=None, aug_ratio=None):
    """Build the dataset ``name`` with the feature expansion described by ``feat_str``.

    ``feat_str`` is scanned for these tokens:

    * ``deg``        -- enable the degree feature
    * ``odeg<N>``    -- one-hot degree with maximum ``N``
    * ``ak<N>`` / ``ank<N>`` -- value passed as ``AK``
    * ``groupd<N>``  -- value passed as ``group_degree``
    * ``re<word>``   -- value passed as ``remove_edges`` (default ``'none'``)
    * ``randa<x>`` / ``randd<x>`` -- edge noise to add / delete
    * ``cent``       -- enable centrality
    * ``coord``      -- pixel coordinates for image datasets

    Args:
        name: Dataset name; names containing ``MNIST`` or ``CIFAR`` select the
            image datasets, anything else is loaded through ``TUDatasetExt``.
        sparse: Unused; kept for interface compatibility.
        feat_str: Feature specification, see above.
        root: Directory that holds the dataset folder; defaults to
            ``~/pyG_data`` when ``None`` or empty.
        aug, aug_ratio: Forwarded to ``TUDatasetExt``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for image datasets,
        otherwise a single ``TUDatasetExt`` instance.
    """
    def first_match(pattern, convert, default):
        # Convert the first captured group, or fall back when absent.
        found = re.findall(pattern, feat_str)
        return convert(found[0]) if found else default

    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    # Named `use_degree` so the imported `degree` utility is not shadowed.
    use_degree = "deg" in feat_str
    onehot_maxdeg = first_match(r"odeg(\d+)", int, None)
    k = first_match(r"an{0,1}k(\d+)", int, 0)
    groupd = first_match(r"groupd(\d+)", int, 0)
    remove_edges = first_match(r"re(\w+)", str, 'none')
    edge_noises_add = first_match(r"randa([\d\.]+)", float, 0)
    edge_noises_delete = first_match(r"randd([\d\.]+)", float, 0)
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    print(aug, aug_ratio)
    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str, aug=aug, aug_ratio=aug_ratio)

        # Only this branch yields a single dataset object with a `.data`
        # attribute; the image branch returns a plain tuple.
        dataset.data.edge_attr = None

    return dataset
35 | (default: :obj:`True`) 36 | gfn (bool, optional): If `True`, only linear transform (1x1 conv) is 37 | applied to every nodes. (default: :obj:`False`) 38 | """ 39 | 40 | def __init__(self, 41 | in_channels, 42 | out_channels, 43 | improved=False, 44 | cached=False, 45 | bias=True, 46 | edge_norm=True, 47 | gfn=False): 48 | super(GCNConv, self).__init__('add') 49 | 50 | self.in_channels = in_channels 51 | self.out_channels = out_channels 52 | self.improved = improved 53 | self.cached = cached 54 | self.cached_result = None 55 | self.edge_norm = edge_norm 56 | self.gfn = gfn 57 | 58 | self.weight = Parameter(torch.Tensor(in_channels, out_channels)) 59 | 60 | if bias: 61 | self.bias = Parameter(torch.Tensor(out_channels)) 62 | else: 63 | self.register_parameter('bias', None) 64 | 65 | self.reset_parameters() 66 | 67 | def reset_parameters(self): 68 | glorot(self.weight) 69 | zeros(self.bias) 70 | self.cached_result = None 71 | 72 | @staticmethod 73 | def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None): 74 | if edge_weight is None: 75 | edge_weight = torch.ones((edge_index.size(1), ), 76 | dtype=dtype, 77 | device=edge_index.device) 78 | edge_weight = edge_weight.view(-1) 79 | assert edge_weight.size(0) == edge_index.size(1) 80 | 81 | edge_index, edge_weight = remove_self_loops(edge_index, edge_weight) 82 | edge_index = add_self_loops(edge_index, num_nodes=num_nodes) 83 | # Add edge_weight for loop edges. 
84 | loop_weight = torch.full((num_nodes, ), 85 | 1 if not improved else 2, 86 | dtype=edge_weight.dtype, 87 | device=edge_weight.device) 88 | edge_weight = torch.cat([edge_weight, loop_weight], dim=0) 89 | 90 | row, col = edge_index 91 | deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes) 92 | deg_inv_sqrt = deg.pow(-0.5) 93 | deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0 94 | 95 | return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] 96 | 97 | def forward(self, x, edge_index, edge_weight=None): 98 | """""" 99 | x = torch.matmul(x, self.weight) 100 | if self.gfn: 101 | return x 102 | 103 | if not self.cached or self.cached_result is None: 104 | if self.edge_norm: 105 | edge_index, norm = GCNConv.norm( 106 | edge_index, x.size(0), edge_weight, self.improved, x.dtype) 107 | else: 108 | norm = None 109 | self.cached_result = edge_index, norm 110 | 111 | edge_index, norm = self.cached_result 112 | return self.propagate(edge_index, x=x, norm=norm) 113 | 114 | def message(self, x_j, norm): 115 | if self.edge_norm: 116 | return norm.view(-1, 1) * x_j 117 | else: 118 | return x_j 119 | 120 | def update(self, aggr_out): 121 | if self.bias is not None: 122 | aggr_out = aggr_out + self.bias 123 | return aggr_out 124 | 125 | def __repr__(self): 126 | return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, 127 | self.out_channels) 128 | -------------------------------------------------------------------------------- /semisupervised_TU/pre-training/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def print_weights(model): 4 | for name, param in model.named_parameters(): 5 | if param.requires_grad: 6 | print(name, param.shape) 7 | sys.stdout.flush() 8 | 9 | 10 | def logger(info): 11 | fold, epoch = info['fold'], info['epoch'] 12 | if epoch == 1 or epoch % 10 == 0: 13 | train_acc, test_acc = info['train_acc'], info['test_acc'] 14 | print('{:02d}/{:03d}: Train Acc: {:.3f}, 
Test Accuracy: {:.3f}'.format( 15 | fold, epoch, train_acc, test_acc)) 16 | sys.stdout.flush() 17 | 18 | 19 | -------------------------------------------------------------------------------- /simgrace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/simgrace.png -------------------------------------------------------------------------------- /transfer_learning/README.md: -------------------------------------------------------------------------------- 1 | ## Dependencies & Dataset 2 | 3 | Please refer to https://github.com/snap-stanford/pretrain-gnns#installation for environment setup and https://github.com/snap-stanford/pretrain-gnns#dataset-download to download the datasets. 4 | 5 | ## Training & Evaluation 6 | ### Step 1: Pre-training: ### 7 | ``` 8 | cd ./bio 9 | python pretrain_simgrace.py --eta 0.1 10 | cd ../chem 11 | python pretrain_simgrace.py --eta 0.1 12 | ``` 13 | ### Step 2: Finetuning: ### 14 | ``` 15 | cd ./bio 16 | ./finetune.sh 17 | cd ../chem 18 | ./run.sh 19 | ``` 20 | Results will be recorded in ```result.log```. 21 | 22 | 23 | ## Acknowledgements 24 | 25 | * https://github.com/snap-stanford/pretrain-gnns. 26 | * https://github.com/Shen-Lab/GraphCL/tree/master/transferLearning_MoleculeNet_PPI. 27 | -------------------------------------------------------------------------------- /transfer_learning/bio/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from torch.utils.data.dataloader import default_collate 3 | 4 | from batch import BatchFinetune, BatchMasking, BatchAE, BatchSubstructContext 5 | 6 | class DataLoaderFinetune(torch.utils.data.DataLoader): 7 | r"""Data loader which merges data objects from a 8 | :class:`torch_geometric.data.dataset` to a mini-batch. 9 | Args: 10 | dataset (Dataset): The dataset from which to load the data.
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    ``BatchMasking.from_data_list``.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Extra keyword arguments forwarded to
            :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function by reference instead of wrapping it in a
        # lambda: lambdas cannot be pickled, which breaks worker start-up
        # when the loader is sent to spawned worker processes.
        super(DataLoaderMasking, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
#### GIN fine-tuning
split=species
### for GIN
for runseed in 0 1 2 3 4 5 6 7 8 9
do
# The checkpoints in models_simgrace/ are named simgrace_<epoch>.pth.
python finetune.py --model_file models_simgrace/simgrace_80.pth --split $split --epochs 10 --device 0 --runseed $runseed --gnn_type gin --lr 1e-3
done
edgepred masking 8 | do 9 | model_file=${unsup} 10 | python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin 11 | 12 | model_file=supervised_${unsup} 13 | python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin 14 | done 15 | 16 | python finetune.py --split $split --filename gin_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type gin 17 | python finetune.py --split $split --model_file model_gin/supervised.pth --filename gin_supervised --epochs 50 --device $device --runseed $runseed --gnn_type gin 18 | 19 | 20 | ### for other GNNs 21 | for gnn_type in gcn gat graphsage 22 | do 23 | python finetune.py --split $split --filename ${gnn_type}_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type 24 | 25 | model_file=${gnn_type}_supervised_masking 26 | python finetune.py --model_file model_architecture/${model_file}.pth --split $split --filename ${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type 27 | 28 | done -------------------------------------------------------------------------------- /transfer_learning/bio/models_simgrace/simgrace_100.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_100.pth -------------------------------------------------------------------------------- /transfer_learning/bio/models_simgrace/simgrace_20.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_20.pth 
def cycle_index(num, shift):
    """Return the indices ``0..num-1`` rotated by ``shift`` positions.

    Entry ``i`` of the result is ``(i + shift) % num``, so indexing a tensor
    with it pairs every element with the one ``shift`` places further on,
    wrapping around at the end.

    Args:
        num: Number of indices to produce.
        shift: Rotation amount.  Any integer is accepted, including ``0``,
            values larger than ``num`` and negative values.

    Returns:
        A 1-D ``int64`` tensor of length ``num``.
    """
    if num == 0:
        # Nothing to rotate; also avoids taking a remainder modulo zero.
        return torch.arange(0)
    return (torch.arange(num) + shift) % num
def train(args, model, device, loader, optimizer):
    """Run one epoch of infomax-style pre-training over ``loader``.

    For each batch the node embeddings from ``model.gnn`` are scored by
    ``model.discriminator`` against the pooled, sigmoid-squashed summary of
    their own graph (positives) and against the summary of the next graph in
    the batch, obtained by rotating the summaries by one (negatives).

    Args:
        args: Unused here; kept for the shared ``train`` signature.
        model: Object exposing ``gnn``, ``pool``, ``discriminator`` and ``loss``.
        device: Device every batch is moved to.
        loader: Iterable of batches.
        optimizer: Optimizer stepped once per batch.

    Returns:
        ``(mean accuracy, mean loss)`` averaged over the batches.
    """
    model.train()

    acc_total = 0
    loss_total = 0

    progress = tqdm(loader, desc="Iteration")
    for step, batch in enumerate(progress):
        batch = batch.to(device)
        node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
        graph_summary = torch.sigmoid(model.pool(node_emb, batch.batch))

        # Broadcast each graph's summary to its nodes.
        own_summary = graph_summary[batch.batch]

        # Rotate the summaries by one so each node sees another graph's.
        rotated_summary = graph_summary[cycle_index(len(graph_summary), 1)]
        other_summary = rotated_summary[batch.batch]

        pos_logits = model.discriminator(node_emb, own_summary)
        neg_logits = model.discriminator(node_emb, other_summary)

        optimizer.zero_grad()
        pos_loss = model.loss(pos_logits, torch.ones_like(pos_logits))
        neg_loss = model.loss(neg_logits, torch.zeros_like(neg_logits))
        loss = pos_loss + neg_loss
        loss.backward()

        optimizer.step()

        loss_total += float(loss.detach().cpu().item())
        num_correct = torch.sum(pos_logits > 0) + torch.sum(neg_logits < 0)
        acc = num_correct.to(torch.float32) / float(2 * len(pos_logits))
        acc_total += float(acc.detach().cpu().item())

    num_batches = step + 1
    return acc_total / num_batches, loss_total / num_batches
parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks') 85 | parser.add_argument('--device', type=int, default=0, 86 | help='which gpu to use if any (default: 0)') 87 | parser.add_argument('--batch_size', type=int, default=256, 88 | help='input batch size for training (default: 256)') 89 | parser.add_argument('--epochs', type=int, default=100, 90 | help='number of epochs to train (default: 100)') 91 | parser.add_argument('--lr', type=float, default=0.001, 92 | help='learning rate (default: 0.001)') 93 | parser.add_argument('--decay', type=float, default=0, 94 | help='weight decay (default: 0)') 95 | parser.add_argument('--num_layer', type=int, default=5, 96 | help='number of GNN message passing layers (default: 5).') 97 | parser.add_argument('--emb_dim', type=int, default=300, 98 | help='embedding dimensions (default: 300)') 99 | parser.add_argument('--dropout_ratio', type=float, default=0, 100 | help='dropout ratio (default: 0)') 101 | parser.add_argument('--JK', type=str, default="last", 102 | help='how the node features across layers are combined. 
last, sum, max or concat') 103 | parser.add_argument('--gnn_type', type=str, default="gin") 104 | parser.add_argument('--model_file', type = str, default = '', help='filename to output the pre-trained model') 105 | parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.") 106 | parser.add_argument('--num_workers', type=int, default = 4, help='number of workers for dataset loading') 107 | args = parser.parse_args() 108 | 109 | 110 | torch.manual_seed(0) 111 | np.random.seed(0) 112 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 113 | if torch.cuda.is_available(): 114 | torch.cuda.manual_seed_all(0) 115 | 116 | #set up dataset 117 | root_unsupervised = 'dataset/unsupervised' 118 | dataset = BioDataset(root_unsupervised, data_type='unsupervised') 119 | 120 | print(dataset) 121 | 122 | loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 123 | 124 | #set up model 125 | gnn = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type) 126 | 127 | discriminator = Discriminator(args.emb_dim) 128 | 129 | model = Infomax(gnn, discriminator) 130 | 131 | model.to(device) 132 | 133 | #set up optimizer 134 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) 135 | print(optimizer) 136 | 137 | 138 | for epoch in range(1, args.epochs+1): 139 | print("====epoch " + str(epoch)) 140 | 141 | train_acc, train_loss = train(args, model, device, loader, optimizer) 142 | 143 | print(train_acc) 144 | print(train_loss) 145 | 146 | 147 | if not args.model_file == "": 148 | torch.save(model.gnn.state_dict(), args.model_file + ".pth") 149 | 150 | if __name__ == "__main__": 151 | main() 152 | -------------------------------------------------------------------------------- /transfer_learning/bio/pretrain_edgepred.py: 
def train(args, model, device, loader, optimizer):
    """Run one epoch of edge-prediction pre-training over ``loader``.

    Node pairs are scored by the inner product of their embeddings.  Every
    second column of ``batch.edge_index`` is used as a positive pair and the
    columns of ``batch.negative_edge_index`` as negative pairs; both are fed
    to the module-level ``criterion``.

    Args:
        args: Unused here; kept for the shared ``train`` signature.
        model: Callable returning node embeddings for
            ``(x, edge_index, edge_attr)``.
        device: Device every batch is moved to.
        loader: Iterable of batches.
        optimizer: Optimizer stepped once per batch.

    Returns:
        ``(mean accuracy, mean loss)`` averaged over the batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.train()

    train_acc_accum = 0
    train_loss_accum = 0
    num_batches = 0

    for batch in tqdm(loader, desc="Iteration"):
        num_batches += 1
        batch = batch.to(device)
        node_emb = model(batch.x, batch.edge_index, batch.edge_attr)

        pos_src, pos_dst = batch.edge_index[0, ::2], batch.edge_index[1, ::2]
        neg_src, neg_dst = batch.negative_edge_index[0], batch.negative_edge_index[1]
        positive_score = torch.sum(node_emb[pos_src] * node_emb[pos_dst], dim=1)
        negative_score = torch.sum(node_emb[neg_src] * node_emb[neg_dst], dim=1)

        optimizer.zero_grad()
        loss = criterion(positive_score, torch.ones_like(positive_score)) + criterion(negative_score, torch.zeros_like(negative_score))
        loss.backward()
        optimizer.step()

        train_loss_accum += float(loss.detach().cpu().item())

        # Divide by the number of pairs actually scored rather than assuming
        # there are exactly as many negatives as positives.
        num_correct = torch.sum(positive_score > 0) + torch.sum(negative_score < 0)
        num_scored = positive_score.numel() + negative_score.numel()
        if num_scored:
            acc = num_correct.to(torch.float32) / float(num_scored)
            train_acc_accum += float(acc.detach().cpu().item())

    if num_batches == 0:
        raise ValueError("loader yielded no batches")

    return train_acc_accum / num_batches, train_loss_accum / num_batches
default=100, 53 | help='number of epochs to train (default: 100)') 54 | parser.add_argument('--lr', type=float, default=0.001, 55 | help='learning rate (default: 0.001)') 56 | parser.add_argument('--decay', type=float, default=0, 57 | help='weight decay (default: 0)') 58 | parser.add_argument('--num_layer', type=int, default=5, 59 | help='number of GNN message passing layers (default: 5).') 60 | parser.add_argument('--emb_dim', type=int, default=300, 61 | help='embedding dimensions (default: 300)') 62 | parser.add_argument('--dropout_ratio', type=float, default=0, 63 | help='dropout ratio (default: 0)') 64 | parser.add_argument('--JK', type=str, default="last", 65 | help='how the node features across layers are combined. last, sum, max or concat') 66 | parser.add_argument('--gnn_type', type=str, default="gin") 67 | parser.add_argument('--model_file', type = str, default = '', help='filename to output the pre-trained model') 68 | parser.add_argument('--num_workers', type=int, default = 12, help='number of workers for dataset loading') 69 | args = parser.parse_args() 70 | 71 | 72 | torch.manual_seed(0) 73 | np.random.seed(0) 74 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 75 | if torch.cuda.is_available(): 76 | torch.cuda.manual_seed_all(0) 77 | 78 | #set up dataset 79 | root_unsupervised = 'dataset/unsupervised' 80 | dataset = BioDataset(root_unsupervised, data_type='unsupervised', transform = NegativeEdge()) 81 | dataset.data.to(device) 82 | 83 | print(dataset) 84 | 85 | loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 86 | 87 | #set up model 88 | model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type).to(device) 89 | 90 | model.to(device) 91 | 92 | #set up optimizer 93 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) 94 | #optimizer = 
def compute_accuracy(pred, target):
    """Return the fraction of rows of ``pred`` whose arg-max column equals ``target``.

    Args:
        pred: 2-D score tensor, one row per sample.
        target: Tensor of class indices, one per row of ``pred``.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.
    """
    _, predicted = torch.max(pred.detach(), dim=1)
    num_correct = torch.sum(predicted == target).cpu().item()
    return float(num_correct) / len(pred)
def main():
    """Pre-train a GNN encoder with the edge-masking objective.

    Parses the command-line options, builds the unsupervised ``BioDataset``
    with a ``MaskEdge`` transform, and trains the encoder jointly with a
    linear edge-type classifier for ``--epochs`` epochs. When ``--model_file``
    is non-empty the encoder weights (not the linear head) are written to
    ``<model_file>.pth`` after training.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    # The help text previously read "dropout ratio", copied from the option above.
    parser.add_argument('--mask_rate', type=float, default=0.15,
                        help='mask rate (default: 0.15)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features are combined across layers. last, sum, max or concat')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--model_file', type=str, default = '', help='filename to output the model')
    parser.add_argument('--seed', type=int, default=0, help = "Random seed (default: 0).")
    parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
    args = parser.parse_args()

    # Honour --seed instead of a hard-coded 0; the default keeps the old behaviour.
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    print("num layer: %d mask rate: %f" %(args.num_layer, args.mask_rate))

    #set up dataset
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised, data_type='unsupervised', transform = MaskEdge(mask_rate = args.mask_rate))

    print(dataset)

    loader = DataLoaderMasking(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)

    #set up models: the GNN encoder and a linear head over edge representations
    model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type).to(device)
    #Linear layer for classifying different edge types
    linear_pred_edges = torch.nn.Linear(args.emb_dim, 7).to(device)

    model_list = [model, linear_pred_edges]

    #set up optimizers (one per module, same hyper-parameters)
    optimizer_model = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    optimizer_linear_pred_edges = optim.Adam(linear_pred_edges.parameters(), lr=args.lr, weight_decay=args.decay)

    optimizer_list = [optimizer_model, optimizer_linear_pred_edges]

    for epoch in range(1, args.epochs+1):
        print("====epoch " + str(epoch))

        train_loss, train_acc = train(args, model_list, loader, optimizer_list, device)
        print(train_loss, train_acc)

    # Only the encoder is saved; the edge-type head is discarded.
    if args.model_file != "":
        torch.save(model.state_dict(), args.model_file + ".pth")
help='number of epochs to train (default: 100)') 53 | parser.add_argument('--lr', type=float, default=0.001, 54 | help='learning rate (default: 0.001)') 55 | parser.add_argument('--decay', type=float, default=0, 56 | help='weight decay (default: 0)') 57 | parser.add_argument('--num_layer', type=int, default=5, 58 | help='number of GNN message passing layers (default: 5).') 59 | parser.add_argument('--emb_dim', type=int, default=300, 60 | help='embedding dimensions (default: 300)') 61 | parser.add_argument('--dropout_ratio', type=float, default=0.2, 62 | help='dropout ratio (default: 0.2)') 63 | parser.add_argument('--graph_pooling', type=str, default="mean", 64 | help='graph level pooling (sum, mean, max, set2set, attention)') 65 | parser.add_argument('--JK', type=str, default="last", 66 | help='how the node features across layers are combined. last, sum, max or concat') 67 | parser.add_argument('--input_model_file', type=str, default = '', help='filename to read the model (if there is any)') 68 | parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model') 69 | parser.add_argument('--gnn_type', type=str, default="gin") 70 | parser.add_argument('--num_workers', type=int, default = 0, help='number of workers for dataset loading') 71 | parser.add_argument('--seed', type=int, default=42, help = "Seed for splitting dataset.") 72 | parser.add_argument('--split', type=str, default = "species", help='Random or species split') 73 | args = parser.parse_args() 74 | 75 | 76 | torch.manual_seed(0) 77 | np.random.seed(0) 78 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 79 | if torch.cuda.is_available(): 80 | torch.cuda.manual_seed_all(0) 81 | 82 | root_supervised = 'dataset/supervised' 83 | 84 | dataset = BioDataset(root_supervised, data_type='supervised') 85 | 86 | if args.split == "random": 87 | print("random splitting") 88 | train_dataset, valid_dataset, 
test_dataset = random_split(dataset, seed = args.seed) 89 | print(train_dataset) 90 | print(valid_dataset) 91 | pretrain_dataset = combine_dataset(train_dataset, valid_dataset) 92 | print(pretrain_dataset) 93 | elif args.split == "species": 94 | print("species splitting") 95 | trainval_dataset, test_dataset = species_split(dataset) 96 | test_dataset_broad, test_dataset_none, _ = random_split(test_dataset, seed = args.seed, frac_train=0.5, frac_valid=0.5, frac_test=0) 97 | print(trainval_dataset) 98 | print(test_dataset_broad) 99 | pretrain_dataset = combine_dataset(trainval_dataset, test_dataset_broad) 100 | print(pretrain_dataset) 101 | #train_dataset, valid_dataset, _ = random_split(trainval_dataset, seed = args.seed, frac_train=0.85, frac_valid=0.15, frac_test=0) 102 | else: 103 | raise ValueError("Unknown split name.") 104 | 105 | 106 | train_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 107 | 108 | num_tasks = len(pretrain_dataset[0].go_target_pretrain) 109 | 110 | #set up model 111 | model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK = args.JK, drop_ratio = args.dropout_ratio, graph_pooling = args.graph_pooling, gnn_type = args.gnn_type) 112 | if not args.input_model_file == "": 113 | model.from_pretrained(args.input_model_file + ".pth") 114 | 115 | model.to(device) 116 | 117 | #set up optimizer 118 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) 119 | print(optimizer) 120 | 121 | for epoch in range(1, args.epochs+1): 122 | print("====epoch " + str(epoch)) 123 | 124 | train_loss = train(args, model, device, train_loader, optimizer) 125 | 126 | if not args.output_model_file == "": 127 | torch.save(model.gnn.state_dict(), args.output_model_file + ".pth") 128 | 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /transfer_learning/bio/splitters.py: 
def random_split(dataset, frac_train=0.8, frac_valid=0.1, frac_test=0.1,
                 seed=0):
    """
    Randomly partition ``dataset`` into train, valid and test subsets.

    Adapted from graph-pretrain
    :param dataset: collection supporting ``len()`` and indexing with a
        ``torch.long`` index tensor
    :param frac_train: fraction of the items placed in the train subset
    :param frac_valid: fraction of the items placed in the valid subset
    :param frac_test: fraction of the items placed in the test subset
    :param seed: passed to ``random.seed`` before shuffling; this reseeds the
        global ``random`` module state
    :return: train, valid, test slices of the input dataset obj. ``test`` is
        ``None`` when ``frac_test == 0``.
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)

    num_mols = len(dataset)
    random.seed(seed)
    all_idx = list(range(num_mols))
    random.shuffle(all_idx)

    # Sizes are truncated with int(); any remainder falls into the test slice.
    train_end = int(frac_train * num_mols)
    valid_end = train_end + int(frac_valid * num_mols)

    train_idx = all_idx[:train_end]
    valid_idx = all_idx[train_end:valid_end]
    test_idx = all_idx[valid_end:]

    assert len(set(train_idx).intersection(set(valid_idx))) == 0
    assert len(set(valid_idx).intersection(set(test_idx))) == 0
    assert len(train_idx) + len(valid_idx) + len(test_idx) == num_mols

    # Pin the dtype: torch.tensor([]) defaults to float32, which is not a
    # valid index, so an empty split used to fail here.
    train_dataset = dataset[torch.tensor(train_idx, dtype=torch.long)]
    valid_dataset = dataset[torch.tensor(valid_idx, dtype=torch.long)]
    if frac_test == 0:
        test_dataset = None
    else:
        test_dataset = dataset[torch.tensor(test_idx, dtype=torch.long)]

    return train_dataset, valid_dataset, test_dataset
class DataLoaderSubstructContext(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # The collate function is passed by reference instead of through a
        # lambda: a lambda cannot be pickled, which breaks worker start-up
        # (num_workers > 0) when multiprocessing uses the "spawn" method.
        super(DataLoaderSubstructContext, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchSubstructContext.from_data_list,
            **kwargs)
#### GIN fine-tuning
# Usage: finetune.sh DATASET
# Runs finetune.py on DATASET once per run seed (0-9), starting from the
# models_simgrace/simgrace_80.pth checkpoint.
split=scaffold
dataset=${1:?"usage: finetune.sh DATASET"}

# Exported so the python child processes see it; a plain assignment only
# sets a shell-local variable.
export CUDA_VISIBLE_DEVICES=0
for runseed in 0 1 2 3 4 5 6 7 8 9
do
python finetune.py --input_model_file models_simgrace/simgrace_80.pth --split "$split" --runseed "$runseed" --gnn_type gin --dataset "$dataset" --lr 1e-3 --epochs 100
done
finetune.py --split $split --input_model_file model_gin/supervised.pth --filename ${dataset}/gin_supervised --device $device --runseed $runseed --gnn_type gin --dataset $dataset 20 | 21 | 22 | ### for other GNNs 23 | for gnn_type in gcn gat graphsage 24 | do 25 | python finetune.py --split $split --filename ${dataset}/${gnn_type}_nopretrain --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset 26 | 27 | model_file=${gnn_type}_supervised_contextpred 28 | python finetune.py --input_model_file model_architecture/${model_file}.pth --split $split --filename ${dataset}/${model_file} --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset 29 | 30 | done 31 | done 32 | 33 | 34 | fold_idx=$1 35 | 36 | for batch_size in 8 64 37 | do 38 | for drop_ratio in 0 0.2 0.5 39 | do 40 | for dataset in ptc_mr mutag 41 | do 42 | for unsup in contextpred edgepred masking infomax 43 | do 44 | 45 | model_file=${unsup} 46 | python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size 47 | 48 | 49 | model_file=supervised_${unsup} 50 | python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size 51 | 52 | done 53 | 54 | model_file=supervised 55 | python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size 56 | 57 | python finetune_mutag_ptc.py --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/nopretrain --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size 58 | 59 | 
done 60 | done 61 | done -------------------------------------------------------------------------------- /transfer_learning/chem/models_simgrace/simgrace_100.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_100.pth -------------------------------------------------------------------------------- /transfer_learning/chem/models_simgrace/simgrace_20.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_20.pth -------------------------------------------------------------------------------- /transfer_learning/chem/models_simgrace/simgrace_40.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_40.pth -------------------------------------------------------------------------------- /transfer_learning/chem/models_simgrace/simgrace_60.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_60.pth -------------------------------------------------------------------------------- /transfer_learning/chem/models_simgrace/simgrace_80.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_80.pth -------------------------------------------------------------------------------- /transfer_learning/chem/parse_result.py: 
def get_test_acc(event_file, num_epochs=100):
    """Return the test AUC logged at the step with the best validation AUC.

    Reads the summary events in ``event_file`` and stores the
    ``data/val_auc`` and ``data/test_auc`` scalars at index ``step - 1``.
    Steps that were never logged keep the initial value 0.

    :param event_file: path of the event file to read
    :param num_epochs: size of the per-step buffers (default: 100, the value
        that used to be hard-coded); a logged step larger than this raises
        ``IndexError``
    :return: the ``data/test_auc`` value at the arg-max of ``data/val_auc``
    """
    val_auc_list = np.zeros(num_epochs)
    test_auc_list = np.zeros(num_epochs)
    # Iterate the event stream directly; there is no need to hold every
    # event in a list first.
    for e in tf.train.summary_iterator(event_file):
        if len(e.summary.value) == 0:
            continue
        # Only the first value of each event is inspected.
        first_value = e.summary.value[0]
        if first_value.tag == "data/val_auc":
            val_auc_list[e.step-1] = first_value.simple_value
        elif first_value.tag == "data/test_auc":
            test_auc_list[e.step-1] = first_value.simple_value

    best_epoch = np.argmax(val_auc_list)

    return test_auc_list[best_epoch]
def cycle_index(num, shift):
    """Return the indices ``0 .. num-1`` rotated left by ``shift`` positions.

    Element ``i`` of the result is ``(i + shift) % num``. For
    ``0 < shift <= num`` this matches the previous slice-assignment version.
    It also handles ``shift == 0`` (which used to raise, because ``arr[-0:]``
    selects the whole tensor) and ``shift > num``.

    :param num: number of indices
    :param shift: rotation offset
    :return: 1-D integer tensor of length ``num``
    """
    return (torch.arange(num) + shift) % num
def train(args, model, device, loader, optimizer):
    """Run one Infomax training epoch over ``loader``.

    For every batch, node embeddings come from ``model.gnn`` and a per-graph
    summary is the sigmoid of ``model.pool`` over them. Each node is scored by
    ``model.discriminator`` against its own graph's summary (positive) and
    against the summary of another graph obtained by shifting the summaries
    by one position (negative). ``model.loss`` is applied to both score sets.

    :param args: parsed command-line arguments (unused here)
    :param model: module exposing ``gnn``, ``pool``, ``discriminator``, ``loss``
    :param device: device each batch is moved to
    :param loader: iterable of batches
    :param optimizer: optimizer stepped once per batch
    :return: ``(mean accuracy, mean loss)`` averaged over the batches
    :raises ValueError: if ``loader`` yields no batches
    """
    model.train()

    train_acc_accum = 0
    train_loss_accum = 0
    num_batches = 0

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)
        node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
        summary_emb = torch.sigmoid(model.pool(node_emb, batch.batch))

        # Broadcast each graph summary to the nodes of that graph.
        positive_expanded_summary_emb = summary_emb[batch.batch]

        # Negatives: pair every node with the summary of the next graph.
        shifted_summary_emb = summary_emb[cycle_index(len(summary_emb), 1)]
        negative_expanded_summary_emb = shifted_summary_emb[batch.batch]

        positive_score = model.discriminator(node_emb, positive_expanded_summary_emb)
        negative_score = model.discriminator(node_emb, negative_expanded_summary_emb)

        optimizer.zero_grad()
        loss = model.loss(positive_score, torch.ones_like(positive_score)) + model.loss(negative_score, torch.zeros_like(negative_score))
        loss.backward()

        optimizer.step()

        train_loss_accum += float(loss.detach().cpu().item())
        acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
        train_acc_accum += float(acc.detach().cpu().item())
        num_batches += 1

    if num_batches == 0:
        raise ValueError("loader yielded no batches")

    # Divide by the number of batches. The zero-based enumerate index used
    # before was off by one and was zero for a single-batch epoch.
    return train_acc_accum/num_batches, train_loss_accum/num_batches
type=float, default=0.001, 95 | help='learning rate (default: 0.001)') 96 | parser.add_argument('--decay', type=float, default=0, 97 | help='weight decay (default: 0)') 98 | parser.add_argument('--num_layer', type=int, default=5, 99 | help='number of GNN message passing layers (default: 5).') 100 | parser.add_argument('--emb_dim', type=int, default=300, 101 | help='embedding dimensions (default: 300)') 102 | parser.add_argument('--dropout_ratio', type=float, default=0, 103 | help='dropout ratio (default: 0)') 104 | parser.add_argument('--JK', type=str, default="last", 105 | help='how the node features across layers are combined. last, sum, max or concat') 106 | parser.add_argument('--dataset', type=str, default = 'zinc_standard_agent', help='root directory of dataset. For now, only classification.') 107 | parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model') 108 | parser.add_argument('--gnn_type', type=str, default="gin") 109 | parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.") 110 | parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading') 111 | args = parser.parse_args() 112 | 113 | 114 | torch.manual_seed(0) 115 | np.random.seed(0) 116 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 117 | if torch.cuda.is_available(): 118 | torch.cuda.manual_seed_all(0) 119 | 120 | 121 | #set up dataset 122 | dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset) 123 | 124 | print(dataset) 125 | 126 | loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 127 | 128 | #set up model 129 | gnn = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type) 130 | 131 | discriminator = Discriminator(args.emb_dim) 132 | 133 | model = Infomax(gnn, discriminator) 134 | 
def train(args, model, device, loader, optimizer):
    """Run one edge-prediction training epoch over ``loader``.

    Node embeddings come from ``model``. An edge is scored by the dot product
    of its two endpoint embeddings. Edges from ``batch.edge_index`` are
    trained towards label 1 and edges from ``batch.negative_edge_index``
    towards label 0, using the module-level ``criterion``.

    :param args: parsed command-line arguments (unused here)
    :param model: GNN mapping ``(x, edge_index, edge_attr)`` to node embeddings
    :param device: device each batch is moved to
    :param loader: iterable of batches
    :param optimizer: optimizer stepped once per batch
    :return: ``(mean accuracy, mean loss)`` averaged over the batches
    :raises ValueError: if ``loader`` yields no batches
    """
    model.train()

    train_acc_accum = 0
    train_loss_accum = 0
    num_batches = 0

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)
        node_emb = model(batch.x, batch.edge_index, batch.edge_attr)

        # Only every second column of edge_index is used -- presumably each
        # undirected edge is stored as two consecutive directed columns
        # (TODO confirm against the dataset loader).
        positive_score = torch.sum(node_emb[batch.edge_index[0, ::2]] * node_emb[batch.edge_index[1, ::2]], dim = 1)
        negative_score = torch.sum(node_emb[batch.negative_edge_index[0]] * node_emb[batch.negative_edge_index[1]], dim = 1)

        optimizer.zero_grad()
        loss = criterion(positive_score, torch.ones_like(positive_score)) + criterion(negative_score, torch.zeros_like(negative_score))
        loss.backward()
        optimizer.step()

        train_loss_accum += float(loss.detach().cpu().item())
        # NOTE(review): the denominator assumes as many negative as positive
        # edges -- confirm against the NegativeEdge transform.
        acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
        train_acc_accum += float(acc.detach().cpu().item())
        num_batches += 1

    if num_batches == 0:
        raise ValueError("loader yielded no batches")

    # Divide by the number of batches. The zero-based enumerate index used
    # before was off by one and was zero for a single-batch epoch.
    return train_acc_accum/num_batches, train_loss_accum/num_batches
For now, only classification.') 72 | parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model') 73 | parser.add_argument('--gnn_type', type=str, default="gin") 74 | parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading') 75 | args = parser.parse_args() 76 | 77 | 78 | torch.manual_seed(0) 79 | np.random.seed(0) 80 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 81 | if torch.cuda.is_available(): 82 | torch.cuda.manual_seed_all(0) 83 | 84 | #set up dataset 85 | dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset, transform = NegativeEdge()) 86 | 87 | print(dataset[0]) 88 | 89 | loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 90 | 91 | #set up model 92 | model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type) 93 | 94 | model.to(device) 95 | 96 | #set up optimizer 97 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) 98 | print(optimizer) 99 | 100 | for epoch in range(1, args.epochs+1): 101 | print("====epoch " + str(epoch)) 102 | 103 | train_acc, train_loss = train(args, model, device, loader, optimizer) 104 | 105 | print(train_acc) 106 | print(train_loss) 107 | 108 | if not args.output_model_file == "": 109 | torch.save(model.state_dict(), args.output_model_file + ".pth") 110 | 111 | if __name__ == "__main__": 112 | main() 113 | -------------------------------------------------------------------------------- /transfer_learning/chem/pretrain_supervised.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from loader import MoleculeDataset 4 | from torch_geometric.data import DataLoader 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import 
def train(args, model, device, loader, optimizer):
    """Run one epoch of supervised multi-task pre-training.

    Targets are used as -1 / +1 labels and mapped to 0 / 1 before the loss;
    entries equal to 0 are treated as missing and excluded from the loss.

    Args:
        args: Parsed command-line arguments (not read by this function).
        model: Network called as ``model(x, edge_index, edge_attr, batch)``.
        device: Device every batch is moved to.
        loader: Iterable of graph batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.
        optimizer: Optimizer stepped once per batch.
    """
    model.train()

    for graphs in tqdm(loader, desc="Iteration"):
        graphs = graphs.to(device)
        logits = model(graphs.x, graphs.edge_index, graphs.edge_attr, graphs.batch)
        labels = graphs.y.view(logits.shape).to(torch.float64)

        # True where a label is present (non-zero target).
        labelled = labels ** 2 > 0

        # Element-wise loss against targets rescaled from {-1, +1} to {0, 1}.
        # NOTE(review): relies on the module-level criterion returning an
        # unreduced loss matrix (it is built with reduction="none").
        per_entry_loss = criterion(logits.double(), (labels + 1) / 2)

        # Zero the loss of missing targets so they contribute no gradient.
        masked_loss = torch.where(labelled, per_entry_loss,
                                  torch.zeros_like(per_entry_loss))

        optimizer.zero_grad()
        # Mean over the labelled entries only.
        loss = masked_loss.sum() / labelled.sum()
        loss.backward()

        optimizer.step()
68 | if np.sum(y_true[:,i] == 1) > 0 and np.sum(y_true[:,i] == -1) > 0: 69 | is_valid = y_true[:,i]**2 > 0 70 | roc_list.append(roc_auc_score((y_true[is_valid,i] + 1)/2, y_scores[is_valid,i])) 71 | weight.append(normalized_weight[i]) 72 | 73 | if len(roc_list) < y_true.shape[1]: 74 | print("Some target is missing!") 75 | print("Missing ratio: %f" %(1 - float(len(roc_list))/y_true.shape[1])) 76 | 77 | weight = np.array(weight) 78 | roc_list = np.array(roc_list) 79 | 80 | return weight.dot(roc_list) 81 | 82 | 83 | def main(): 84 | # Training settings 85 | parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks') 86 | parser.add_argument('--device', type=int, default=0, 87 | help='which gpu to use if any (default: 0)') 88 | parser.add_argument('--batch_size', type=int, default=32, 89 | help='input batch size for training (default: 32)') 90 | parser.add_argument('--epochs', type=int, default=100, 91 | help='number of epochs to train (default: 100)') 92 | parser.add_argument('--lr', type=float, default=0.001, 93 | help='learning rate (default: 0.001)') 94 | parser.add_argument('--decay', type=float, default=0, 95 | help='weight decay (default: 0)') 96 | parser.add_argument('--num_layer', type=int, default=5, 97 | help='number of GNN message passing layers (default: 5).') 98 | parser.add_argument('--emb_dim', type=int, default=300, 99 | help='embedding dimensions (default: 300)') 100 | parser.add_argument('--dropout_ratio', type=float, default=0.2, 101 | help='dropout ratio (default: 0.2)') 102 | parser.add_argument('--graph_pooling', type=str, default="mean", 103 | help='graph level pooling (sum, mean, max, set2set, attention)') 104 | parser.add_argument('--JK', type=str, default="last", 105 | help='how the node features across layers are combined. last, sum, max or concat') 106 | parser.add_argument('--dataset', type=str, default = 'chembl_filtered', help='root directory of dataset. 
For now, only classification.') 107 | parser.add_argument('--gnn_type', type=str, default="gin") 108 | parser.add_argument('--input_model_file', type=str, default = '', help='filename to read the model (if there is any)') 109 | parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model') 110 | parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading') 111 | args = parser.parse_args() 112 | 113 | 114 | torch.manual_seed(0) 115 | np.random.seed(0) 116 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 117 | if torch.cuda.is_available(): 118 | torch.cuda.manual_seed_all(0) 119 | 120 | #Bunch of classification tasks 121 | if args.dataset == "chembl_filtered": 122 | num_tasks = 1310 123 | else: 124 | raise ValueError("Invalid dataset name.") 125 | 126 | #set up dataset 127 | dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset) 128 | 129 | loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers) 130 | 131 | #set up model 132 | model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK = args.JK, drop_ratio = args.dropout_ratio, graph_pooling = args.graph_pooling, gnn_type = args.gnn_type) 133 | if not args.input_model_file == "": 134 | model.from_pretrained(args.input_model_file + ".pth") 135 | 136 | model.to(device) 137 | 138 | #set up optimizer 139 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay) 140 | print(optimizer) 141 | 142 | 143 | for epoch in range(1, args.epochs+1): 144 | print("====epoch " + str(epoch)) 145 | 146 | train(args, model, device, loader, optimizer) 147 | 148 | if not args.output_model_file == "": 149 | torch.save(model.gnn.state_dict(), args.output_model_file + ".pth") 150 | 151 | 152 | if __name__ == "__main__": 153 | main() 154 | 
-------------------------------------------------------------------------------- /transfer_learning/chem/run.sh: -------------------------------------------------------------------------------- 1 | #### GIN fine-tuning 2 | 3 | nohup ./finetune.sh bbbp > log_bbbp & 4 | nohup ./finetune.sh sider > log_sider & 5 | nohup ./finetune.sh toxcast > log_toxcast & -------------------------------------------------------------------------------- /unsupervised_TU/Accuracy.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/Accuracy.txt -------------------------------------------------------------------------------- /unsupervised_TU/__pycache__/arguments.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/arguments.cpython-37.pyc -------------------------------------------------------------------------------- /unsupervised_TU/__pycache__/aug.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/aug.cpython-37.pyc -------------------------------------------------------------------------------- /unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc -------------------------------------------------------------------------------- /unsupervised_TU/__pycache__/gin.cpython-37.pyc: -------------------------------------------------------------------------------- 
def arg_parse(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv[1:]``, exactly as before;
            passing a list allows the parser to be driven programmatically.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(description='GcnInformax Arguments.')
    parser.add_argument('--DS', dest='DS', default='NCI1',
                        help='NCI1,PTC_MR,IMDB-BINARY,IMDB-MULTI,REDDIT-BINARY')

    # Plain boolean switches: absent -> False, present -> True.
    for flag in ('local', 'glob', 'prior'):
        parser.add_argument('--' + flag, dest=flag, action='store_true',
                            default=False)

    parser.add_argument('--device', default='cuda:6', type=str,
                        help='gpu device ids')
    parser.add_argument('--lr', dest='lr', type=float, default=0.01,
                        help='Learning rate.')
    parser.add_argument('--alpha', default=1.2, type=float,
                        help='strength for regularization')
    parser.add_argument('--num-gc-layers', dest='num_gc_layers', type=int,
                        default=5,
                        help='Number of graph convolution layers before each pooling')
    parser.add_argument('--hidden-dim', dest='hidden_dim', type=int,
                        default=32, help='')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--epochs', type=int, default=20)
    # Random
    parser.add_argument('--eta', type=float, default=1.0,
                        help='0.1, 1.0, 10, 100, 1000')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='128, 256, 512, 1024')

    return parser.parse_args(argv)
encoders 46 | 47 | _alex64x64 = dict( 48 | Encoder=ConvnetEncoder, 49 | conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 50 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 51 | (384, 3, 1, 1, True, False, 'ReLU', None), 52 | (384, 3, 1, 1, True, False, 'ReLU', None), 53 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))], 54 | fc_args=[(4096, True, False, 'ReLU'), 55 | (4096, True, False, 'ReLU')], 56 | local_idx=2, 57 | conv_idx=4, 58 | fc_idx=1 59 | ) 60 | 61 | _foldalex64x64 = dict( 62 | Encoder=FoldedConvnetEncoder, 63 | crop_size=16, 64 | conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 65 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 66 | (384, 3, 1, 1, True, False, 'ReLU', None), 67 | (384, 3, 1, 1, True, False, 'ReLU', None), 68 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))], 69 | fc_args=[(4096, True, False, 'ReLU'), 70 | (4096, True, False, 'ReLU')], 71 | local_idx=4, 72 | fc_idx=1 73 | ) 74 | 75 | _foldmultialex64x64 = dict( 76 | Encoder=FoldedConvnetEncoder, 77 | crop_size=16, 78 | conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 79 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 80 | (384, 3, 1, 1, True, False, 'ReLU', None), 81 | (384, 3, 1, 1, True, False, 'ReLU', None), 82 | (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)), 83 | (192, 3, 1, 0, True, False, 'ReLU', None), 84 | (192, 1, 1, 0, True, False, 'ReLU', None)], 85 | fc_args=[(4096, True, False, 'ReLU')], 86 | local_idx=4, 87 | multi_idx=6, 88 | fc_idx=1 89 | ) 90 | 91 | configs = dict( 92 | basic28x28=_basic28x28, 93 | basic32x32=_basic32x32, 94 | basic64x64=_basic64x64, 95 | alex64x64=_alex64x64, 96 | foldalex64x64=_foldalex64x64, 97 | foldmultialex64x64=_foldmultialex64x64 98 | ) -------------------------------------------------------------------------------- /unsupervised_TU/cortex_DIM/configs/resnets.py: 
"""Configurations for ResNets

"""

from cortex_DIM.nn_modules.encoder import ResnetEncoder, FoldedResnetEncoder


# (dim_h, stride of the 3x3 convolution) for the six residual stages shared by
# every configuration below.
_STAGES = ((64, 1), (64, 1), (128, 2), (128, 1), (256, 2), (256, 1))


def _conv(dim_h, f_size, stride, pad):
    # One convolution entry; batch norm on, dropout off, ReLU, no pooling.
    return (dim_h, f_size, stride, pad, True, False, 'ReLU', None)


def _bottleneck(dim_h, stride, n_repeats):
    # One residual stage: 1x1 -> 3x3 (strided) -> 1x1 with 4x width, paired
    # with the trailing integer the original tables attach to each stage.
    layers = [_conv(dim_h, 1, 1, 0),
              _conv(dim_h, 3, stride, 1),
              _conv(dim_h * 4, 1, 1, 0)]
    return (layers, n_repeats)


def _resnet_32x32(encoder, repeats, local_idx, crop_size=None):
    # Assemble one config dict; `crop_size` is only present for folded encoders.
    config = dict(Encoder=encoder)
    if crop_size is not None:
        config['crop_size'] = crop_size
    config['conv_before_args'] = [_conv(64, 3, 2, 1)]
    config['res_args'] = [_bottleneck(dim_h, stride, n_repeats)
                          for (dim_h, stride), n_repeats in zip(_STAGES, repeats)]
    config['fc_args'] = [(1024, True, False, 'ReLU')]
    config['local_idx'] = local_idx
    config['fc_idx'] = 0
    return config


_REPEATS_19 = (1, 1, 1, 1, 1, 1)
_REPEATS_34 = (1, 2, 1, 5, 1, 2)

_resnet19_32x32 = _resnet_32x32(ResnetEncoder, _REPEATS_19, local_idx=4)

_foldresnet19_32x32 = _resnet_32x32(FoldedResnetEncoder, _REPEATS_19,
                                    local_idx=6, crop_size=8)

_resnet34_32x32 = _resnet_32x32(ResnetEncoder, _REPEATS_34, local_idx=2)

_foldresnet34_32x32 = _resnet_32x32(FoldedResnetEncoder, _REPEATS_34,
                                    local_idx=12, crop_size=8)

configs = dict(
    resnet19_32x32=_resnet19_32x32,
    resnet34_32x32=_resnet34_32x32,
    foldresnet19_32x32=_foldresnet19_32x32,
    foldresnet34_32x32=_foldresnet34_32x32
)
def raise_measure_error(measure):
    """Raise ``NotImplementedError`` naming the unsupported measure.

    Args:
        measure: The measure name that was requested.

    Raises:
        NotImplementedError: Always.
    """
    supported_measures = ['GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1']
    message = 'Measure `{}` not supported. Supported: {}'.format(
        measure, supported_measures)
    raise NotImplementedError(message)


def get_positive_expectation(p_samples, measure, average=True):
    """Computes the positive part of a divergence / difference.

    Args:
        p_samples: Positive samples.
        measure: Measure to compute for.
        average: Average the result over samples.

    Returns:
        torch.Tensor

    """
    log_2 = math.log(2.)

    # One element-wise formula per supported measure.
    formulas = {
        'GAN': lambda s: - F.softplus(-s),
        'JSD': lambda s: log_2 - F.softplus(- s),
        'X2': lambda s: s ** 2,
        'KL': lambda s: s + 1.,
        'RKL': lambda s: -torch.exp(-s),
        'DV': lambda s: s,
        'H2': lambda s: 1. - torch.exp(-s),
        'W1': lambda s: s,
    }

    # Anything that is not a known measure name goes to the error helper.
    formula = formulas.get(measure) if isinstance(measure, str) else None
    if formula is None:
        raise_measure_error(measure)

    Ep = formula(p_samples)
    return Ep.mean() if average else Ep
def log_sum_exp(x, axis=None):
    """Log sum exp function

    Computes ``log(sum(exp(x)))`` in a numerically stable way.

    Args:
        x: Input.
        axis: Axis over which to perform sum. ``None`` (the default) reduces
            over every element of ``x``.

    Returns:
        torch.Tensor: log sum exp, with the reduced axis removed (a scalar
        tensor when ``axis`` is ``None``).

    """
    if axis is None:
        # The previous implementation passed None straight to torch.max(x, dim)
        # and failed; reduce over the flattened tensor instead.
        return torch.logsumexp(x.reshape(-1), dim=0)
    # torch.logsumexp subtracts the per-slice maximum with the reduced
    # dimension kept, so it is correct for any axis. The hand-written version
    # subtracted the max without keepdim, which only broadcast for axis 0.
    return torch.logsumexp(x, dim=axis)
def create_encoder(Module):
    """Build an ``Encoder`` class on top of the given network class.

    Args:
        Module: Base class whose ``forward(x, return_full_list=True)`` returns
            either ``(conv_outs, fc_outs)`` or
            ``(conv_before_outs, res_outs, conv_after_outs, fc_outs)``, each a
            list of per-layer outputs.

    Returns:
        type: ``Encoder`` subclass of ``Module``.
    """
    class Encoder(Module):
        '''Encoder used for cortex_DIM.

        '''

        def __init__(self, *args, local_idx=None, multi_idx=None, conv_idx=None, fc_idx=None, **kwargs):
            '''

            Args:
                args: Arguments for parent class.
                local_idx: Index in list of convolutional layers for local features.
                multi_idx: Index in list of convolutional layers for multiple globals.
                conv_idx: Index in list of convolutional layers for intermediate features.
                    Defaults to `local_idx` when not given.
                fc_idx: Index in list of fully-connected layers for intermediate features.
                kwargs: Keyword arguments for the parent class.

            Raises:
                ValueError: If `local_idx` is not set.
            '''

            super().__init__(*args, **kwargs)

            if local_idx is None:
                raise ValueError('`local_idx` must be set')

            # Compare against None explicitly: `conv_idx or local_idx` would
            # silently replace a legitimate index 0 with `local_idx`.
            if conv_idx is None:
                conv_idx = local_idx

            self.local_idx = local_idx
            self.multi_idx = multi_idx
            self.conv_idx = conv_idx
            self.fc_idx = fc_idx

        def forward(self, x: torch.Tensor):
            '''

            Args:
                x: Input tensor.

            Returns:
                local_out, conv_out, multi_out, hidden_out, global_out
                (`multi_out`, `hidden_out` and `global_out` may be None).

            '''

            outs = super().forward(x, return_full_list=True)
            if len(outs) == 2:
                conv_out, fc_out = outs
            else:
                # ResNet-style parents return three lists of convolutional
                # outputs; index them as one flat list.
                conv_before_out, res_out, conv_after_out, fc_out = outs
                conv_out = conv_before_out + res_out + conv_after_out

            local_out = conv_out[self.local_idx]

            if self.multi_idx is not None:
                multi_out = conv_out[self.multi_idx]
            else:
                multi_out = None

            if len(fc_out) > 0:
                if self.fc_idx is not None:
                    hidden_out = fc_out[self.fc_idx]
                else:
                    hidden_out = None
                # The last fully-connected output is the global feature.
                global_out = fc_out[-1]
            else:
                hidden_out = None
                global_out = None

            conv_out = conv_out[self.conv_idx]

            return local_out, conv_out, multi_out, hidden_out, global_out

    return Encoder
class MIFCNet(nn.Module):
    """Simple custom network for computing MI.

    The output is the sum of a two-layer nonlinear block and a linear
    shortcut that is initialised close to an identity projection.

    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units. Must be >= ``n_input``.
        """
        super().__init__()

        assert(n_units >= n_input)

        self.linear_shortcut = nn.Linear(n_input, n_units)
        self.block_nonlinear = nn.Sequential(
            nn.Linear(n_input, n_units),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Linear(n_units, n_units)
        )

        # initialize the initial projection to a sort of noisy copy:
        # small uniform noise everywhere, ones on the leading diagonal.
        # Index assignment is used instead of masked_fill_ with a uint8 mask,
        # which is deprecated and rejected by recent PyTorch releases.
        weight = self.linear_shortcut.weight.data
        weight.uniform_(-0.01, 0.01)
        diag = torch.arange(n_input)
        weight[diag, diag] = 1.

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_nonlinear(x) + self.linear_shortcut(x)
        return h


class MI1x1ConvNet(nn.Module):
    """Simple custom 1x1 convnet.

    Sums a two-layer 1x1 convolutional block and a 1x1 convolutional
    shortcut, then layer-normalises over the channel axis.

    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units.
        """

        super().__init__()

        self.block_nonlinear = nn.Sequential(
            nn.Conv1d(n_input, n_units, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Conv1d(n_units, n_units, kernel_size=1, stride=1, padding=0, bias=True),
        )

        # LayerNorm normalises the last axis, so channels are moved there and back.
        self.block_ln = nn.Sequential(
            Permute(0, 2, 1),
            nn.LayerNorm(n_units),
            Permute(0, 2, 1)
        )

        self.linear_shortcut = nn.Conv1d(n_input, n_units, kernel_size=1,
                                         stride=1, padding=0, bias=False)

        # initialize shortcut to be like identity (if possible); otherwise the
        # default Conv1d initialisation is kept.
        if n_units >= n_input:
            weight = self.linear_shortcut.weight.data
            weight.uniform_(-0.01, 0.01)
            # Same diagonal fill as MIFCNet, avoiding the uint8 mask.
            diag = torch.arange(n_input)
            weight[diag, diag, 0] = 1.

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_ln(self.block_nonlinear(x) + self.linear_shortcut(x))
        return h
class Fold(torch.nn.Module):
    """Module (re)folding tensor.

    Gathers the per-crop outputs of strided crops back into an
    ``n_locs x n_locs`` grid per image. Works only on 1x1.

    """

    def __init__(self, img_size, fold_size):
        """

        Args:
            img_size: Images size.
            fold_size: Crop size.
        """
        super().__init__()
        # Number of crop positions along one axis.
        self.n_locs = 2 * (img_size // fold_size) - 1

    def forward(self, x):
        """(Re)folds tensor.

        Args:
            x: Input tensor; the leading axis enumerates every crop of
                every image, the remaining three axes are read as
                (channels, x, y).

        Returns:
            torch.Tensor: Refolded, contiguous tensor whose last two axes
                are the ``n_locs x n_locs`` crop grid.

        """
        channels, height, width = x.size()[1:]
        n_crops = self.n_locs * self.n_locs

        # Group the crops that belong to the same image ...
        per_image = x.reshape(-1, n_crops, channels, height * width)
        # ... move the crop axis last ...
        crops_last = per_image.permute(0, 2, 3, 1)
        # ... and unroll it into the 2-d location grid.
        folded = crops_last.reshape(
            -1, channels * height * width, self.n_locs, self.n_locs)
        return folded.contiguous()
116 | """ 117 | super().__init__() 118 | self.perm = perm 119 | 120 | def forward(self, input): 121 | """Permutes axes of tensor. 122 | 123 | Args: 124 | input: Input tensor. 125 | 126 | Returns: 127 | torch.Tensor: permuted tensor. 128 | 129 | """ 130 | return input.permute(*self.perm) 131 | -------------------------------------------------------------------------------- /unsupervised_TU/data/NCI1/NCI1/processed/data.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/data.pt -------------------------------------------------------------------------------- /unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt -------------------------------------------------------------------------------- /unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt -------------------------------------------------------------------------------- /unsupervised_TU/data/NCI1/NCI1/raw/README.txt: -------------------------------------------------------------------------------- 1 | README for dataset NCI1 2 | 3 | 4 | === Usage === 5 | 6 | This folder contains the following comma separated text files 7 | (replace DS by the name of the dataset): 8 | 9 | n = total number of nodes 10 | m = total number of edges 11 | N = number of graphs 12 | 13 | (1) DS_A.txt (m lines) 14 | sparse (block diagonal) adjacency matrix for all graphs, 15 | each line corresponds to (row, col) 
resp. (node_id, node_id) 16 | 17 | (2) DS_graph_indicator.txt (n lines) 18 | column vector of graph identifiers for all nodes of all graphs, 19 | the value in the i-th line is the graph_id of the node with node_id i 20 | 21 | (3) DS_graph_labels.txt (N lines) 22 | class labels for all graphs in the dataset, 23 | the value in the i-th line is the class label of the graph with graph_id i 24 | 25 | (4) DS_node_labels.txt (n lines) 26 | column vector of node labels, 27 | the value in the i-th line corresponds to the node with node_id i 28 | 29 | There are OPTIONAL files if the respective information is available: 30 | 31 | (5) DS_edge_labels.txt (m lines; same size as DS_A.txt) 32 | labels for the edges in DS_A.txt 33 | 34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt) 35 | attributes for the edges in DS_A.txt 36 | 37 | (7) DS_node_attributes.txt (n lines) 38 | matrix of node attributes, 39 | the comma separated values in the i-th line are the attribute vector of the node with node_id i 40 | 41 | (8) DS_graph_attributes.txt (N lines) 42 | regression values for all graphs in the dataset, 43 | the value in the i-th line is the attribute of the graph with graph_id i 44 | 45 | 46 | === Description === 47 | 48 | NCI1 and NCI109 represent two balanced subsets of datasets of chemical compounds screened 49 | for activity against non-small cell lung cancer and ovarian cancer cell lines respectively 50 | (Wale and Karypis (2006) and http://pubchem.ncbi.nlm.nih.gov). 51 | 52 | 53 | === Previous Use of the Dataset === 54 | 55 | Neumann, M., Garnett R., Bauckhage Ch., Kersting K.: Propagation Kernels: Efficient Graph 56 | Kernels from Propagated Information. Under review at MLJ. 57 | 58 | Neumann, M., Patricia, N., Garnett, R., Kersting, K.: Efficient Graph Kernels by 59 | Randomization. In: P.A. Flach, T.D. Bie, N. Cristianini (eds.) ECML/PKDD, Notes in 60 | Computer Science, vol. 7523, pp. 378-393. Springer (2012). 
class GcnInfomax(nn.Module):
    """Encoder plus local/global heads trained with a local-global loss.

    NOTE(review): the constructor reads the module-level names ``args`` and
    ``dataset_num_features``, which this script only defines inside its
    ``__main__`` section, so the class is usable only when the file is run
    as a script.
    """

    def __init__(self, hidden_dim, num_gc_layers, alpha=0.5, beta=1., gamma=.1):
        """

        Args:
            hidden_dim: Hidden width handed to ``Encoder``.
            num_gc_layers: Number of layers handed to ``Encoder``.
            alpha: Stored on the instance; not used in this class.
            beta: Stored on the instance; not used in this class.
            gamma: Weight of the prior term added in ``forward``.
        """
        super(GcnInfomax, self).__init__()

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.prior = args.prior

        self.embedding_dim = hidden_dim * num_gc_layers
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)

        self.local_d = FF(self.embedding_dim)
        self.global_d = FF(self.embedding_dim)

        if self.prior:
            self.prior_d = PriorDiscriminator(self.embedding_dim)

        self.init_emb()

    def init_emb(self):
        """Xavier-initialise every ``nn.Linear`` weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.fill_(0.0)

    def forward(self, x, edge_index, batch, num_graphs):
        """Compute the training loss for one batch.

        Args:
            x: Node features, or ``None`` to use a constant 1 per node.
            edge_index: Edge list forwarded to the encoder and the loss.
            batch: Per-node graph assignment; its length is used as the
                node count when ``x`` is ``None``.
            num_graphs: Unused.

        Returns:
            Local-global loss plus, when the prior is enabled, the
            ``gamma``-weighted prior term.
        """
        if x is None:
            # Follow the batch's own device rather than a module-level global.
            x = torch.ones(batch.shape[0], device=batch.device)

        # presumably y holds graph-level and M node-level embeddings;
        # verify against Encoder.
        y, M = self.encoder(x, edge_index, batch)

        g_enc = self.global_d(y)
        l_enc = self.local_d(M)

        measure = 'JSD'
        local_global_loss = local_global_loss_(l_enc, g_enc, edge_index, batch, measure)

        if self.prior:
            prior = torch.rand_like(y)
            term_a = torch.log(self.prior_d(prior)).mean()
            term_b = torch.log(1.0 - self.prior_d(y)).mean()
            PRIOR = - (term_a + term_b) * self.gamma
        else:
            PRIOR = 0

        return local_global_loss + PRIOR
#!/bin/bash -ex
# Usage: ./go.sh GPU_ID DATASET_NAME ETA
# Runs simgrace.py once per seed (0-4) on the selected GPU.

for seed in 0 1 2 3 4
do
  # --eta needs a space before its value; "--eta$3" reaches the argument
  # parser as a single unrecognised token.
  CUDA_VISIBLE_DEVICES="$1" python simgrace.py --DS "$2" --lr 0.01 --local --num-gc-layers 5 --eta "$3" --seed "$seed"
done
def local_global_loss_(l_enc, g_enc, edge_index, batch, measure):
    '''Local-global loss between per-node and per-graph features.

    Args:
        l_enc: Local feature matrix, one row per node.
        g_enc: Global feature matrix, one row per graph.
        edge_index: Unused.
        batch: For each node, the index of the graph (row of `g_enc`) it
            belongs to.
        measure: Type of f-divergence, forwarded to
            `get_positive_expectation` / `get_negative_expectation`.
    Returns:
        torch.Tensor: Loss.
    '''
    num_graphs = g_enc.shape[0]
    num_nodes = l_enc.shape[0]

    # Build the masks on the same device as the features so the loss also
    # runs on CPU-only hosts.
    device = l_enc.device
    node_idx = torch.arange(num_nodes, device=device)
    graph_idx = torch.as_tensor(batch, dtype=torch.long, device=device)

    # pos_mask[n, g] == 1 exactly when node n belongs to graph g;
    # neg_mask is its complement.
    pos_mask = torch.zeros((num_nodes, num_graphs), device=device)
    pos_mask[node_idx, graph_idx] = 1.
    neg_mask = 1. - pos_mask

    res = torch.mm(l_enc, g_enc.t())

    E_pos = get_positive_expectation(res * pos_mask, measure, average=False).sum()
    E_pos = E_pos / num_nodes
    E_neg = get_negative_expectation(res * neg_mask, measure, average=False).sum()
    # NOTE(review): this divides by zero when the batch holds one graph.
    E_neg = E_neg / (num_nodes * (num_graphs - 1))

    return E_neg - E_pos
class PriorDiscriminator(nn.Module):
    """Three-layer MLP that maps a vector to a single value in (0, 1)."""

    def __init__(self, input_dim):
        """

        Args:
            input_dim: Width of the input and of both hidden layers.
        """
        super().__init__()
        self.l0 = nn.Linear(input_dim, input_dim)
        self.l1 = nn.Linear(input_dim, input_dim)
        self.l2 = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Apply two ReLU hidden layers, then a sigmoid output layer.

        Args:
            x: Input tensor whose last dimension is ``input_dim``.

        Returns:
            torch.Tensor: Sigmoid scores with a last dimension of 1.
        """
        hidden = x
        for layer in (self.l0, self.l1):
            hidden = F.relu(layer(hidden))
        logits = self.l2(hidden)
        return torch.sigmoid(logits)
nn.Conv1d(512, 1, kernel_size=1) 54 | self.block = nn.Sequential( 55 | nn.Linear(input_dim, input_dim), 56 | nn.ReLU(), 57 | nn.Linear(input_dim, input_dim), 58 | nn.ReLU(), 59 | nn.Linear(input_dim, input_dim), 60 | nn.ReLU() 61 | ) 62 | self.linear_shortcut = nn.Linear(input_dim, input_dim) 63 | # self.c0 = nn.Conv1d(input_dim, 512, kernel_size=1, stride=1, padding=0) 64 | # self.c1 = nn.Conv1d(512, 512, kernel_size=1, stride=1, padding=0) 65 | # self.c2 = nn.Conv1d(512, 1, kernel_size=1, stride=1, padding=0) 66 | 67 | def forward(self, x): 68 | return self.block(x) + self.linear_shortcut(x) 69 | 70 | -------------------------------------------------------------------------------- /unsupervised_TU/readme.md: -------------------------------------------------------------------------------- 1 | ## Dependencies 2 | * [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric#installation)==1.7.0 3 | 4 | ## Training & Evaluation 5 | 6 | ``` 7 | ./go.sh $GPU_ID $DATASET_NAME $ETA 8 | ``` 9 | 10 | ```$DATASET_NAME``` is the dataset name (please refer to https://chrsmrrs.github.io/datasets/docs/datasets/), ```$GPU_ID``` is the launched GPU ID and ```$ETA``` could be tuned among {0.1, 1.0, 10.0, 100.0}. 11 | 12 | ## Acknowledgements 13 | - https://github.com/Shen-Lab/GraphCL/tree/master/unsupervised_TU 14 | 15 | - https://github.com/fanyun-sun/InfoGraph/tree/master/unsupervised. 
if __name__ == '__main__':
    # Usage: test.py LOG_FILE DATASET
    # For each epoch budget, scan every (gc, lr, tpe) configuration in the
    # log and keep, per classifier, the best mean score over the first five
    # logged runs of the requested dataset.

    for epoch in [20, 100]:
        print(epoch)
        real_res = {'logreg':[-1], 'svc':[-1], 'linearsvc':[-1], 'randomforest':[-1]}
        for gc in [3, 5, 8, 16]:
            for lr in [0.01, 0.1, 0.001]:
                for tpe in ['local', 'localprior']:
                    res = collections.defaultdict(lambda: collections.defaultdict(list))
                    with open(sys.argv[1], 'r') as f:
                        for line in f:
                            # Fields: DS, tpe, gc, epochs, log_interval, lr, json
                            x = line.strip().split(',', 6)
                            if (x[1] != tpe or x[2] != str(gc)
                                    or x[3] != str(epoch) or x[5] != str(lr)):
                                continue
                            tmp = json.loads(x[-1])

                            per_dataset = res[x[0]]
                            for clf_name in ('logreg', 'svc', 'linearsvc', 'randomforest'):
                                per_dataset[clf_name].append(tmp[clf_name])

                    for DS, lst in res.items():
                        if DS == sys.argv[2]:
                            for clf, v in lst.items():
                                first_runs = np.array(v[:5])
                                mn = np.mean(first_runs, axis=0)
                                std = np.std(first_runs, axis=0)

                                idx = np.argmax(mn)
                                # Only accept configurations logged more than once.
                                if mn[idx] > real_res[clf][0] and len(v) > 1:
                                    real_res[clf] = [mn[idx], std[idx], epoch, lr, gc, idx, len(v)]
        print(real_res)