├── README.md
├── adversarial_robustness
    ├── README.md
    └── code
    │   ├── common
    │       ├── Makefile
    │       ├── _ext
    │       │   ├── __init__.pyc
    │       │   ├── __pycache__
    │       │   │   └── __init__.cpython-37.pyc
    │       │   ├── custom_kernel.d
    │       │   ├── custom_kernel.o
    │       │   └── my_lib
    │       │   │   ├── __init__.py
    │       │   │   ├── __init__.pyc
    │       │   │   ├── __pycache__
    │       │   │       └── __init__.cpython-37.pyc
    │       │   │   ├── _my_lib.so
    │       │   │   └── ffiex.py
    │       ├── build.py
    │       ├── cmd_args.py
    │       ├── cmd_args.pyc
    │       ├── dnn.py
    │       ├── dnn.pyc
    │       ├── functions
    │       │   ├── __init__.pyc
    │       │   ├── __pycache__
    │       │   │   ├── __init__.cpython-37.pyc
    │       │   │   └── custom_func.cpython-37.pyc
    │       │   ├── custom_func.py
    │       │   └── custom_func.pyc
    │       ├── graph_embedding.py
    │       ├── graph_embedding.pyc
    │       ├── modules
    │       │   ├── __init__.pyc
    │       │   ├── __pycache__
    │       │   │   ├── __init__.cpython-37.pyc
    │       │   │   └── custom_mod.cpython-37.pyc
    │       │   ├── custom_mod.py
    │       │   └── custom_mod.pyc
    │       ├── src
    │       │   ├── custom_kernel.cu
    │       │   ├── custom_kernel.h
    │       │   ├── my_lib.c
    │       │   ├── my_lib.h
    │       │   ├── my_lib_cuda.c
    │       │   └── my_lib_cuda.h
    │       └── test.py
    │   ├── data_generator
    │       ├── data_util.py
    │       ├── data_util.pyc
    │       ├── gen_er_components.py
    │       └── pkl_dump.sh
    │   ├── graph_attack
    │       ├── collect_rl_results.py
    │       ├── dqn.py
    │       ├── er_trivial_attack.py
    │       ├── genetic_algorithm.py
    │       ├── grad_attack.py
    │       ├── nstep_replay_mem.py
    │       ├── nstep_replay_mem.pyc
    │       ├── plot_dqn.py
    │       ├── plot_dqn.sh
    │       ├── q_net.py
    │       ├── q_net.pyc
    │       ├── rl_common.py
    │       ├── run_dqn.sh
    │       ├── run_ga.sh
    │       ├── run_grad.sh
    │       └── run_trivial.sh
    │   └── graph_classification
    │       ├── er_components.py
    │       ├── graph_common.py
    │       ├── run_er_components.sh
    │       └── test_er_comp.sh
├── semisupervised_TU
    ├── README.md
    ├── environment.yml
    ├── finetuning
    │   ├── datasets.py
    │   ├── feature_expansion.py
    │   ├── gcn_conv.py
    │   ├── image_dataset.py
    │   ├── main.py
    │   ├── main_cl.py
    │   ├── net_cl.py
    │   ├── net_gae.py
    │   ├── net_infomax.py
    │   ├── train_eval.py
    │   ├── tu_dataset.py
    │   └── utils.py
    └── pre-training
    │   ├── datasets.py
    │   ├── feature_expansion.py
    │   ├── gcn_conv.py
    │   ├── main.py
    │   ├── res_gcn.py
    │   ├── train_eval.py
    │   ├── tu_dataset.py
    │   └── utils.py
├── simgrace.png
├── transfer_learning
    ├── README.md
    ├── bio
    │   ├── batch.py
    │   ├── dataloader.py
    │   ├── finetune.py
    │   ├── finetune.sh
    │   ├── finetune_tune.sh
    │   ├── loader.py
    │   ├── model.py
    │   ├── models_simgrace
    │   │   ├── simgrace_100.pth
    │   │   ├── simgrace_20.pth
    │   │   ├── simgrace_40.pth
    │   │   ├── simgrace_60.pth
    │   │   └── simgrace_80.pth
    │   ├── pretrain_contextpred.py
    │   ├── pretrain_deepgraphinfomax.py
    │   ├── pretrain_edgepred.py
    │   ├── pretrain_masking.py
    │   ├── pretrain_simgrace.py
    │   ├── pretrain_supervised.py
    │   ├── result_analysis.py
    │   ├── splitters.py
    │   └── util.py
    └── chem
    │   ├── batch.py
    │   ├── dataloader.py
    │   ├── finetune.py
    │   ├── finetune.sh
    │   ├── finetune_mutag_ptc.py
    │   ├── finetune_tune.sh
    │   ├── loader.py
    │   ├── model.py
    │   ├── models_simgrace
    │       ├── simgrace_100.pth
    │       ├── simgrace_20.pth
    │       ├── simgrace_40.pth
    │       ├── simgrace_60.pth
    │       └── simgrace_80.pth
    │   ├── parse_result.py
    │   ├── pretrain_contextpred.py
    │   ├── pretrain_deepgraphinfomax.py
    │   ├── pretrain_edgepred.py
    │   ├── pretrain_masking.py
    │   ├── pretrain_simgrace.py
    │   ├── pretrain_supervised.py
    │   ├── run.sh
    │   ├── splitters.py
    │   └── util.py
└── unsupervised_TU
    ├── Accuracy.txt
    ├── __pycache__
        ├── arguments.cpython-37.pyc
        ├── aug.cpython-37.pyc
        ├── evaluate_embedding.cpython-37.pyc
        ├── gin.cpython-37.pyc
        ├── losses.cpython-37.pyc
        └── model.cpython-37.pyc
    ├── arguments.py
    ├── aug.py
    ├── cortex_DIM
        ├── configs
        │   ├── convnets.py
        │   └── resnets.py
        ├── functions
        │   ├── __pycache__
        │   │   ├── gan_losses.cpython-37.pyc
        │   │   └── misc.cpython-37.pyc
        │   ├── dim_losses.py
        │   ├── gan_losses.py
        │   └── misc.py
        └── nn_modules
        │   ├── __pycache__
        │       ├── mi_networks.cpython-37.pyc
        │       └── misc.cpython-37.pyc
        │   ├── convnet.py
        │   ├── encoder.py
        │   ├── mi_networks.py
        │   ├── misc.py
        │   └── resnet.py
    ├── data
        └── NCI1
        │   └── NCI1
        │       ├── processed
        │           ├── data.pt
        │           ├── pre_filter.pt
        │           └── pre_transform.pt
        │       └── raw
        │           ├── NCI1_A.txt
        │           ├── NCI1_graph_indicator.txt
        │           ├── NCI1_graph_labels.txt
        │           ├── NCI1_node_labels.txt
        │           └── README.txt
    ├── deepinfomax.py
    ├── evaluate_embedding.py
    ├── gin.py
    ├── go.sh
    ├── losses.py
    ├── model.py
    ├── readme.md
    ├── simgrace.py
    └── test.py


/README.md:
--------------------------------------------------------------------------------
 1 | # SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation (WWW 2022)
 2 | PyTorch implementation of [SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation](https://arxiv.org/pdf/2202.03104.pdf), accepted at The Web Conference 2022 (WWW 2022).
 3 | ## Overview
 4 | In this repository, we provide the code of SimGRACE to evaluate its performance in terms of generalizability (unsupervised & semi-supervised learning), transferability (transfer learning) and robustness (adversarial robustness).
 5 | ![](./simgrace.png)
 6 | ## Dataset download
 7 | * Semi-supervised learning & Unsupervised representation learning [TU Datasets](https://chrsmrrs.github.io/datasets/docs/datasets/) (social and biochemical graphs)
 8 | * Transfer learning [chem data](http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip) (2.5GB); [bio data](http://snap.stanford.edu/gnn-pretrain/data/bio_dataset.zip) (2GB)
 9 | * Adversarial robustness [synthetic data](https://www.dropbox.com/sh/mu8odkd36x54rl3/AABg8ABiMqwcMEM5qKIY97nla?dl=0)
10 | 
11 | ## Citation
12 | ```
13 | @inproceedings{10.1145/3485447.3512156,
14 | author = {Xia, Jun and Wu, Lirong and Chen, Jintao and Hu, Bozhen and Li, Stan Z.},
15 | title = {SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation},
16 | year = {2022},
17 | isbn = {9781450390965},
18 | publisher = {Association for Computing Machinery},
19 | address = {New York, NY, USA},
20 | url = {https://doi.org/10.1145/3485447.3512156},
21 | doi = {10.1145/3485447.3512156},
22 | booktitle = {Proceedings of the ACM Web Conference 2022},
23 | pages = {1070–1079},
24 | numpages = {10},
25 | keywords = {graph representation learning, contrastive learning, Graph neural networks, robustness, graph self-supervised learning},
26 | location = {Virtual Event, Lyon, France},
27 | series = {WWW '22}
28 | }
29 | ```
30 | ## Useful resources for Pretrained Graphs Models (PGMs)
31 | * The first comprehensive survey for PGMs: [A Survey of Pretraining on Graphs: Taxonomy, Methods, and Applications](https://arxiv.org/abs/2202.07893v1)
32 | * [A curated list of must-read papers, open-source pretrained models and pretraining datasets.](https://github.com/junxia97/awesome-pretrain-on-graphs)
33 | 
34 | ## Reference
35 | 1.  [Graph Contrastive Learning Automated (ICML 2021)](https://github.com/Shen-Lab/GraphCL_Automated)
36 | 2.  [Graph Contrastive Learning with Augmentations (NeurIPS 2020)](https://github.com/Shen-Lab/GraphCL)
37 | 3.  [Strategies for Pre-training Graph Neural Networks (ICLR 2020)](https://github.com/snap-stanford/pretrain-gnns/)
38 | 4.  [Adversarial Attack on Graph Structured Data (ICML 2018)](https://github.com/Hanjun-Dai/graph_adversarial_attack)
39 | 


--------------------------------------------------------------------------------
/adversarial_robustness/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies & Dataset
 2 | 
 3 | Please refer to https://github.com/Hanjun-Dai/graph_adversarial_attack for environment setup and to download the dataset.
 4 | After the configuration, you should have three directories: ```./code/```, ```./dropbox/``` and ```./pytorch_structure2vec/```.
 5 | 
 6 | ## Training & Evaluation
 7 | ### Pre-training + finetuning: ###
 8 | ```
 9 | cd ./code/graph_classification
10 | ./run_er_components.sh 15 20 0.15 2 -phase train
11 | ./run_er_components.sh 15 20 0.15 3 -phase train
12 | ./run_er_components.sh 15 20 0.15 4 -phase train
13 | 
14 | ./run_er_components.sh 40 50 0.05 2 -phase train
15 | ./run_er_components.sh 40 50 0.05 3 -phase train
16 | ./run_er_components.sh 40 50 0.05 4 -phase train
17 | 
18 | ./run_er_components.sh 90 100 0.02 2 -phase train
19 | ./run_er_components.sh 90 100 0.02 3 -phase train
20 | ./run_er_components.sh 90 100 0.02 4 -phase train
21 | ```
22 | 
23 | ### Adversarial attacks: ###
24 | ```
25 | cd ./code/graph_attack
26 | ./run_trivial.sh 15 20 0.15 2 -phase train
27 | ./run_trivial.sh 15 20 0.15 3 -phase train
28 | ./run_trivial.sh 15 20 0.15 4 -phase train
29 | ./run_grad.sh 15 20 0.15 2 -phase train
30 | ./run_grad.sh 15 20 0.15 3 -phase train
31 | ./run_grad.sh 15 20 0.15 4 -phase train
32 | ./run_dqn.sh 15 20 0.15 2 -phase train
33 | ./run_dqn.sh 15 20 0.15 3 -phase train
34 | ./run_dqn.sh 15 20 0.15 4 -phase train
35 | 
36 | ./run_trivial.sh 40 50 0.05 2 -phase train
37 | ./run_trivial.sh 40 50 0.05 3 -phase train
38 | ./run_trivial.sh 40 50 0.05 4 -phase train
39 | ./run_grad.sh 40 50 0.05 2 -phase train
40 | ./run_grad.sh 40 50 0.05 3 -phase train
41 | ./run_grad.sh 40 50 0.05 4 -phase train
42 | ./run_dqn.sh 40 50 0.05 2 -phase train
43 | ./run_dqn.sh 40 50 0.05 3 -phase train
44 | ./run_dqn.sh 40 50 0.05 4 -phase train
45 | 
46 | ./run_trivial.sh 90 100 0.02 2 -phase train
47 | ./run_trivial.sh 90 100 0.02 3 -phase train
48 | ./run_trivial.sh 90 100 0.02 4 -phase train
49 | ./run_grad.sh 90 100 0.02 2 -phase train
50 | ./run_grad.sh 90 100 0.02 3 -phase train
51 | ./run_grad.sh 90 100 0.02 4 -phase train
52 | ./run_dqn.sh 90 100 0.02 2 -phase train
53 | ./run_dqn.sh 90 100 0.02 3 -phase train
54 | ./run_dqn.sh 90 100 0.02 4 -phase train
55 | ```
56 | 
57 | ## Acknowledgements
58 | * https://github.com/Shen-Lab/GraphCL/tree/master/adversarialRobustness_Component
59 | * https://github.com/Hanjun-Dai/graph_adversarial_attack
60 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/Makefile:
--------------------------------------------------------------------------------
 1 | dir_guard = @mkdir -p $(@D)
 2 | 
 3 | #INTEL_ROOT := /opt/intel
 4 | MKL_ROOT = $(INTEL_ROOT)/mkl
 5 | TBB_ROOT = $(INTEL_ROOT)/tbb
 6 | 
 7 | FIND := find
 8 | CXX := g++
 9 | CXXFLAGS += -Wall -O3 -std=c++11
10 | LDFLAGS += -lm  -lmkl_rt -ltbb
11 | 
12 | CUDA_HOME := /usr/local/cuda-9.0
13 | NVCC := $(CUDA_HOME)/bin/nvcc
14 | NVCCFLAGS += --default-stream per-thread
15 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand -lcusparse
16 | 
17 | CUDA_ARCH :=  -gencode arch=compute_30,code=sm_30 \
18 | 		-gencode arch=compute_35,code=sm_35 \
19 | 		-gencode arch=compute_50,code=sm_50 \
20 | 		-gencode arch=compute_50,code=compute_50
21 | 
22 | build_root = _ext
23 | obj_build_root = $(build_root)
24 | 
25 | include_dirs = $(CUDA_HOME)/include $(MKL_ROOT)/include $(TBB_ROOT)/include include
26 | CXXFLAGS += $(addprefix -I,$(include_dirs))
27 | CXXFLAGS += -fPIC
28 | 
29 | NVCCFLAGS += $(addprefix -I,$(include_dirs))
30 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC'
31 | cu_files = $(shell $(FIND) src/ -name "*.cu" -printf "%P\n")
32 | cu_obj_files = $(subst .cu,.o,$(cu_files))
33 | objs = $(addprefix $(obj_build_root)/,$(cu_obj_files))
34 | 
35 | DEPS = ${objs:.o=.d}
36 | mylib = _ext/my_lib/_my_lib.so
37 | 
38 | all: $(objs) $(mylib)
39 | 
40 | $(obj_build_root)/%.o: src/%.cu
41 | 	$(dir_guard)
42 | 	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D)
43 | 	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@
44 | 
45 | $(mylib): src/*.c src/*.h src/*.cu
46 | 	python build.py
47 | 
48 | clean:
49 | 	rm -f $(obj_build_root)/*.o
50 | 	rm -f $(obj_build_root)/*.d
51 | 	rm -rf _ext
52 | 	rm -f functions/*.pyc
53 | 	rm -f modules/*.pyc
54 | -include $(DEPS)
55 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__init__.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/custom_kernel.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/custom_kernel.o


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__init__.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/_my_lib.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/_my_lib.so


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | this_file = os.path.dirname(__file__)
 6 | 
 7 | sources = ['src/my_lib.c']
 8 | headers = ['src/my_lib.h']
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/my_lib_cuda.c']
15 |     headers += ['src/my_lib_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | extra_objects = ['_ext/custom_kernel.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.my_lib',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects,
31 |     extra_compile_args=['-fopenmp'], 
32 |     extra_link_args=['-lgomp']
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/cmd_args.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | # import cPickle as cp
 3 | import pickle as cp
 4 | 
# Shared command-line options for the scripts that import this module.
# Almost every flag uses a single leading dash (e.g. `-phase train`).
# The options are parsed once, at import time, at the bottom of this section.
cmd_opt = argparse.ArgumentParser(description='Argparser for molecule vae')
# paths, device and run mode
cmd_opt.add_argument('-data_folder', type=str, default=None, help='data folder')
cmd_opt.add_argument('-saved_model', type=str, default=None, help='saved model')
cmd_opt.add_argument('-save_dir', type=str, default=None, help='save folder')
cmd_opt.add_argument('-ctx', type=str, default='cpu', help='cpu/gpu')
cmd_opt.add_argument('-phase', type=str, default='test', help='train/test')
cmd_opt.add_argument('-logfile', type=str, default=None, help='log')

# data generation and graph-embedding model sizes
cmd_opt.add_argument('-batch_size', type=int, default=50, help='minibatch size')
cmd_opt.add_argument('-seed', type=int, default=1, help='seed')
cmd_opt.add_argument('-min_n', type=int, default=0, help='min #nodes')
cmd_opt.add_argument('-max_n', type=int, default=0, help='max #nodes')
cmd_opt.add_argument('-min_c', type=int, default=0, help='min #components')
cmd_opt.add_argument('-max_c', type=int, default=0, help='max #components')
cmd_opt.add_argument('-er_p', type=float, default=0, help='parameter of er graphs')
cmd_opt.add_argument('-n_graphs', type=int, default=0, help='number of graphs')
cmd_opt.add_argument('-gm', default='mean_field', help='mean_field/loopy_bp/gcn')
cmd_opt.add_argument('-latent_dim', type=int, default=64, help='dimension of latent layers')
cmd_opt.add_argument('-out_dim', type=int, default=0, help='s2v output size')
cmd_opt.add_argument('-hidden', type=int, default=32, help='dimension of classification')
cmd_opt.add_argument('-max_lv', type=int, default=2, help='max rounds of message passing')

# optimisation hyper-parameters
cmd_opt.add_argument('-num_epochs', type=int, default=1000, help='number of epochs')
cmd_opt.add_argument('-learning_rate', type=float, default=0.001, help='init learning_rate')
cmd_opt.add_argument('-weight_decay', type=float, default=5e-4, help='weight_decay')
cmd_opt.add_argument('-dropout', type=float, default=0.5, help='dropout rate')

# for node classification
cmd_opt.add_argument('-dataset', type=str, default=None, help='citeseer/cora/pubmed')
cmd_opt.add_argument('-feature_dim', type=int, default=None, help='node feature dim')
cmd_opt.add_argument('-num_class', type=int, default=None, help='# classes')
cmd_opt.add_argument('-adj_norm', type=int, default=1, help='normalize the adj or not')

# for bio graph classification
cmd_opt.add_argument('-feat_dim', type=int, default=0, help='dimension of node feature')
cmd_opt.add_argument('-fold', type=int, default=1, help='fold (1..10)')

# for AT-SimGRACE
cmd_opt.add_argument('-lr_inner', type=float, default=0.001, help='lr of inner opt')
# NOTE(review): the flag is spelled `-epison` (presumably "epsilon"); the
# parsed attribute is therefore `cmd_args.epison`.
cmd_opt.add_argument('-epison', type=float, default=0.01, help='radius of perturbation ball')
# NOTE: the only option registered with a double dash; pass it as `--clip_norm`.
cmd_opt.add_argument('--clip_norm', type=int, default=50, help='Maximum norm of parameter gradient.')
# for attack

cmd_opt.add_argument('-idx_start', type=int, default=None, help='id of graph or node index')
cmd_opt.add_argument('-num_instances', type=int, default=None, help='num of samples for attack, in genetic algorithm')
cmd_opt.add_argument('-num_steps', type=int, default=100000, help='rl training steps')
cmd_opt.add_argument('-targeted', type=int, default=0, help='0/1 target attack or not')
cmd_opt.add_argument('-frac_meta', type=float, default=0, help='fraction for meta rl learning')
cmd_opt.add_argument('-meta_test', type=int, default=0, help='for meta rl learning')
cmd_opt.add_argument('-rand_att_type', type=str, default=None, help='random/exhaust')
cmd_opt.add_argument('-reward_type', type=str, default=None, help='binary/nll')
cmd_opt.add_argument('-base_model_dump', type=str, default=None, help='saved base model')
cmd_opt.add_argument('-num_mod', type=int, default=1, help='number of modifications allowed')

# for genetic algorithm
cmd_opt.add_argument('-population_size', type=int, default=100, help='population size')
cmd_opt.add_argument('-cross_rate', type=float, default=0.1, help='cross_rate')
cmd_opt.add_argument('-mutate_rate', type=float, default=0.2, help='mutate rate')
cmd_opt.add_argument('-rounds', type=int, default=10, help='rounds of evolution')

# for node attack
cmd_opt.add_argument('-bilin_q', type=int, default=0, help='bilinear q or not')
cmd_opt.add_argument('-mlp_hidden', type=int, default=64, help='mlp hidden layer size')
cmd_opt.add_argument('-n_hops', type=int, default=2, help='attack range')

# for defence
cmd_opt.add_argument('-del_rate', type=float, default=0, help='rate of deleting edge')

# Parsing happens as a side effect of importing this module.
# parse_known_args() is used, so unrecognised command-line flags are returned
# separately (and discarded here) instead of causing an error.
cmd_args, _ = cmd_opt.parse_known_args()

# Echo the parsed options to stdout on every import.
print(cmd_args)
76 | 
def build_kwargs(keys, arg_dict):
    """Return ``<key>-<value>`` for each key in *keys*, concatenated in order.

    Values are looked up in *arg_dict* and converted with ``str``. No
    separator is inserted between consecutive pairs, and an empty *keys*
    yields an empty string. A key missing from *arg_dict* raises ``KeyError``.
    """
    return ''.join('%s-%s' % (key, str(arg_dict[key])) for key in keys)
82 | 
def save_args(fout, args):
    """Pickle *args* into the file at path *fout*.

    The file is opened in binary write mode (an existing file is
    overwritten) and the object is serialised with the highest pickle
    protocol available.
    """
    with open(fout, 'wb') as handle:
        pickler = cp.Pickler(handle, cp.HIGHEST_PROTOCOL)
        pickler.dump(args)
86 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/cmd_args.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/cmd_args.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/dnn.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/dnn.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__init__.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/custom_func.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from _ext import my_lib
 4 | import sys
 5 | 
 6 | class JaggedLogSoftmax(Function):
 7 |     def forward(self, logits, prefix_sum):        
 8 |         self.save_for_backward(prefix_sum)
 9 | 
10 |         assert len(prefix_sum.size()) == 1
11 |         output = logits.new()
12 |         if not logits.is_cuda:
13 |             my_lib.jagged_log_softmax_forward(logits, prefix_sum, output)
14 |         else:
15 |             my_lib.jagged_log_softmax_forward_cuda(logits, prefix_sum, output)
16 | 
17 |         self.save_for_backward(prefix_sum, output)
18 |         return output
19 | 
20 |     def backward(self, grad_output):
21 |         prefix_sum, output = self.saved_variables
22 |         grad_input = grad_output.new()
23 |         if not grad_output.is_cuda:
24 |             my_lib.jagged_log_softmax_backward(output.data, grad_output, prefix_sum.data, grad_input)
25 |         else:            
26 |             my_lib.jagged_log_softmax_backward_cuda(output.data, grad_output, prefix_sum.data, grad_input)
27 |         return grad_input, None
28 | 
class JaggedArgmax(Function):
    """Function wrapper around the ``my_lib`` jagged argmax kernels (forward only)."""

    def forward(self, values, prefix_sum):
        """Return the tensor filled by the argmax kernel.

        ``prefix_sum`` must be one-dimensional; the result is allocated with
        ``prefix_sum.new()`` and so shares its tensor type.
        """
        assert len(prefix_sum.size()) == 1
        output = prefix_sum.new()
        # Choose the CPU or CUDA kernel from the device of `values`.
        kernel = (my_lib.jagged_argmax_forward_cuda if values.is_cuda
                  else my_lib.jagged_argmax_forward)
        kernel(values, prefix_sum, output)

        return output

    def backward(self, grad_output):
        """Not supported: always fails the assertion."""
        assert False
42 | 
class JaggedMax(Function):
    """Function wrapper around the ``my_lib`` jagged max kernels (forward only)."""

    def forward(self, values, prefix_sum):
        """Return ``(vmax, idxes)`` as filled by the max kernel.

        ``prefix_sum`` must be one-dimensional. ``idxes`` is allocated from
        ``prefix_sum`` and ``vmax`` from ``values``, so each shares the tensor
        type of its source.
        """
        assert len(prefix_sum.size()) == 1
        idxes = prefix_sum.new()
        vmax = values.new()
        # Choose the CPU or CUDA kernel from the device of `values`.
        kernel = (my_lib.jagged_max_forward_cuda if values.is_cuda
                  else my_lib.jagged_max_forward)
        kernel(values, prefix_sum, vmax, idxes)

        return vmax, idxes

    def backward(self, grad_output):
        """Not supported: always fails the assertion."""
        assert False
57 | 
def GraphLaplacianNorm(raw_adj):
    """Pass ``raw_adj``'s indices and values to the ``my_lib`` Laplacian-norm kernel.

    The kernel also receives the element-wise square root of the row sums of
    ``raw_adj``. ``raw_adj`` must provide ``_indices()`` and ``_values()``
    (i.e. a sparse tensor). Nothing is returned.
    # presumably the kernel rescales the values in place - TODO confirm
    """
    n_rows = raw_adj.size()[0]
    col_of_ones = torch.ones(n_rows, 1)
    if raw_adj.is_cuda:
        col_of_ones = col_of_ones.cuda()
    # Multiplying by a column of ones yields the per-row sums.
    norm = torch.mm(raw_adj, col_of_ones) ** 0.5
    indices = raw_adj._indices()
    values = raw_adj._values()
    if values.is_cuda:
        my_lib.graph_laplacian_norm_cuda(indices, values, norm)
    else:
        my_lib.graph_laplacian_norm(indices, values, norm)
69 | 
def GraphDegreeNorm(raw_adj):
    """Pass ``raw_adj``'s indices and values to the ``my_lib`` degree-norm kernel.

    The kernel also receives the row sums of ``raw_adj``. ``raw_adj`` must
    provide ``_indices()`` and ``_values()`` (i.e. a sparse tensor). Nothing
    is returned.
    # presumably the kernel rescales the values in place - TODO confirm
    """
    n_rows = raw_adj.size()[0]
    col_of_ones = torch.ones(n_rows, 1)
    if raw_adj.is_cuda:
        col_of_ones = col_of_ones.cuda()
    # Multiplying by a column of ones yields the per-row sums.
    norm = torch.mm(raw_adj, col_of_ones)
    indices = raw_adj._indices()
    values = raw_adj._values()
    if values.is_cuda:
        my_lib.graph_degree_norm_cuda(indices, values, norm)
    else:
        my_lib.graph_degree_norm(indices, values, norm)


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/custom_func.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/custom_func.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/graph_embedding.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/graph_embedding.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__init__.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/custom_mod.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from functions.custom_func import JaggedLogSoftmax, JaggedArgmax, JaggedMax
 3 | import networkx as nx
 4 | import numpy as np
 5 | 
 6 | class JaggedLogSoftmaxModule(Module):
 7 |     def forward(self, logits, prefix_sum):
 8 |         return JaggedLogSoftmax()(logits, prefix_sum)
 9 | 
class JaggedArgmaxModule(Module):
    """Module wrapper that applies ``JaggedArgmax`` to its inputs."""

    def forward(self, values, prefix_sum):
        # A new function object is created on every call.
        jagged_op = JaggedArgmax()
        return jagged_op(values, prefix_sum)
13 | 
class JaggedMaxModule(Module):
    """Module wrapper that applies ``JaggedMax`` to its inputs."""

    def forward(self, values, prefix_sum):
        # A new function object is created on every call.
        jagged_op = JaggedMax()
        return jagged_op(values, prefix_sum)


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/custom_mod.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/custom_mod.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/custom_kernel.h:
--------------------------------------------------------------------------------
/*
 * Prototypes for the custom kernel entry points. Every function takes the
 * CUDA stream as its first argument.
 * (presumably implemented in custom_kernel.cu - confirm against that file)
 *
 * NOTE: this header includes nothing itself, so the including file must make
 * `cudaStream_t` visible (e.g. via the CUDA runtime header) beforehand.
 */
#ifndef JAGGED_SOFTMAX_KERNEL_H
#define JAGGED_SOFTMAX_KERNEL_H

/* Give the declarations C linkage when this header is compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif

/* Jagged softmax, forward pass.
 * (presumably `ps` is the prefix-sum array and `bsize` its length - TODO confirm) */
void HostSoftMaxForward(cudaStream_t stream, float *input, float *output, long* ps, int bsize);

/* Jagged softmax, backward pass; takes the forward `output` as an input. */
void HostSoftMaxBackward(cudaStream_t stream, float *gradOutput, float *gradInput, float *output, long* ps, int bsize);

/* Jagged argmax; the result buffer `output` holds `long` values. */
void HostArgmaxForward(cudaStream_t stream, float *input, long *output, long* ps, int bsize);

/* Jagged max; fills a float buffer `vmax` and a long buffer `idxes`. */
void HostMaxForward(cudaStream_t stream, float *input, float* vmax, long *idxes, long* ps, int bsize);

/* Graph Laplacian normalisation; takes row and column index arrays.
 * (presumably `nnz` is the number of stored entries - TODO confirm) */
void HostGLapNorm(cudaStream_t stream, long* row_indices, long* col_indices, float* p_v, float* p_norm, int nnz);

/* Graph degree normalisation; takes only the row index array. */
void HostGDegreeNorm(cudaStream_t stream, long* row_indices, float* p_v, float* p_norm, int nnz);

#ifdef __cplusplus
}
#endif

#endif
25 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <assert.h>
  3 | 
  4 | int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output)
  5 | {
  6 |   values = THFloatTensor_newContiguous(values);
  7 |   THLongTensor_resizeAs(output, prefix_sum);
  8 | 
  9 |   float *input_data_base = values->storage->data + values->storageOffset;;  
 10 |   long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
 11 |   long *p_out = output->storage->data + output->storageOffset;
 12 |   long bsize = (long)prefix_sum->size[0];
 13 |   long i, d;
 14 | 
 15 |   #pragma omp parallel for private(i, d)
 16 |   for (i = 0; i < bsize; i++)
 17 |   {
 18 |     long offset = (i == 0) ? 0 : ps[i - 1];
 19 |     long n_ele = ps[i] - offset;
 20 | 
 21 |     float* input_data  = input_data_base  + offset;
 22 | 
 23 |     float max_input = -FLT_MAX;
 24 |     long max_id = -1;
 25 |     for (d = 0; d < n_ele; d++)
 26 |       if (input_data[d] > max_input)
 27 |       {
 28 |         max_input = input_data[d];
 29 |         max_id = d;
 30 |       }
 31 |     assert(max_id >= 0);
 32 |     p_out[i] = max_id;
 33 |   }
 34 | 
 35 |   THFloatTensor_free(values);
 36 |   return 1;
 37 | }
 38 | 
 39 | int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes)
 40 | {
 41 |   int64_t inputsize = prefix_sum->size[0];
 42 | 
 43 |   values = THFloatTensor_newContiguous(values);
 44 |   THLongTensor_resize1d(idxes, inputsize);
 45 |   THFloatTensor_resize1d(vmax, inputsize);
 46 | 
 47 |   float *input_data_base = values->storage->data + values->storageOffset;
 48 |   long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
 49 |   float *p_maxv = vmax->storage->data + vmax->storageOffset;
 50 |   long *p_i = idxes->storage->data + idxes->storageOffset;
 51 | 
 52 |   long bsize = (long)prefix_sum->size[0];
 53 |   long i, d;
 54 | 
 55 |   #pragma omp parallel for private(i, d)
 56 |   for (i = 0; i < bsize; i++)
 57 |   {
 58 |     long offset = (i == 0) ? 0 : ps[i - 1];
 59 |     long n_ele = ps[i] - offset;
 60 | 
 61 |     float* input_data  = input_data_base  + offset;
 62 | 
 63 |     float max_input = -FLT_MAX;
 64 |     long max_id = -1;
 65 |     for (d = 0; d < n_ele; d++)
 66 |       if (input_data[d] > max_input)
 67 |       {
 68 |         max_input = input_data[d];
 69 |         max_id = d;
 70 |       }
 71 |     assert(max_id >= 0);
 72 |     p_i[i] = max_id;
 73 |     p_maxv[i] = max_input;
 74 |   }
 75 | 
 76 |   THFloatTensor_free(values);
 77 |   return 1;  
 78 | }
 79 | 
 80 | int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output)
 81 | {
 82 |   logits = THFloatTensor_newContiguous(logits);
 83 |   THFloatTensor_resizeAs(output, logits);   
 84 |   float *input_data_base  = logits->storage->data + logits->storageOffset;//  THTensor_(data)(logits);  
 85 |   long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
 86 |   float *output_data_base = output->storage->data + output->storageOffset;
 87 |   uint64_t bsize = (uint64_t)prefix_sum->size[0];
 88 |   uint64_t i, d;
 89 | 
 90 |   #pragma omp parallel for private(i, d)
 91 |   for (i = 0; i < bsize; i++)
 92 |   {
 93 |     long offset = (i == 0) ? 0 : ps[i - 1];
 94 | 
 95 |     float* input_data  = input_data_base  + offset;
 96 |     float* output_data = output_data_base + offset;
 97 | 
 98 |     long n_ele = ps[i] - offset;
 99 |     float max_input = -FLT_MAX;
100 |     for (d = 0; d < n_ele; d++)
101 |       max_input = THMax(max_input, input_data[d]);
102 | 
103 |     double logsum = 0;
104 |     for (d = 0; d < n_ele; d++)
105 |       logsum += exp(input_data[d] - max_input);
106 |     logsum = max_input + log(logsum);
107 | 
108 |     for (d = 0; d < n_ele; d++)
109 |       output_data[d] = input_data[d] - logsum;
110 |   }
111 | 
112 |   THFloatTensor_free(logits);
113 |   return 1;
114 | }
115 | 
116 | int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input)
117 | {
118 |   grad_output = THFloatTensor_newContiguous(grad_output);
119 |   output = THFloatTensor_newContiguous(output); 
120 |   THFloatTensor_resizeAs(grad_input, grad_output); 
121 |   
122 |   float *output_data_base = output->storage->data + output->storageOffset;
123 |   float *gradOutput_data_base  = grad_output->storage->data + grad_output->storageOffset; 
124 |   long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
125 |   float *gradInput_data_base  = grad_input->storage->data + grad_input->storageOffset; 
126 |   
127 |   uint64_t bsize = (uint64_t)prefix_sum->size[0];
128 |   uint64_t i, d;
129 |   #pragma omp parallel for private(i, d)
130 |   for (i = 0; i < bsize; i++)
131 |   {
132 |     long offset = (i == 0) ? 0 : ps[i - 1];
133 |     float *gradInput_data  = gradInput_data_base  + offset;
134 |     float *output_data     = output_data_base     + offset;
135 |     float *gradOutput_data = gradOutput_data_base + offset;
136 | 
137 |     double sum = 0;
138 |     long n_ele = ps[i] - offset;
139 |     for (d = 0; d < n_ele; d++)
140 |       sum += gradOutput_data[d];
141 | 
142 |     for (d = 0; d < n_ele; d++)
143 |       gradInput_data[d] = gradOutput_data[d] - exp(output_data[d]) * sum;
144 |   }
145 | 
146 |   THFloatTensor_free(grad_output);
147 |   THFloatTensor_free(output);
148 |   return 1;
149 | }
150 | 
151 | int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm)
152 | {
153 |   uint64_t nnz = (uint64_t)values->size[0];
154 |   long *row_indices = indices->storage->data + indices->storageOffset;
155 |   long *col_indices = row_indices + indices->stride[0];
156 |   float *p_v = values->storage->data + values->storageOffset;
157 |   float *p_norm = norm->storage->data + norm->storageOffset;
158 | 
159 |   uint64_t i;
160 |   #pragma omp parallel for private(i)  
161 |   for (i = 0; i < nnz; i++)
162 |   {    
163 |     float norm = p_norm[ row_indices[i] ] * p_norm[ col_indices[i] ];
164 |     p_v[i] /= norm;
165 |   }
166 | 
167 |   return 1;
168 | }
169 | 
170 | int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm)
171 | {
172 |   uint64_t nnz = (uint64_t)values->size[0];
173 |   long *row_indices = indices->storage->data + indices->storageOffset;
174 |   float *p_v = values->storage->data + values->storageOffset;
175 |   float *p_norm = norm->storage->data + norm->storageOffset;
176 | 
177 |   uint64_t i;
178 |   #pragma omp parallel for private(i)
179 |   for (i = 0; i < nnz; i++)
180 |   {
181 |     float norm = p_norm[ row_indices[i] ];
182 |     p_v[i] /= norm;
183 |   }
184 | 
185 |   return 1;  
186 | }


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib.h:
--------------------------------------------------------------------------------
/* Prototypes of the CPU (TH tensor) entry points defined in my_lib.c.
 * Each function returns 1. */

/* Per-segment log-softmax of `logits`; segments are delimited by `prefix_sum`. */
int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output);

/* Gradient of the per-segment log-softmax, written to `grad_input`. */
int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input);

/* Segment-relative index of the largest element of each segment. */
int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output);

/* Largest element (`vmax`) and its segment-relative index (`idxes`) for each segment. */
int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes);

/* In-place values[i] /= norm[row[i]] * norm[col[i]]. */
int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm);

/* In-place values[i] /= norm[row[i]]. */
int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm);


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | 
  3 | #include "custom_kernel.h"
  4 | 
  5 | // this symbol will be resolved automatically from PyTorch libs
  6 | extern THCState *state;
  7 | 
  8 | int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output)
  9 | {
 10 |   logits = THCudaTensor_newContiguous(state, logits);
 11 |   THCudaTensor_resizeAs(state, output, logits);
 12 |   
 13 |   float *input_data_base  = THCudaTensor_data(state, logits);
 14 |   long* ps = THCudaLongTensor_data(state, prefix_sum);
 15 |   float *output_data_base  = THCudaTensor_data(state, output);
 16 |   
 17 |   int bsize = (int)prefix_sum->size[0];
 18 |   cudaStream_t stream = THCState_getCurrentStream(state);
 19 |   HostSoftMaxForward(stream, input_data_base, output_data_base, ps, bsize); 
 20 | 
 21 |   THCudaTensor_free(state, logits);
 22 |   return 1;
 23 | }
 24 | 
 25 | int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input)
 26 | {
 27 |   output = THCudaTensor_newContiguous(state, output);
 28 |   grad_output = THCudaTensor_newContiguous(state, grad_output);
 29 | 
 30 |   THCudaTensor_resizeAs(state, grad_input, grad_output);
 31 |   float *output_data_base  = THCudaTensor_data(state, output);  
 32 |   float *gradOutput_data_base  = THCudaTensor_data(state, grad_output);
 33 |   long* ps = THCudaLongTensor_data(state, prefix_sum);
 34 |   float *gradInput_data_base  = THCudaTensor_data(state, grad_input);
 35 |   
 36 |   int bsize = (int)prefix_sum->size[0];
 37 |   cudaStream_t stream = THCState_getCurrentStream(state);
 38 |   HostSoftMaxBackward(stream, gradOutput_data_base, gradInput_data_base, output_data_base, ps, bsize); 
 39 |   THCudaTensor_free(state, grad_output);
 40 |   THCudaTensor_free(state, output);
 41 |   return 1;
 42 | }
 43 | 
 44 | int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output)
 45 | {
 46 |   values = THCudaTensor_newContiguous(state, values);
 47 |   THCudaLongTensor_resizeAs(state, output, prefix_sum);
 48 |   
 49 |   float *input_data_base  = THCudaTensor_data(state, values);
 50 |   long* ps = THCudaLongTensor_data(state, prefix_sum);
 51 |   long *output_data_base  = THCudaLongTensor_data(state, output);
 52 |   
 53 |   int bsize = (int)prefix_sum->size[0];
 54 |   cudaStream_t stream = THCState_getCurrentStream(state);
 55 |   HostArgmaxForward(stream, input_data_base, output_data_base, ps, bsize); 
 56 | 
 57 |   THCudaTensor_free(state, values);
 58 |   return 1;
 59 | }
 60 | 
 61 | int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes)
 62 | {
 63 |   int64_t inputsize = prefix_sum->size[0];
 64 |   values = THCudaTensor_newContiguous(state, values);
 65 |   THCudaLongTensor_resize1d(state, idxes, inputsize);
 66 |   THCudaTensor_resize1d(state, vmax, inputsize);
 67 | 
 68 |   float *input_data_base  = THCudaTensor_data(state, values);
 69 |   long* ps = THCudaLongTensor_data(state, prefix_sum);
 70 |   long *p_i  = THCudaLongTensor_data(state, idxes);
 71 |   float *p_maxv  = THCudaTensor_data(state, vmax);
 72 | 
 73 |   int bsize = (int)prefix_sum->size[0];
 74 |   cudaStream_t stream = THCState_getCurrentStream(state);
 75 |   HostMaxForward(stream, input_data_base, p_maxv, p_i, ps, bsize);
 76 | 
 77 |   THCudaTensor_free(state, values);
 78 |   return 1;  
 79 | }
 80 | 
 81 | int graph_laplacian_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm)
 82 | {
 83 |   uint64_t nnz = (uint64_t)values->size[0];
 84 |   long *row_indices = THCudaLongTensor_data(state, indices);
 85 |   long *col_indices = row_indices + THCudaLongTensor_stride(state, indices, 0);
 86 |   float *p_v = THCudaTensor_data(state, values);
 87 |   float *p_norm = THCudaTensor_data(state, norm);
 88 | 
 89 |   cudaStream_t stream = THCState_getCurrentStream(state);
 90 |   HostGLapNorm(stream, row_indices, col_indices, p_v, p_norm, nnz);
 91 |   return 1;
 92 | }
 93 | 
 94 | int graph_degree_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm)
 95 | {
 96 |   uint64_t nnz = (uint64_t)values->size[0];
 97 |   long *row_indices = THCudaLongTensor_data(state, indices);  
 98 |   float *p_v = THCudaTensor_data(state, values);
 99 |   float *p_norm = THCudaTensor_data(state, norm);
100 | 
101 |   cudaStream_t stream = THCState_getCurrentStream(state);
102 |   HostGDegreeNorm(stream, row_indices, p_v, p_norm, nnz);
103 |   return 1;  
104 | }


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib_cuda.h:
--------------------------------------------------------------------------------
/* Prototypes of the CUDA (THC tensor) entry points defined in my_lib_cuda.c.
 * Each wrapper forwards to a Host* function from custom_kernel.h and returns 1. */

/* Forwards to HostSoftMaxForward; `output` is resized like `logits`. */
int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output);

/* Forwards to HostSoftMaxBackward; `grad_input` is resized like `grad_output`. */
int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input);

/* Forwards to HostArgmaxForward; `output` is resized like `prefix_sum`. */
int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output);

/* Forwards to HostMaxForward; `vmax` and `idxes` get one entry per element of `prefix_sum`. */
int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes);

/* Forwards to HostGLapNorm. */
int graph_laplacian_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm);

/* Forwards to HostGDegreeNorm. */
int graph_degree_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm);


--------------------------------------------------------------------------------
/adversarial_robustness/code/common/test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | import torch.nn.functional as F
 5 | from torch.autograd import Variable
 6 | import numpy as np
 7 | from modules.custom_mod import JaggedLogSoftmaxModule, JaggedArgmaxModule, JaggedMaxModule
 8 | import sys
 9 | 
def cpu_test():
    """Compare JaggedLogSoftmaxModule against F.log_softmax on CPU tensors.

    Prints the summed absolute difference of the forward outputs for ten
    random inputs whose segments are the rows of the matrix, then the summed
    absolute difference of the input gradients of the mean output.
    """
    mod = JaggedLogSoftmaxModule()
    for _ in range(10):
        a = torch.rand(10000, 10)
        n_rows, n_cols = a.size()
        # Prefix sums that make every row its own segment.
        row_ends = [(r + 1) * int(n_cols) for r in range(n_rows)]
        b = torch.from_numpy(np.array(row_ends))
        jagged_out = mod(Variable(a), Variable(b))
        dense_out = F.log_softmax(Variable(a), dim=1)
        print(torch.sum(torch.abs(jagged_out - dense_out)))

    a = torch.rand(100, 30)
    b = torch.from_numpy(np.array([(r + 1) * 30 for r in range(100)]))

    # Gradient through the jagged implementation.
    va = Variable(a, requires_grad=True)
    vb = Variable(b)
    torch.mean(mod(va, vb)).backward()
    b1 = va.grad

    # Gradient through the reference implementation.
    va = Variable(a, requires_grad=True)
    torch.mean(F.log_softmax(va, dim=1)).backward()
    b2 = va.grad

    print(torch.sum(torch.abs(b1 - b2)))
35 | 
def gpu_test():
    """Compare JaggedLogSoftmaxModule against F.log_softmax on CUDA tensors.

    For forward outputs and for gradients of the summed output, prints three
    summed absolute differences: CPU reference vs. GPU reference, CPU
    reference vs. jagged, and GPU reference vs. jagged.
    """
    def l1(x, y):
        # Summed absolute difference as a Python scalar.
        return torch.sum(torch.abs(x - y)).data[0]

    def row_prefix_sums(t):
        # Prefix sums that make every row of ``t`` its own segment.
        n_rows, n_cols = t.size()
        return torch.from_numpy(np.array([(r + 1) * int(n_cols) for r in range(n_rows)])).cuda()

    mod = JaggedLogSoftmaxModule()
    for _ in range(10):
        a = torch.rand(10000, 10).cuda()
        b = row_prefix_sums(a)
        c1 = mod(Variable(a), Variable(b))
        c2 = F.log_softmax(Variable(a), dim=1)
        c3 = F.log_softmax(Variable(a.cpu()), dim=1).cuda()
        print(l1(c3, c2), l1(c3, c1), l1(c2, c1))

    a = torch.rand(1000, 100).cuda()
    b = row_prefix_sums(a)

    # Gradient through the jagged implementation.
    va = Variable(a, requires_grad=True)
    vb = Variable(b)
    torch.sum(mod(va, vb)).backward()
    b1 = va.grad

    # Gradient through the GPU reference.
    va = Variable(a, requires_grad=True)
    torch.sum(F.log_softmax(va, dim=1)).backward()
    b2 = va.grad

    # Gradient through the CPU reference, moved to the GPU for comparison.
    va = Variable(a.cpu(), requires_grad=True)
    torch.sum(F.log_softmax(va, dim=1)).backward()
    b3 = va.grad.cuda()
    print(l1(b3, b2), l1(b3, b1), l1(b2, b1))
67 | 
def argmax():
    """Print JaggedArgmaxModule results for two CUDA inputs.

    The first input is a 10x4 matrix with one segment per row; the second is
    a length-10 vector split by the prefix sums [2, 5, 9, 10].
    """
    torch.manual_seed(1)
    mod = JaggedArgmaxModule()

    a = torch.rand(10, 4).cuda()
    print(a)
    n_rows, n_cols = a.size()
    row_ends = np.array([(r + 1) * int(n_cols) for r in range(n_rows)])
    b = torch.from_numpy(row_ends).cuda()
    print(mod(Variable(a), Variable(b)))

    a = torch.randn(10).cuda()
    print(a)
    b = torch.LongTensor([2, 5, 9, 10]).cuda()
    print(mod(Variable(a), Variable(b)))
83 | 
# Script body: runs at import time and exercises JaggedMaxModule on CUDA
# tensors. The cpu_test/gpu_test/argmax functions above are not called here.
torch.manual_seed(1)    
mod = JaggedMaxModule()

# Case 1: 10x4 matrix, prefix sums chosen so that every row is one segment.
a = torch.rand(10, 4).cuda()
print(a)
b = torch.from_numpy(np.array([ (i + 1) * int(a.size()[1]) for i in range(a.size()[0])])).cuda()
c1, c2 = mod(Variable(a), Variable(b))
print(c1)
print(c2)

# Case 2: length-10 vector split by the prefix sums [2, 5, 9, 10].
a = torch.randn(10).cuda()
print(a)
b = torch.LongTensor([2, 5, 9, 10]).cuda()
c = mod(Variable(a), Variable(b))
print(c[0], c[1])


--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/data_util.py:
--------------------------------------------------------------------------------
 1 | # import cPickle as cp
 2 | import pickle as cp
 3 | import networkx as nx
 4 | 
 5 | def load_pkl(fname, num_graph):
 6 |     g_list = []
 7 |     with open(fname, 'rb') as f:
 8 |         for i in range(num_graph):
 9 |             g = cp.load(f)
10 |             g_list.append(g)
11 |     return g_list
12 | 
def g2txt(g, label, fid):
    """Write graph ``g`` to ``fid`` as text.

    The first line is ``<num_nodes> <label>``. It is followed by one line per
    node ``i`` in ``range(len(g))`` holding the neighbor count and then the
    neighbor ids, space separated.

    ``g.neighbors(i)`` is materialized with ``list`` because newer networkx
    versions return an iterator, which has no ``len``.
    """
    fid.write('%d %d\n' % (len(g), label))
    for i in range(len(g)):
        neighbors = list(g.neighbors(i))
        fid.write('%d' % len(neighbors))
        for j in neighbors:
            fid.write(' %d' % j)
        fid.write('\n')


--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/data_util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/data_generator/data_util.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/gen_er_components.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | # import cPickle as cp
 4 | import pickle as cp
 5 | import random
 6 | import numpy as np
 7 | import networkx as nx
 8 | import time
 9 | from tqdm import tqdm
10 | 
11 | 
def get_component():
    """Return a connected Erdos-Renyi graph.

    The node count is drawn uniformly from ``[min_n, max_n]`` and the edge
    probability is ``p``; all three are module-level globals set by the
    script entry point. If the sampled graph is disconnected, its components
    are shuffled and each consecutive pair is joined by one edge between
    randomly chosen nodes.
    """
    cur_n = np.random.randint(max_n - min_n + 1) + min_n
    g = nx.erdos_renyi_graph(n=cur_n, p=p)

    # Plain node lists: random.choice needs an indexable sequence, and
    # connected_components is available in both old and new networkx.
    comps = [list(c) for c in nx.connected_components(g)]
    random.shuffle(comps)
    for prev_nodes, cur_nodes in zip(comps, comps[1:]):
        x = random.choice(prev_nodes)
        y = random.choice(cur_nodes)
        g.add_edge(x, y)
    assert nx.is_connected(g)
    return g
24 | 
if __name__ == '__main__':
    # Recognized flags and the converter applied to the value after each one.
    flag_parsers = {
        '-save_dir': str,
        '-max_n': int,
        '-min_n': int,
        '-num_graph': int,
        '-p': float,
        '-n_comp': int,
    }
    parsed = {}
    # Arguments come as "<flag> <value>" pairs; unknown flags are ignored.
    for i in range(1, len(sys.argv), 2):
        flag = sys.argv[i]
        if flag in flag_parsers:
            parsed[flag] = flag_parsers[flag](sys.argv[i + 1])

    save_dir = parsed.get('-save_dir')
    max_n = parsed.get('-max_n')
    min_n = parsed.get('-min_n')
    num_graph = parsed.get('-num_graph')
    p = parsed.get('-p')
    n_comp = parsed.get('-n_comp')

    # Every option is required.
    assert save_dir is not None
    assert max_n is not None
    assert min_n is not None
    assert num_graph is not None
    assert p is not None
    assert n_comp is not None

    fout_name = '%s/ncomp-%d-nrange-%d-%d-n_graph-%d-p-%.2f.pkl' % (save_dir, n_comp, min_n, max_n, num_graph, p)
    print('Final Output: ' + fout_name)
    print("Generating graphs...")
    # The size range is split evenly across components; get_component reads
    # these globals.
    min_n = min_n // n_comp
    max_n = max_n // n_comp

    for i in tqdm(range(num_graph)):
        # Build one graph as the disjoint union of n_comp connected parts.
        for j in range(n_comp):
            g = get_component()
            g_all = g if j == 0 else nx.disjoint_union(g_all, g)
        assert nx.number_connected_components(g_all) == n_comp

        # Append this graph's pickle to the output file.
        with open(fout_name, 'ab') as fout:
            cp.dump(g_all, fout, cp.HIGHEST_PROTOCOL)
72 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/pkl_dump.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | min_n=90
 4 | max_n=100
 5 | p=0.02
 6 | output_root=../../dropbox/data/components
 7 | 
 8 | if [ ! -e $output_root ];
 9 | then
10 |     mkdir -p $output_root
11 | fi
12 | 
13 | for t_c in 1 2 3 4 5; do
14 | 
15 | n_comp=$t_c
16 | 
17 | python gen_er_components.py \
18 |     -save_dir $output_root \
19 |     -max_n $max_n \
20 |     -min_n $min_n \
21 |     -num_graph 5000 \
22 |     -p $p \
23 |     -n_comp $n_comp
24 | 
25 | done
26 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/collect_rl_results.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | if __name__ == '__main__':
 4 | 	result_root = '../../dropbox/scratch/results/graph_classification/components'
 5 | 	targets = os.listdir(result_root)
 6 | 	targets = sorted(targets)
 7 | 	for fname in targets:
 8 |                 if fname[0] == '.':
 9 |                     continue
10 | 		configs = os.listdir(result_root + '/' + fname)
11 | 		best_num = 100
12 | 		best_config = None
13 | 
14 | 		for config in configs:
15 |                         if config[0] == '.' or 'epoch-best' in config:
16 |                             continue
17 | 			if '0.1' in config:
18 | 				continue
19 | 			result = result_root + '/' + fname + '/' + config + '/epoch-best.txt'
20 | 			with open(result, 'r') as f:
21 | 				num = float(f.readline().strip())
22 | 			if num < best_num:
23 | 				best_config = config
24 | 				best_num = num
25 | 		print fname, best_config, best_num	
26 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/er_trivial_attack.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import os
 4 | import sys
 5 | import numpy as np
 6 | import torch
 7 | import networkx as nx
 8 | import random
 9 | from torch.autograd import Variable
10 | from torch.nn.parameter import Parameter
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 | from tqdm import tqdm
15 | from copy import deepcopy
16 | 
17 | from q_net import NStepQNet, QNet, greedy_actions
18 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
19 | from cmd_args import cmd_args
20 | from graph_embedding import S2VGraph
21 | 
22 | from rl_common import GraphEdgeEnv, load_graphs, test_graphs, attackable, load_base_model
23 | 
24 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
25 | from graph_common import loop_dataset, load_er_data
26 | 
def propose_attack(model, s2v_g, num_added=1):
    """Propose an attacked copy of ``s2v_g`` that only adds intra-component edges.

    Candidate edges are all node pairs ``(i, j)`` with ``i < j`` that lie in
    the same connected component.

    * ``cmd_args.rand_att_type == 'random'``: add ``num_added`` candidates
      drawn with ``np.random.choice`` (with replacement) and return the
      result.
    * ``cmd_args.rand_att_type == 'exhaust'``: build one graph per candidate
      edge, evaluate them all with ``model`` and return the first whose
      ``acc`` entry is below 1, or the first candidate graph if none is.

    Raises:
        NotImplementedError: for any other ``rand_att_type``.
    """
    g = s2v_g.to_networkx()

    # Map each node to its component index. connected_components exists in
    # both old and new networkx, unlike connected_component_subgraphs.
    set_id = {}
    for comp_idx, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_idx

    num_nodes = len(g)
    cand = [(i, j)
            for i in range(num_nodes - 1)
            for j in range(i + 1, num_nodes)
            if set_id[i] == set_id[j]]

    if cmd_args.rand_att_type == 'random':
        # Draw indices rather than edges: np.random.choice needs 1-D input.
        picks = np.random.choice(len(cand), num_added)
        g.add_edges_from([cand[k] for k in picks])
        return S2VGraph(g, s2v_g.label)
    elif cmd_args.rand_att_type == 'exhaust':
        g_list = []
        for x, y in cand:
            g2 = g.copy()
            g2.add_edge(x, y)
            g_list.append(S2VGraph(g2, s2v_g.label))
        _, _, acc = model(g_list)
        # Move to host memory first; .numpy() is only valid for CPU tensors.
        acc = acc.cpu().numpy()
        for i, attacked in enumerate(g_list):
            if acc[i] < 1:
                return attacked
        return g_list[0]
    else:
        raise NotImplementedError
64 | 
if __name__ == '__main__':
    # Seed the Python, NumPy and torch RNGs with the configured seed.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(cmd_args.seed)

    label_map, train_glist, test_glist = load_er_data()

    base_classifier = load_base_model(label_map, test_glist)

    # Attack every test graph, then evaluate the classifier on the results.
    new_test_list = [propose_attack(base_classifier, g) for g in tqdm(test_glist)]

    test_graphs(base_classifier, new_test_list)


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/grad_attack.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import sys
 4 | import numpy as np
 5 | import torch
 6 | import networkx as nx
 7 | import random
 8 | from torch.autograd import Variable
 9 | from torch.nn.parameter import Parameter
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import torch.optim as optim
13 | from tqdm import tqdm
14 | from copy import deepcopy
15 | 
16 | from q_net import NStepQNet, QNet, greedy_actions
17 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
18 | from cmd_args import cmd_args
19 | from graph_embedding import S2VGraph
20 | 
21 | from rl_common import GraphEdgeEnv, load_graphs, test_graphs, attackable, load_base_model
22 | 
23 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
24 | from graph_common import loop_dataset, load_er_data
25 | 
def propose_attack(model, s2v_g, num_added=1):
    """Propose a gradient-guided attacked copy of ``s2v_g``.

    For every label other than the graph's own, the loss towards that label
    is back-propagated to the ``'n2n'`` entry returned by ``model.s2v``. The
    up to ``num_added`` node pairs with the most negative gradient that lie
    in the same connected component are added as edges, giving one candidate
    per label.

    Returns the first entry of ``[s2v_g] + candidates`` whose ``acc`` value
    from ``model`` is below 1, or ``s2v_g`` itself if there is none.
    """
    g = s2v_g.to_networkx()

    # Map each node to its component index. connected_components exists in
    # both old and new networkx, unlike connected_component_subgraphs.
    set_id = {}
    for comp_idx, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_idx

    node_feat, edge_feat, labels = model.PrepareFeatureLabel([s2v_g])
    if cmd_args.ctx == 'gpu':
        node_feat = node_feat.cuda()
        labels = labels.cuda()

    num_nodes = s2v_g.num_nodes
    cand_list = [s2v_g]
    for l in range(len(model.label_map)):
        if l == s2v_g.label:
            continue
        # Pretend the target label is ``l`` and differentiate that loss.
        labels[0] = l
        model.zero_grad()
        (_, embed), sp_dict = model.s2v([s2v_g], node_feat, edge_feat, pool_global=True, n2n_grad=True)
        _, loss, _ = model.mlp(embed, labels)
        loss.backward()
        # Move to host memory first; .numpy() is only valid for CPU tensors.
        grad = sp_dict['n2n'].grad.data.cpu().numpy().flatten()
        idxes = np.argsort(grad)
        added = []

        for p in idxes:
            # Flat index -> (row, col) as plain ints so node ids stay ints.
            x, y = divmod(int(p), num_nodes)
            if set_id[x] != set_id[y] or x == y or grad[p] > 0:
                continue
            added.append((x, y))
            if len(added) >= num_added:
                break
        if len(added) == 0:
            continue
        g2 = g.copy()
        g2.add_edges_from(added)

        cand_list.append(S2VGraph(g2, s2v_g.label))

    _, _, acc = model(cand_list)
    acc = acc.double().cpu().numpy()
    for i, candidate in enumerate(cand_list):
        if acc[i] < 1.0:
            return candidate
    return cand_list[0]
76 | 
if __name__ == '__main__':
    # Seed the Python, NumPy and torch RNGs with the configured seed.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(cmd_args.seed)

    label_map, train_glist, test_glist = load_er_data()

    base_classifier = load_base_model(label_map, test_glist)

    # Attack every test graph, then evaluate the classifier on the results.
    new_test_list = [propose_attack(base_classifier, g) for g in tqdm(test_glist)]

    test_graphs(base_classifier, new_test_list)


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/nstep_replay_mem.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | 
  4 | class NstepReplaySubMemCell(object):
  5 |     def __init__(self, memory_size):
  6 |         self.memory_size = memory_size
  7 | 
  8 |         self.actions = [None] * self.memory_size
  9 |         self.rewards = [None] * self.memory_size
 10 |         self.states = [None] * self.memory_size
 11 |         self.s_primes = [None] * self.memory_size
 12 |         self.terminals = [None] * self.memory_size
 13 | 
 14 |         self.count = 0
 15 |         self.current = 0
 16 | 
 17 |     def add(self, s_t, a_t, r_t, s_prime, terminal):
 18 |         self.actions[self.current] = a_t
 19 |         self.rewards[self.current] = r_t
 20 |         self.states[self.current] = s_t
 21 |         self.s_primes[self.current] = s_prime
 22 |         self.terminals[self.current] = terminal
 23 | 
 24 |         self.count = max(self.count, self.current + 1)
 25 |         self.current = (self.current + 1) % self.memory_size
 26 | 
 27 |     def add_list(self, list_st, list_at, list_rt, list_sp, list_term):
 28 |         for i in range(len(list_st)):
 29 |             if list_sp is None:
 30 |                 sp = (None, None, None)
 31 |             else:
 32 |                 sp = list_sp[i]
 33 |             self.add(list_st[i], list_at[i], list_rt[i], sp, list_term[i])
 34 | 
 35 |     def sample(self, batch_size):
 36 |         assert self.count >= batch_size
 37 | 
 38 |         list_st = []
 39 |         list_at = []
 40 |         list_rt = []
 41 |         list_s_primes = []
 42 |         list_term = []
 43 |         
 44 |         for i in range(batch_size):
 45 |             idx = random.randint(0, self.count - 1)
 46 |             list_st.append(self.states[idx])
 47 |             list_at.append(self.actions[idx])
 48 |             list_rt.append(float(self.rewards[idx]))
 49 |             list_s_primes.append(self.s_primes[idx])
 50 |             list_term.append(self.terminals[idx])
 51 | 
 52 |         return list_st, list_at, list_rt, list_s_primes, list_term
 53 | 
 54 | def hash_state_action(s_t, a_t):
 55 |     key = s_t[0]
 56 |     base = 179424673
 57 |     for e in s_t[1].directed_edges:
 58 |         key = (key * base + e[0]) % base
 59 |         key = (key * base + e[1]) % base
 60 |     if s_t[2] is not None:
 61 |         key = (key * base + s_t[2]) % base
 62 |     else:
 63 |         key = (key * base) % base
 64 |     
 65 |     key = (key * base + a_t) % base
 66 |     return key
 67 | 
 68 | class NstepReplayMemCell(object):
 69 |     def __init__(self, memory_size, balance_sample = False):
 70 |         self.sub_list = []
 71 |         self.balance_sample = balance_sample
 72 |         self.sub_list.append(NstepReplaySubMemCell(memory_size))
 73 |         if balance_sample:
 74 |             self.sub_list.append(NstepReplaySubMemCell(memory_size))
 75 |             self.state_set = set()
 76 | 
 77 |     def add(self, s_t, a_t, r_t, s_prime, terminal):
 78 |         if not self.balance_sample or r_t < 0:
 79 |             self.sub_list[0].add(s_t, a_t, r_t, s_prime, terminal)
 80 |         else:
 81 |             assert r_t > 0
 82 |             key = hash_state_action(s_t, a_t)
 83 |             if key in self.state_set:
 84 |                 return
 85 |             self.state_set.add(key)
 86 |             self.sub_list[1].add(s_t, a_t, r_t, s_prime, terminal)
 87 |     
 88 |     def sample(self, batch_size):
 89 |         if not self.balance_sample or self.sub_list[1].count < batch_size:
 90 |             return self.sub_list[0].sample(batch_size)
 91 |         
 92 |         list_st, list_at, list_rt, list_s_primes, list_term = self.sub_list[0].sample(batch_size // 2)
 93 |         list_st2, list_at2, list_rt2, list_s_primes2, list_term2 = self.sub_list[1].sample(batch_size - batch_size // 2)
 94 |         
 95 |         return list_st + list_st2, list_at + list_at2, list_rt + list_rt2, list_s_primes + list_s_primes2, list_term + list_term2
 96 | 
 97 | class NstepReplayMem(object):
 98 |     def __init__(self, memory_size, n_steps, balance_sample = False):
 99 |         self.mem_cells = []
100 |         for i in range(n_steps - 1):
101 |             self.mem_cells.append(NstepReplayMemCell(memory_size, False))
102 |         self.mem_cells.append(NstepReplayMemCell(memory_size, balance_sample))
103 | 
104 |         self.n_steps = n_steps
105 |         self.memory_size = memory_size
106 | 
107 |     def add(self, s_t, a_t, r_t, s_prime, terminal, t):
108 |         assert t >= 0 and t < self.n_steps
109 |         if t == self.n_steps - 1:
110 |             assert terminal
111 |         else:
112 |             assert not terminal
113 |         self.mem_cells[t].add(s_t, a_t, r_t, s_prime, terminal)        
114 | 
115 |     def add_list(self, list_st, list_at, list_rt, list_sp, list_term, t):
116 |         for i in range(len(list_st)):
117 |             if list_sp is None:
118 |                 sp = (None, None, None)
119 |             else:
120 |                 sp = list_sp[i]
121 |             self.add(list_st[i], list_at[i], list_rt[i], sp, list_term[i], t)
122 | 
123 |     def sample(self, batch_size, t = None):
124 |         if t is None:
125 |             t = np.random.randint(self.n_steps)
126 |         list_st, list_at, list_rt, list_s_primes, list_term = self.mem_cells[t].sample(batch_size)
127 |         return t, list_st, list_at, list_rt, list_s_primes, list_term


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/nstep_replay_mem.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/graph_attack/nstep_replay_mem.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/plot_dqn.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import sys
  5 | import numpy as np
  6 | import torch
  7 | import networkx as nx
  8 | import random
  9 | from torch.autograd import Variable
 10 | from torch.nn.parameter import Parameter
 11 | import torch.nn as nn
 12 | import torch.nn.functional as F
 13 | import torch.optim as optim
 14 | from tqdm import tqdm
 15 | from copy import deepcopy
 16 | 
 17 | from q_net import NStepQNet, QNet, greedy_actions
 18 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
 19 | from cmd_args import cmd_args
 20 | 
 21 | from rl_common import GraphEdgeEnv, local_args, load_graphs, test_graphs, load_base_model, attackable, get_supervision
 22 | from nstep_replay_mem import NstepReplayMem
 23 | 
 24 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
 25 | from graph_common import loop_dataset
 26 | 
 27 | class Agent(object):
 28 |     def __init__(self, g_list, test_g_list, env):
 29 |         self.g_list = g_list
 30 |         if test_g_list is None:
 31 |             self.test_g_list = g_list
 32 |         else:
 33 |             self.test_g_list = test_g_list
 34 |         self.mem_pool = NstepReplayMem(memory_size=50000, n_steps=2)
 35 |         self.env = env
 36 |         # self.net = QNet()
 37 |         self.net = NStepQNet(2)
 38 |         self.old_net = NStepQNet(2)
 39 |         if cmd_args.ctx == 'gpu':
 40 |             self.net = self.net.cuda()
 41 |             self.old_net = self.old_net.cuda()
 42 |         self.eps_start = 1.0
 43 |         self.eps_end = 1.0
 44 |         self.eps_step = 10000
 45 |         self.burn_in = 100        
 46 |         self.step = 0
 47 | 
 48 |         self.best_eval = None
 49 |         self.pos = 0
 50 |         self.sample_idxes = list(range(len(g_list)))
 51 |         random.shuffle(self.sample_idxes)
 52 |         self.take_snapshot()
 53 | 
 54 |     def take_snapshot(self):
 55 |         self.old_net.load_state_dict(self.net.state_dict())
 56 | 
 57 |     def make_actions(self, time_t, greedy=False):
 58 |         self.eps = self.eps_end + max(0., (self.eps_start - self.eps_end)
 59 |                 * (self.eps_step - max(0., self.step)) / self.eps_step)
 60 | 
 61 |         if random.random() < self.eps and not greedy:
 62 |             actions = self.env.uniformRandActions()
 63 |         else:
 64 |             cur_state = self.env.getStateRef()
 65 |             actions, _, _ = self.net(time_t, cur_state, None, greedy_acts=True)
 66 |             actions = list(actions.cpu().numpy())
 67 |             
 68 |         return actions
 69 | 
 70 |     def run_simulation(self):
 71 |         if (self.pos + 1) * cmd_args.batch_size > len(self.sample_idxes):
 72 |             self.pos = 0
 73 |             random.shuffle(self.sample_idxes)
 74 | 
 75 |         selected_idx = self.sample_idxes[self.pos * cmd_args.batch_size : (self.pos + 1) * cmd_args.batch_size]
 76 |         self.pos += 1
 77 |         self.env.setup([self.g_list[idx] for idx in selected_idx])
 78 | 
 79 |         t = 0
 80 |         while not env.isTerminal():
 81 |             list_at = self.make_actions(t)
 82 |             list_st = self.env.cloneState()
 83 |             self.env.step(list_at)
 84 | 
 85 |             assert (env.rewards is not None) == env.isTerminal()
 86 |             if env.isTerminal():
 87 |                 rewards = env.rewards
 88 |                 s_prime = None
 89 |             else:
 90 |                 rewards = np.zeros(len(list_at), dtype=np.float32)
 91 |                 s_prime = self.env.cloneState()
 92 | 
 93 |             self.mem_pool.add_list(list_st, list_at, rewards, s_prime, [env.isTerminal()] * len(list_at), t)
 94 |             t += 1
 95 | 
 96 |     def eval(self):
 97 |         self.env.setup(deepcopy(self.test_g_list))
 98 |         t = 0
 99 |         while not self.env.isTerminal():
100 |             list_at = self.make_actions(t, greedy=True)
101 |             self.env.step(list_at)
102 |             t += 1
103 |         test_loss = loop_dataset(env.g_list, env.classifier, list(range(len(env.g_list))), epoch=101)
104 |         print('\033[93m average test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))
105 |         with open('%s/edge_added.txt' % cmd_args.save_dir, 'w') as f:
106 |             for i in range(len(self.test_g_list)):
107 |                 f.write('%d %d ' % (self.test_g_list[i].label, env.pred[i] + 1))
108 |                 f.write('%d %d\n' % env.added_edges[i])
109 |         reward = np.mean(self.env.rewards)
110 |         print(reward)
111 |         return reward, test_loss[1]
112 | 
if __name__ == '__main__':
    # Seed the Python, NumPy and PyTorch generators from the same value.
    seed = cmd_args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    label_map, _, g_list = load_graphs()
    # random.shuffle(g_list)
    base_classifier = load_base_model(label_map, g_list)
    # Kept under the module-level name `env`.
    env = GraphEdgeEnv(base_classifier, n_edges = 1)

    # With frac_meta > 0 the tail of the graph list is held out for
    # evaluation; otherwise the agent evaluates on the graphs it was given.
    train_graphs, held_out = g_list, None
    if cmd_args.frac_meta > 0:
        num_train = int( len(g_list) * (1 - cmd_args.frac_meta) )
        train_graphs, held_out = g_list[:num_train], g_list[num_train:]
    agent = Agent(train_graphs, held_out, env)

    # This script only evaluates a previously saved best model.
    assert cmd_args.phase == 'test'
    agent.net.load_state_dict(torch.load(cmd_args.save_dir + '/epoch-best.model'))
    agent.eval()
132 |         # env.setup([g_list[idx] for idx in selected_idx])
133 |         # t = 0
134 |         # while not env.isTerminal():
135 |         #     policy_net = net_list[t]
136 |         #     t += 1            
137 |         #     batch_graph, picked_nodes = env.getState()
138 |         #     log_probs, prefix_sum = policy_net(batch_graph, picked_nodes)
139 |         #     actions = env.sampleActions(torch.exp(log_probs).data.cpu().numpy(), prefix_sum.data.cpu().numpy(), greedy=True)
140 |         #     env.step(actions)
141 | 
142 |         # test_loss = loop_dataset(env.g_list, base_classifier, list(range(len(env.g_list))))
143 |         # print('\033[93maverage test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))
144 |         
145 |         # print(np.mean(avg_rewards), np.mean(env.rewards))
146 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/plot_dqn.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | dropbox=../../dropbox
 4 | 
 5 | min_n=40
 6 | max_n=50
 7 | p=0.05
 8 | min_c=1
 9 | max_c=3
10 | base_lv=4
11 | data_folder=$dropbox/data/components
12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
13 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best
14 | 
15 | lr=0.001
16 | max_lv=5
17 | frac_meta=0
18 | 
19 | output_base=$dropbox/scratch/results/graph_classification/components/$save_fold
20 | 
21 | output_root=$output_base/lv-${max_lv}-frac-${frac_meta}
22 | 
23 | python plot_dqn.py \
24 |     -data_folder $data_folder \
25 |     -save_dir $output_root \
26 |     -max_n $max_n \
27 |     -min_n $min_n \
28 |     -max_lv $max_lv \
29 |     -frac_meta $frac_meta \
30 |     -min_c $min_c \
31 |     -max_c $max_c \
32 |     -n_graphs 5000 \
33 |     -er_p $p \
34 |     -learning_rate $lr \
35 |     -base_model_dump $base_model_dump \
36 |     -logfile $output_root/log.txt \
37 |     $@
38 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/q_net.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import os
  3 | import sys
  4 | import numpy as np
  5 | import torch
  6 | import networkx as nx
  7 | import random
  8 | from torch.autograd import Variable
  9 | from torch.nn.parameter import Parameter
 10 | import torch.nn as nn
 11 | import torch.nn.functional as F
 12 | import torch.optim as optim
 13 | from tqdm import tqdm
 14 | sys.path.append('%s/../../pytorch_structure2vec/s2v_lib' % os.path.dirname(os.path.realpath(__file__)))
 15 | from pytorch_util import weights_init
 16 | 
 17 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
 18 | from graph_embedding import EmbedMeanField, EmbedLoopyBP
 19 | from cmd_args import cmd_args
 20 | from modules.custom_mod import JaggedMaxModule
 21 | from rl_common import local_args
 22 | 
 23 | def greedy_actions(q_values, v_p, banned_list):
 24 |     actions = []
 25 |     offset = 0
 26 |     banned_acts = []
 27 |     prefix_sum = v_p.data.cpu().numpy()
 28 |     for i in range(len(prefix_sum)):
 29 |         n_nodes = prefix_sum[i] - offset
 30 | 
 31 |         if banned_list is not None and banned_list[i] is not None:
 32 |             for j in banned_list[i]:
 33 |                 banned_acts.append(offset + j)                    
 34 |         offset = prefix_sum[i]
 35 | 
 36 |     q_values = q_values.data.clone()
 37 |     if len(banned_acts):
 38 |         q_values[banned_acts, :] = np.finfo(np.float64).min
 39 |     jmax = JaggedMaxModule()
 40 |     values, actions = jmax(Variable(q_values), v_p)
 41 | 
 42 |     return actions.data, values.data
 43 |     
 44 | class QNet(nn.Module):
 45 |     def __init__(self, s2v_module = None):
 46 |         super(QNet, self).__init__()
 47 |         if cmd_args.gm == 'mean_field':
 48 |             model = EmbedMeanField
 49 |         elif cmd_args.gm == 'loopy_bp':
 50 |             model = EmbedLoopyBP
 51 |         else:
 52 |             print('unknown gm %s' % cmd_args.gm)
 53 |             sys.exit()
 54 | 
 55 |         if cmd_args.out_dim == 0:
 56 |             embed_dim = cmd_args.latent_dim
 57 |         else:
 58 |             embed_dim = cmd_args.out_dim
 59 |         if local_args.mlp_hidden:
 60 |             self.linear_1 = nn.Linear(embed_dim * 2, local_args.mlp_hidden)
 61 |             self.linear_out = nn.Linear(local_args.mlp_hidden, 1)
 62 |         else:
 63 |             self.linear_out = nn.Linear(embed_dim * 2, 1)
 64 |         weights_init(self)
 65 | 
 66 |         if s2v_module is None:
 67 |             self.s2v = model(latent_dim=cmd_args.latent_dim, 
 68 |                             output_dim=cmd_args.out_dim,
 69 |                             num_node_feats=2,
 70 |                             num_edge_feats=0,
 71 |                             max_lv=cmd_args.max_lv)
 72 |         else:
 73 |             self.s2v = s2v_module
 74 | 
 75 |     def PrepareFeatures(self, batch_graph, picked_nodes):
 76 |         n_nodes = 0
 77 |         prefix_sum = []
 78 |         picked_ones = []
 79 |         for i in range(len(batch_graph)):
 80 |             if picked_nodes is not None and picked_nodes[i] is not None:
 81 |                 assert picked_nodes[i] >= 0 and picked_nodes[i] < batch_graph[i].num_nodes
 82 |                 picked_ones.append(n_nodes + picked_nodes[i])
 83 |             n_nodes += batch_graph[i].num_nodes
 84 |             prefix_sum.append(n_nodes)
 85 | 
 86 |         node_feat = torch.zeros(n_nodes, 2)
 87 |         node_feat[:, 0] = 1.0
 88 | 
 89 |         if len(picked_ones):
 90 |             node_feat.numpy()[picked_ones, 1] = 1.0
 91 |             node_feat.numpy()[picked_ones, 0] = 0.0
 92 | 
 93 |         return node_feat, torch.LongTensor(prefix_sum)
 94 | 
 95 |     def add_offset(self, actions, v_p):
 96 |         prefix_sum = v_p.data.cpu().numpy()
 97 | 
 98 |         shifted = []        
 99 |         for i in range(len(prefix_sum)):
100 |             if i > 0:
101 |                 offset = prefix_sum[i - 1]
102 |             else:
103 |                 offset = 0
104 |             shifted.append(actions[i] + offset)
105 | 
106 |         return shifted
107 | 
108 |     def rep_global_embed(self, graph_embed, v_p):
109 |         prefix_sum = v_p.data.cpu().numpy()
110 | 
111 |         rep_idx = []        
112 |         for i in range(len(prefix_sum)):
113 |             if i == 0:
114 |                 n_nodes = prefix_sum[i]
115 |             else:
116 |                 n_nodes = prefix_sum[i] - prefix_sum[i - 1]
117 |             rep_idx += [i] * n_nodes
118 | 
119 |         rep_idx = Variable(torch.LongTensor(rep_idx))
120 |         if cmd_args.ctx == 'gpu':
121 |             rep_idx = rep_idx.cuda()
122 |         graph_embed = torch.index_select(graph_embed, 0, rep_idx)
123 |         return graph_embed
124 | 
125 |     def forward(self, time_t, states, actions, greedy_acts = False):
126 |         batch_graph, picked_nodes, banned_list = zip(*states)
127 | 
128 |         node_feat, prefix_sum = self.PrepareFeatures(batch_graph, picked_nodes)
129 |         
130 |         if cmd_args.ctx == 'gpu':
131 |             node_feat = node_feat.cuda()
132 |             prefix_sum = prefix_sum.cuda()
133 |         prefix_sum = Variable(prefix_sum)
134 | 
135 |         embed, graph_embed = self.s2v(batch_graph, node_feat, None, pool_global=True)
136 | 
137 |         if actions is None:
138 |             graph_embed = self.rep_global_embed(graph_embed, prefix_sum)
139 |         else:
140 |             shifted = self.add_offset(actions, prefix_sum)
141 |             embed = embed[shifted, :]
142 |         
143 |         embed_s_a = torch.cat((embed, graph_embed), dim=1)
144 | 
145 |         if local_args.mlp_hidden:
146 |             embed_s_a = F.relu( self.linear_1(embed_s_a) )
147 |         
148 |         raw_pred = self.linear_out(embed_s_a)
149 |         
150 |         if greedy_acts:
151 |             actions, _ = greedy_actions(raw_pred, prefix_sum, banned_list)
152 |             
153 |         return actions, raw_pred, prefix_sum
154 | 
class NStepQNet(nn.Module):
    """One ``QNet`` per time step, all sharing a single embedding module.

    The first ``QNet`` is built from ``s2v_module``; every later one reuses
    the first one's ``s2v`` attribute.
    """
    def __init__(self, num_steps, s2v_module = None):
        super(NStepQNet, self).__init__()

        first = QNet(s2v_module)
        stages = [first]
        stages.extend(QNet(first.s2v) for _ in range(1, num_steps))

        self.list_mod = nn.ModuleList(stages)

        self.num_steps = num_steps

    def forward(self, time_t, states, actions, greedy_acts = False):
        """Delegate to the ``QNet`` of step ``time_t`` (asserted in range)."""
        assert 0 <= time_t < self.num_steps

        step_net = self.list_mod[time_t]
        return step_net(time_t, states, actions, greedy_acts)
172 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/q_net.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/graph_attack/q_net.pyc


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_dqn.sh:
--------------------------------------------------------------------------------
 1 | dropbox=../../dropbox
 2 | 
 3 | min_n=$1
 4 | max_n=$2
 5 | p=$3
 6 | min_c=1
 7 | max_c=3
 8 | base_lv=$4
 9 | data_folder=$dropbox/data/components
10 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
11 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best
12 | 
13 | lr=0.001
14 | max_lv=5
15 | frac_meta=0
16 | 
17 | output_base=$dropbox/scratch/results/graph_classification/components/$save_fold
18 | 
19 | output_root=$output_base/lv-${max_lv}-frac-${frac_meta}
20 | 
21 | if [ ! -e $output_root ];
22 | then
23 |     mkdir -p $output_root
24 | fi
25 | 
26 | python dqn.py \
27 |     -data_folder $data_folder \
28 |     -save_dir $output_root \
29 |     -max_n $max_n \
30 |     -min_n $min_n \
31 |     -max_lv $max_lv \
32 |     -frac_meta $frac_meta \
33 |     -min_c $min_c \
34 |     -max_c $max_c \
35 |     -n_graphs 5000 \
36 |     -er_p $p \
37 |     -learning_rate $lr \
38 |     -base_model_dump $base_model_dump \
39 |     -logfile $output_root/log.txt \
40 |     $@
41 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_ga.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | dropbox=../../dropbox
 4 | 
 5 | min_n=$1
 6 | max_n=$2
 7 | p=$3
 8 | min_c=1
 9 | max_c=3
10 | base_lv=$4
11 | data_folder=$dropbox/data/components
12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
13 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best
14 | 
15 | idx_start=0
16 | num=2000
17 | pop=50
18 | cross=0.1
19 | mutate=0.2
20 | rounds=10
21 | 
22 | output_base=$HOME/scratch/results/graph_classification/components/$save_fold
23 | output_root=$output_base/ga-p-${pop}-c-${cross}-m-${mutate}-r-${rounds}
24 | 
25 | if [ ! -e $output_root ];
26 | then
27 |     mkdir -p $output_root
28 | fi
29 | 
30 | python genetic_algorithm.py \
31 |     -data_folder $data_folder \
32 |     -save_dir $output_root \
33 |     -idx_start $idx_start \
34 |     -population_size $pop \
35 |     -cross_rate $cross \
36 |     -mutate_rate $mutate \
37 |     -rounds $rounds \
38 |     -num_instances $num \
39 |     -max_n $max_n \
40 |     -min_n $min_n \
41 |     -min_c $min_c \
42 |     -max_c $max_c \
43 |     -n_graphs 5000 \
44 |     -er_p $p \
45 |     -base_model_dump $base_model_dump \
46 |     $@
47 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_grad.sh:
--------------------------------------------------------------------------------
 1 | min_n=$1
 2 | max_n=$2
 3 | p=$3
 4 | dropbox=../../dropbox/
 5 | data_folder=$dropbox/data/components
 6 | min_c=1
 7 | max_c=3
 8 | max_lv=$4
 9 | rand=random
10 | 
11 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
12 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best
13 | 
14 | output_root=./saved
15 | 
16 | if [ ! -e $output_root ];
17 | then
18 |     mkdir -p $output_root
19 | fi
20 | 
21 | python grad_attack.py \
22 |     -data_folder $data_folder \
23 |     -save_dir $output_root \
24 |     -max_n $max_n \
25 |     -min_n $min_n \
26 |     -rand_att_type $rand \
27 |     -min_c $min_c \
28 |     -max_c $max_c \
29 |     -base_model_dump $base_model_dump \
30 |     -n_graphs 5000 \
31 |     -er_p $p \
32 |     $@
33 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_trivial.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | min_n=$1
 4 | max_n=$2
 5 | p=$3
 6 | dropbox=../../dropbox/
 7 | data_folder=$dropbox/data/components
 8 | min_c=1
 9 | max_c=3
10 | max_lv=$4
11 | # rand=exhaust
12 | rand=random
13 | 
14 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
15 | base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best
16 | 
17 | output_root=./saved
18 | 
19 | if [ ! -e $output_root ];
20 | then
21 |     mkdir -p $output_root
22 | fi
23 | 
24 | python er_trivial_attack.py \
25 |     -data_folder $data_folder \
26 |     -save_dir $output_root \
27 |     -max_n $max_n \
28 |     -min_n $min_n \
29 |     -max_lv $max_lv \
30 |     -rand_att_type $rand \
31 |     -min_c $min_c \
32 |     -max_c $max_c \
33 |     -base_model_dump $base_model_dump \
34 |     -n_graphs 5000 \
35 |     -er_p $p \
36 |     $@
37 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/er_components.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import os
 4 | import sys
 5 | import numpy as np
 6 | import torch
 7 | import random
 8 | from torch.autograd import Variable
 9 | from torch.nn.parameter import Parameter
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import torch.optim as optim
13 | from tqdm import tqdm
14 | # import cPickle as cp
15 | import pickle as cp
16 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
17 | from cmd_args import cmd_args, save_args
18 | from dnn import GraphClassifier
19 | from graph_embedding import S2VGraph
20 | 
21 | sys.path.append('%s/../data_generator' % os.path.dirname(os.path.realpath(__file__)))
22 | from data_util import load_pkl
23 | 
24 | from graph_common import loop_dataset, load_er_data
25 | 
if __name__ == '__main__':
    # Seed the Python, NumPy and PyTorch generators from the same value.
    seed = cmd_args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    label_map, train_glist, test_glist = load_er_data()

    # Either start from scratch with the current arguments, or rebuild the
    # classifier from the pickled arguments saved next to a stored model.
    resume_from = cmd_args.saved_model
    if resume_from is None or resume_from == '':
        classifier = GraphClassifier(label_map, **vars(cmd_args))
    else:
        print('loading model from %s' % resume_from)
        with open('%s-args.pkl' % resume_from, 'rb') as f:
            base_args = cp.load(f)
        classifier = GraphClassifier(label_map, **vars(base_args))
        classifier.load_state_dict(torch.load(resume_from + '.model'))

    if cmd_args.ctx == 'gpu':
        classifier = classifier.cuda()

    test_idxes = list(range(len(test_glist)))

    if cmd_args.phase == 'test':
        test_loss = loop_dataset(test_glist, classifier, test_idxes, epoch=101)
        print('\033[93maverage test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))

    if cmd_args.phase == 'train':
        optimizer = optim.Adam(classifier.parameters(), lr=cmd_args.learning_rate)

        train_idxes = list(range(len(train_glist)))
        best_loss = None
        for epoch in range(cmd_args.num_epochs):
            random.shuffle(train_idxes)
            avg_loss = loop_dataset(train_glist, classifier, train_idxes, optimizer=optimizer, epoch=epoch)
            print('\033[92maverage training of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, avg_loss[0], avg_loss[1]))

            test_loss = loop_dataset(test_glist, classifier, test_idxes, epoch=epoch)
            print('\033[93maverage test of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, test_loss[0], test_loss[1]))

            # Checkpoint whenever the test loss improves on the best so far.
            improved = best_loss is None or test_loss[0] < best_loss
            if improved:
                best_loss = test_loss[0]
                print('----saving to best model since this is the best valid loss so far.----')
                torch.save(classifier.state_dict(), cmd_args.save_dir + '/epoch-best.model')
                save_args(cmd_args.save_dir + '/epoch-best-args.pkl', cmd_args)


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/graph_common.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import sys
  5 | import numpy as np
  6 | import torch
  7 | import networkx as nx
  8 | import random
  9 | from torch.autograd import Variable
 10 | from torch.nn.parameter import Parameter
 11 | import torch.nn as nn
 12 | import torch.nn.functional as F
 13 | import torch.optim as optim
 14 | from tqdm import tqdm
 15 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
 16 | from cmd_args import cmd_args
 17 | from graph_embedding import S2VGraph
 18 | sys.path.append('%s/../data_generator' % os.path.dirname(os.path.realpath(__file__)))
 19 | from data_util import load_pkl
 20 | from copy import deepcopy
 21 | 
@torch.no_grad()
def gen_adv_output(data, model, z):
    """Return the contrastive output of a weight-perturbed copy of ``model``.

    A deep copy of ``model`` is perturbed in place for two rounds; ``model``
    itself is never modified.  In each round the accumulated deviation of
    every selected parameter grows by ``cmd_args.lr_inner * grad``, where the
    gradient is that of ``loss_cl(z, forward_cl(data))``, and is rescaled so
    its norm does not exceed ``cmd_args.epison``.

    Args:
        data: batch passed unchanged to ``forward_cl``.
        model: module exposing ``forward_cl`` and ``loss_cl``.
        z: reference output; it is detached before use.

    Returns:
        ``forward_cl(data)`` of the perturbed copy.
    """
    # Detach the reference so no gradient flows back into the caller's graph.
    z = Variable(z.detach().data, requires_grad=False)
    model_adv = deepcopy(model)
    # The optimizer is only used to zero gradients; step() is never called.
    adv_optim = optim.Adam(model_adv.parameters(), lr=cmd_args.lr_inner)
    def closure(z):
        # Recompute the gradients of the contrastive loss on the copy.
        adv_optim.zero_grad()
        z_tmp = model_adv.forward_cl(data)
        loss_tmp = model_adv.loss_cl(z, z_tmp)
        loss_tmp.backward()
        torch.nn.utils.clip_grad_norm_(model_adv.parameters(), cmd_args.clip_norm)
    # The function runs under no_grad, so autograd is re-enabled just for the closure.
    closure = torch.enable_grad()(closure)
    closure(z)
    # Per-parameter deviation currently applied to the copy.
    state = dict()
    for i in range(2): 
        for name, param in model_adv.named_parameters():          
            # Parameters under 'mlp' and 'projection_head' are left unperturbed.
            if name.split('.')[0] != 'mlp' and name.split('.')[0] != 'projection_head':
                if i == 0:
                    state[name] = torch.zeros_like(param.grad)               
                dev = state[name] + cmd_args.lr_inner * param.grad
                # Rescale the deviation when its norm exceeds cmd_args.epison.
                clip_coef = cmd_args.epison / (dev.norm() + 1e-12)
                dev = clip_coef * dev if clip_coef < 1 else dev
                # Swap the previously applied deviation for the new one.
                param.sub_(state[name]).add_(dev)
                state[name] = dev           
        # Refresh the gradients at the newly perturbed weights.
        closure(z)
    z2 = model_adv.forward_cl(data)
    return z2
 49 | 
 50 | def loop_dataset(g_list, classifier, sample_idxes, optimizer=None, bsize=cmd_args.batch_size, epoch=0):
 51 |     total_loss = []
 52 |     total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
 53 |     pbar = tqdm(range(total_iters), unit='batch')
 54 | 
 55 |     n_samples = 0
 56 |     for pos in pbar:
 57 |         selected_idx = sample_idxes[pos * bsize : (pos + 1) * bsize]
 58 |         batch_graph = [g_list[idx] for idx in selected_idx]
 59 |         if epoch <= 150:
 60 |             x1 = classifier.forward_cl(batch_graph)
 61 |             x2 = gen_adv_output(batch_graph, classifier, x1)
 62 |             x2 = Variable(x2.detach().data, requires_grad=False)
 63 |             loss = classifier.loss_cl(x1, x2)
 64 |             acc = torch.zeros(1)
 65 |         else:
 66 |             _, loss, acc = classifier(batch_graph)
 67 |         acc = acc.sum().item() / float(acc.size()[0])
 68 |         if optimizer is not None:
 69 |             optimizer.zero_grad()
 70 |             loss.backward()         
 71 |             optimizer.step()
 72 |         loss = loss.data.cpu().numpy()
 73 |         pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
 74 |         total_loss.append( np.array([loss, acc]) * len(selected_idx))
 75 |         n_samples += len(selected_idx)
 76 |     if optimizer is None:
 77 |         assert n_samples == len(sample_idxes)
 78 |     total_loss = np.array(total_loss)
 79 |     avg_loss = np.sum(total_loss, 0) / n_samples
 80 |     return avg_loss
 81 | 
 82 | def load_er_data():
 83 |     frac_train = 0.9
 84 |     pattern = 'nrange-%d-%d-n_graph-%d-p-%.2f' % (cmd_args.min_n, cmd_args.max_n, cmd_args.n_graphs, cmd_args.er_p)
 85 |     num_train = int(frac_train * cmd_args.n_graphs)
 86 |     train_glist = []
 87 |     test_glist = []
 88 |     label_map = {}
 89 |     for i in range(cmd_args.min_c, cmd_args.max_c + 1):
 90 |         cur_list = load_pkl('%s/ncomp-%d-%s.pkl' % (cmd_args.data_folder, i, pattern), cmd_args.n_graphs)
 91 |         assert len(cur_list) == cmd_args.n_graphs
 92 |         train_glist += [S2VGraph(cur_list[j], i) for j in range(num_train)]
 93 |         test_glist += [S2VGraph(cur_list[j], i) for j in range(num_train, len(cur_list))]
 94 |         label_map[i] = i - cmd_args.min_c
 95 |     cmd_args.num_class = len(label_map)
 96 |     cmd_args.feat_dim = 1
 97 |     print('# train:', len(train_glist), ' # test:', len(test_glist))
 98 | 
 99 |     return label_map, train_glist, test_glist
100 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/run_er_components.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | min_n=$1
 4 | max_n=$2
 5 | p=$3
 6 | dropbox=../../dropbox
 7 | data_folder=$dropbox/data/components
 8 | min_c=1
 9 | max_c=3
10 | max_lv=$4
11 | 
12 | 
13 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
14 | output_root=../../dropbox/scratch/results/graph_classification/components/$save_fold
15 | 
16 | if [ ! -e $output_root ];
17 | then
18 |     mkdir -p $output_root
19 | fi
20 | 
21 | python er_components.py \
22 |     -data_folder $data_folder \
23 |     -save_dir $output_root \
24 |     -max_n $max_n \
25 |     -min_n $min_n \
26 |     -max_lv $max_lv \
27 |     -min_c $min_c \
28 |     -max_c $max_c \
29 |     -n_graphs 5000 \
30 |     -er_p $p \
31 |     $@
32 | 


--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/test_er_comp.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | min_n=40
 4 | max_n=50
 5 | p=0.05
 6 | dropbox=../../dropbox/
 7 | data_folder=$dropbox/data/components
 8 | min_c=1
 9 | max_c=3
10 | max_lv=4
11 | 
12 | save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
13 | output_root=$HOME/scratch/results/graph_classification/components/$save_fold
14 | saved_model=$output_root/epoch-best
15 | 
16 | if [ ! -e $output_root ];
17 | then
18 |     mkdir -p $output_root
19 | fi
20 | 
21 | python er_components.py \
22 |     -data_folder $data_folder \
23 |     -save_dir $output_root \
24 |     -max_n $max_n \
25 |     -min_n $min_n \
26 |     -max_lv $max_lv \
27 |     -min_c $min_c \
28 |     -max_c $max_c \
29 |     -saved_model $saved_model \
30 |     -n_graphs 5000 \
31 |     -er_p $p \
32 |     $@
33 | 


--------------------------------------------------------------------------------
/semisupervised_TU/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | 
 3 | You can create a conda environment named simgrace with the command:
 4 | ```
 5 | conda env create -f environment.yml
 6 | conda activate simgrace
 7 | ```
 8 | 
 9 | Then create the directories that hold the pre-trained models and the pre-training and finetuning logs; the scripts fail if they are missing:
10 | 
11 | ```
12 | cd ./pre-training
13 | mkdir models
14 | mkdir logs
15 | cd ..
16 | cd ./finetuning
17 | mkdir logs
18 | cd ..
19 | ```
20 | 
21 | ## SimGRACE with Perturbations of Various Magnitudes
22 | 
23 | Take NCI1 as an example:
24 | 
25 | ### Pre-training: ###
26 | 
27 | ```
28 | cd ./pre-training
29 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 0
30 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 1
31 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 2
32 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 3
33 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 4
34 | ```
35 | 
36 | ### Finetuning: ###
37 | 
38 | ```
39 | cd ./finetuning
40 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 0
41 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 1
42 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 2
43 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 3
44 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 4
45 | ```
46 | 
47 | The five suffixes correspond to five independent runs (the mean and standard deviation over these runs are reported). ```eta``` can be tuned over ```0.1, 1.0, 10.0, 100.0```.
48 | In pre-training, ```lr``` should be tuned over {0.01, 0.001, 0.0001}; in finetuning, ```model_epoch``` (the epoch of the pre-trained checkpoint to load) should be tuned over {20, 40, 60, 80, 100}.
49 | 
50 | ## Acknowledgements
51 | * https://github.com/Shen-Lab/GraphCL/tree/master/semisupervised_TU
52 | * https://github.com/chentingpc/gfn
53 | 


--------------------------------------------------------------------------------
/semisupervised_TU/environment.yml:
--------------------------------------------------------------------------------
 1 | name: simgrace
 2 | channels:
 3 |   - defaults
 4 | dependencies:
 5 |   - _libgcc_mutex=0.1=main
 6 |   - ca-certificates=2020.6.24=0
 7 |   - certifi=2020.6.20=py36_0
 8 |   - ld_impl_linux-64=2.33.1=h53a641e_7
 9 |   - libedit=3.1.20191231=h7b6447c_0
10 |   - libffi=3.3=he6710b0_1
11 |   - libgcc-ng=9.1.0=hdf63c60_0
12 |   - libstdcxx-ng=9.1.0=hdf63c60_0
13 |   - ncurses=6.2=he6710b0_1
14 |   - openssl=1.1.1g=h7b6447c_0
15 |   - pip=20.1.1=py36_1
16 |   - python=3.6.10=h7579374_2
17 |   - readline=8.0=h7b6447c_0
18 |   - setuptools=47.3.1=py36_0
19 |   - sqlite=3.32.3=h62c20be_0
20 |   - tk=8.6.10=hbc83047_0
21 |   - wheel=0.34.2=py36_0
22 |   - xz=5.2.5=h7b6447c_0
23 |   - zlib=1.2.11=h7b6447c_3
24 |   - pip:
25 |     - decorator==4.4.2
26 |     - future==0.18.2
27 |     - isodate==0.6.0
28 |     - joblib==0.16.0
29 |     - networkx==2.4
30 |     - numpy==1.19.0
31 |     - pandas==1.0.5
32 |     - pillow==7.2.0
33 |     - plyfile==0.7.2
34 |     - pyparsing==2.4.7
35 |     - python-dateutil==2.8.1
36 |     - pytz==2020.1
37 |     - rdflib==5.0.0
38 |     - scikit-learn==0.23.1
39 |     - scipy==1.5.0
40 |     - six==1.15.0
41 |     - threadpoolctl==2.1.0
42 |     - torch==1.4.0
43 |     - torch-cluster==1.4.5
44 |     - torch-geometric==1.1.0
45 |     - torch-scatter==1.1.0
46 |     - torch-sparse==0.4.4
47 |     - torchvision==0.5.0
48 | 


--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/datasets.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import re
 3 | 
 4 | import torch
 5 | from torch_geometric.datasets import MNISTSuperpixels
 6 | from torch_geometric.utils import degree
 7 | import torch_geometric.transforms as T
 8 | from feature_expansion import FeatureExpander
 9 | from image_dataset import ImageDataset
10 | from tu_dataset import TUDatasetExt
11 | 
12 | 
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None):
    """Build the dataset selected by ``name`` with features from ``feat_str``.

    ``feat_str`` is scanned for feature tokens:

    * ``deg``            - enable the degree feature
    * ``odeg<N>``        - one-hot degree with maximum ``N``
    * ``ak<N>``/``ank<N>`` - ``AK`` value ``N``
    * ``groupd<N>``      - group-degree value ``N``
    * ``re<word>``       - edge-removal mode ``<word>`` (``'none'`` if absent)
    * ``randa<float>`` / ``randd<float>`` - edge noise added / deleted
    * ``cent``           - enable centrality
    * ``coord``          - append coordinates (image datasets only)

    Args:
        name: Dataset name. Names containing ``MNIST`` or ``CIFAR`` select the
            image datasets, anything else is loaded through ``TUDatasetExt``.
        sparse: Not used by this function.
        feat_str: Feature specification string, see above.
        root: Directory under which ``name`` is stored. ``None`` or ``''``
            falls back to ``~/pyG_data``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for image datasets, otherwise
        a single ``TUDatasetExt`` whose ``data.edge_attr`` has been cleared.
    """
    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    def _first(pattern, convert, default):
        # First capture of `pattern` in feat_str, converted; `default` if absent.
        found = re.findall(pattern, feat_str)
        return convert(found[0]) if found else default

    # Patterns are raw strings: "\d" / "\w" in a normal string literal are
    # invalid escape sequences and trigger warnings on recent Python versions.
    # The flag is not called `degree` so it does not shadow the imported
    # torch_geometric.utils.degree.
    use_degree = "deg" in feat_str
    onehot_maxdeg = _first(r"odeg(\d+)", int, None)
    k = _first(r"an{0,1}k(\d+)", int, 0)
    groupd = _first(r"groupd(\d+)", int, 0)
    remove_edges = _first(r"re(\w+)", str, 'none')
    edge_noises_add = _first(r"randa([\d\.]+)", float, 0)
    edge_noises_delete = _first(r"randd([\d\.]+)", float, 0)
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            # The processed file name embeds feat_str, so each feature
            # configuration gets its own processed files.
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str)

        dataset.data.edge_attr = None

    return dataset
63 | 


--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/gcn_conv.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.nn import Parameter
  3 | from torch_scatter import scatter_add
  4 | from torch_geometric.nn.conv import MessagePassing
  5 | from torch_geometric.utils import remove_self_loops, add_self_loops
  6 | from torch_geometric.nn.inits import glorot, zeros
  7 | 
  8 | 
  9 | class GCNConv(MessagePassing):
 10 |     r"""The graph convolutional operator from the `"Semi-supervised
 11 |     Classfication with Graph Convolutional Networks"
 12 |     <https://arxiv.org/abs/1609.02907>`_ paper
 13 | 
 14 |     .. math::
 15 |         \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
 16 |         \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},
 17 | 
 18 |     where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
 19 |     adjacency matrix with inserted self-loops and
 20 |     :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.
 21 | 
 22 |     Args:
 23 |         in_channels (int): Size of each input sample.
 24 |         out_channels (int): Size of each output sample.
 25 |         improved (bool, optional): If set to :obj:`True`, the layer computes
 26 |             :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
 27 |             (default: :obj:`False`)
 28 |         cached (bool, optional): If set to :obj:`True`, the layer will cache
 29 |             the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2}
 30 |             \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}`.
 31 |             (default: :obj:`False`)
 32 |         bias (bool, optional): If set to :obj:`False`, the layer will not learn
 33 |             an additive bias. (default: :obj:`True`)
 34 |         edge_norm (bool, optional): whether or not to normalize adj matrix.
 35 |             (default: :obj:`True`)
 36 |         gfn (bool, optional): If `True`, only linear transform (1x1 conv) is
 37 |             applied to every nodes. (default: :obj:`False`)
 38 |     """
 39 | 
 40 |     def __init__(self,
 41 |                  in_channels,
 42 |                  out_channels,
 43 |                  improved=False,
 44 |                  cached=False,
 45 |                  bias=True,
 46 |                  edge_norm=True,
 47 |                  gfn=False):
 48 |         super(GCNConv, self).__init__('add')
 49 | 
 50 |         self.in_channels = in_channels
 51 |         self.out_channels = out_channels
 52 |         self.improved = improved
 53 |         self.cached = cached
 54 |         self.cached_result = None
 55 |         self.edge_norm = edge_norm
 56 |         self.gfn = gfn
 57 | 
 58 |         self.weight = Parameter(torch.Tensor(in_channels, out_channels))
 59 | 
 60 |         if bias:
 61 |             self.bias = Parameter(torch.Tensor(out_channels))
 62 |         else:
 63 |             self.register_parameter('bias', None)
 64 | 
 65 |         self.reset_parameters()
 66 | 
 67 |     def reset_parameters(self):
 68 |         glorot(self.weight)
 69 |         zeros(self.bias)
 70 |         self.cached_result = None
 71 | 
 72 |     @staticmethod
 73 |     def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
 74 |         if edge_weight is None:
 75 |             edge_weight = torch.ones((edge_index.size(1), ),
 76 |                                      dtype=dtype,
 77 |                                      device=edge_index.device)
 78 |         edge_weight = edge_weight.view(-1)
 79 |         assert edge_weight.size(0) == edge_index.size(1)
 80 | 
 81 |         edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
 82 |         edge_index = add_self_loops(edge_index, num_nodes=num_nodes)
 83 |         # Add edge_weight for loop edges.
 84 |         loop_weight = torch.full((num_nodes, ),
 85 |                                  1 if not improved else 2,
 86 |                                  dtype=edge_weight.dtype,
 87 |                                  device=edge_weight.device)
 88 |         edge_weight = torch.cat([edge_weight, loop_weight], dim=0)
 89 | 
 90 |         row, col = edge_index
 91 |         deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
 92 |         deg_inv_sqrt = deg.pow(-0.5)
 93 |         deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
 94 | 
 95 |         return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
 96 | 
 97 |     def forward(self, x, edge_index, edge_weight=None):
 98 |         """"""
 99 |         x = torch.matmul(x, self.weight)
100 |         if self.gfn:
101 |             return x
102 | 
103 |         if not self.cached or self.cached_result is None:
104 |             if self.edge_norm:
105 |                 edge_index, norm = GCNConv.norm(
106 |                     edge_index, x.size(0), edge_weight, self.improved, x.dtype)
107 |             else:
108 |                 norm = None
109 |             self.cached_result = edge_index, norm
110 | 
111 |         edge_index, norm = self.cached_result
112 |         return self.propagate(edge_index, x=x, norm=norm)
113 | 
114 |     def message(self, x_j, norm):
115 |         if self.edge_norm:
116 |             return norm.view(-1, 1) * x_j
117 |         else:
118 |             return x_j
119 | 
120 |     def update(self, aggr_out):
121 |         if self.bias is not None:
122 |             aggr_out = aggr_out + self.bias
123 |         return aggr_out
124 | 
125 |     def __repr__(self):
126 |         return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
127 |                                    self.out_channels)
128 | 


--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/image_dataset.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import numpy as np
  3 | 
  4 | import torch
  5 | import torchvision
  6 | from torchvision import datasets, transforms
  7 | from torch_geometric.data import InMemoryDataset, Data
  8 | 
  9 | 
 10 | class ImageDataset(InMemoryDataset):
 11 |     def __init__(self,
 12 |                  root,
 13 |                  name,
 14 |                  train=True,
 15 |                  transform=None,
 16 |                  pre_transform=None,
 17 |                  pre_filter=None,
 18 |                  coord=False,
 19 |                  processed_file_prefix='data'):
 20 |         assert name in ['MNIST', 'CIFAR10'], "Unsupported data name %s" % name
 21 |         self.name = name
 22 |         self.coord = coord
 23 |         self.processed_file_prefix = processed_file_prefix
 24 |         self.traindata = None
 25 |         self.testdata = None
 26 |         super(ImageDataset, self).__init__(
 27 |             root, transform, pre_transform, pre_filter)
 28 |         path = self.processed_paths[0] if train else self.processed_paths[1]
 29 |         self.data, self.slices = torch.load(path)
 30 | 
 31 |     @property
 32 |     def raw_file_names(self):
 33 |         if self.name == 'MNIST':
 34 |             return ['t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
 35 |                     'train-images-idx3-ubyte', 'train-labels-idx1-ubyte']
 36 |         elif self.name == 'CIFAR10':
 37 |             return ['data_batch_1', 'data_batch_2', 'data_batch_3',
 38 |                     'data_batch_4', 'data_batch_5', 'test_batch']
 39 | 
 40 |     @property
 41 |     def processed_file_names(self):
 42 |         return ['%s_training.pt' % self.processed_file_prefix,
 43 |                 '%s_test.pt' % self.processed_file_prefix]
 44 | 
 45 |     def download(self):
 46 |         transform = transforms.ToTensor()
 47 |         if self.name == 'CIFAR10':
 48 |             data_train = datasets.CIFAR10(root=self.raw_dir,
 49 |                                           transform=transform,
 50 |                                           train=True,
 51 |                                           download=True)
 52 |             data_test = datasets.CIFAR10(root=self.raw_dir,
 53 |                                          transform=transform,
 54 |                                          train=False,
 55 |                                          download=True)
 56 |         elif self.name == 'MNIST':
 57 |             data_train = datasets.MNIST(root=self.raw_dir,
 58 |                                         transform=transform,
 59 |                                         train=True,
 60 |                                         download=True)
 61 |             data_test = datasets.MNIST(root=self.raw_dir,
 62 |                                        transform=transform,
 63 |                                        train=False,
 64 |                                        download=True)
 65 |         else:
 66 |             raise ValueError("Unknown data name {}".format(self.name))
 67 |         self.traindata = data_train
 68 |         self.testdata = data_test
 69 | 
 70 |     def process(self):
 71 |         trainLoader = torch.utils.data.DataLoader(self.traindata)
 72 |         testLoader = torch.utils.data.DataLoader(self.testdata)
 73 |         if self.name == 'MNIST':
 74 |             num_row, num_col = 28, 28
 75 |         elif self.name == 'CIFAR10':
 76 |             num_row, num_col = 32, 32
 77 |         else:
 78 |             raise ValueError('dataset error')
 79 |         num_edges = (3 * num_row - 2) * (3 * num_col - 2)
 80 |         edge_index_array = np.zeros(shape=[2, num_edges])
 81 |         edge_attr_array = np.zeros(shape=[1, num_edges])
 82 |         curt = 0
 83 |         for j in range(num_row):
 84 |             for k in range(num_col):
 85 |                 for m in range(max(j-1, 0), min(j+1, num_row-1)+1):
 86 |                     for n in range(max(k-1, 0), min(k+1, num_col-1)+1):
 87 |                         edge_index_array[0][curt] = j * num_row + k
 88 |                         edge_index_array[1][curt] = m * num_row + n
 89 |                         edge_attr_array[0][curt] = self.weight(j, k, m, n)
 90 |                         curt += 1
 91 |         edge_index = torch.from_numpy(edge_index_array).to(torch.int64)
 92 |         edge_attr = torch.from_numpy(edge_attr_array).to(torch.float)
 93 | 
 94 |         def transform_data(data_loader, edge_index, edge_attr):
 95 |             data_list = []
 96 |             channel, num_row, num_col = data_loader.dataset[0][0].size()
 97 |             if self.coord:
 98 |                 x = torch.arange(num_col, dtype=torch.float)
 99 |                 x = x.view((1, -1)).repeat(num_row, 1).view((-1, 1)) - x.mean()
100 |                 y = torch.arange(num_row, dtype=torch.float)
101 |                 y = y.view((-1, 1)).repeat(1, num_col).view((-1, 1)) - y.mean()
102 |                 coord = torch.cat([x, y], -1)
103 | 
104 |             for image, label in iter(data_loader):
105 |                 x = image[0].permute([1,2,0]).view(
106 |                     num_row * num_col, image[0].size()[0])
107 |                 if self.coord:
108 |                     x = torch.cat([x, coord], -1)
109 |                 data = Data(
110 |                     edge_index=edge_index, edge_attr=edge_attr, x=x, y=label)
111 |                 if self.pre_filter is not None:
112 |                     data = self.pre_filter(data)
113 |                 if self.pre_transform is not None:
114 |                     data = self.pre_transform(data)
115 |                 data_list.append(data)
116 |             return data_list
117 | 
118 |         train_data_list = transform_data(trainLoader, edge_index, edge_attr)
119 |         torch.save(self.collate(train_data_list), self.processed_paths[0])
120 | 
121 |         test_data_list = transform_data(testLoader, edge_index, edge_attr)
122 |         torch.save(self.collate(test_data_list), self.processed_paths[1])
123 | 
124 |     @staticmethod
125 |     def weight(pos_x, pos_y, pos_x_new, pos_y_new):
126 |         dist = (pos_x - pos_x_new) ** 2 + (pos_y - pos_y_new) ** 2
127 |         return math.exp(-dist)
128 | 
129 |     def __repr__(self):
130 |         return '{}({})'.format(self.name, len(self))
131 | 
132 | 


--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/tu_dataset.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.datasets import TUDataset
 2 | import torch
 3 | from itertools import repeat, product
 4 | from copy import deepcopy
 5 | 
 6 | 
 7 | 
 8 | class TUDatasetExt(TUDataset):
 9 |     r"""A variety of graph kernel benchmark datasets, *.e.g.* "IMDB-BINARY",
10 |     "REDDIT-BINARY" or "PROTEINS", collected from the `TU Dortmund University
11 |     <http://graphkernels.cs.tu-dortmund.de>`_.
12 | 
13 |     Args:
14 |         root (string): Root directory where the dataset should be saved.
15 |         name (string): The `name <http://graphkernels.cs.tu-dortmund.de>`_ of
16 |             the dataset.
17 |         transform (callable, optional): A function/transform that takes in an
18 |             :obj:`torch_geometric.data.Data` object and returns a transformed
19 |             version. The data object will be transformed before every access.
20 |             (default: :obj:`None`)
21 |         pre_transform (callable, optional): A function/transform that takes in
22 |             an :obj:`torch_geometric.data.Data` object and returns a
23 |             transformed version. The data object will be transformed before
24 |             being saved to disk. (default: :obj:`None`)
25 |         pre_filter (callable, optional): A function that takes in an
26 |             :obj:`torch_geometric.data.Data` object and returns a boolean
27 |             value, indicating whether the data object should be included in the
28 |             final dataset. (default: :obj:`None`)
29 |         use_node_attr (bool, optional): If :obj:`True`, the dataset will
30 |             contain additional continuous node features (if present).
31 |             (default: :obj:`False`)
32 |     """
33 | 
34 |     url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/' \
35 |           'graphkerneldatasets'
36 | 
37 |     def __init__(self,
38 |                  root,
39 |                  name,
40 |                  transform=None,
41 |                  pre_transform=None,
42 |                  pre_filter=None,
43 |                  use_node_attr=False,
44 |                  processed_filename='data.pt'):
45 |         self.processed_filename = processed_filename
46 |         super(TUDatasetExt, self).__init__(root, name, transform, pre_transform,
47 |                                            pre_filter, use_node_attr)
48 | 
49 |     @property
50 |     def processed_file_names(self):
51 |         return self.processed_filename
52 | 
53 | 


--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/utils.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | def print_weights(model):
 4 |     for name, param in model.named_parameters():
 5 |         if param.requires_grad:
 6 |             print(name, param.shape)
 7 |     sys.stdout.flush()
 8 | 
 9 | 
def logger(info):
    """Print train/test accuracy on epoch 1 and on every tenth epoch.

    Args:
        info: Mapping with keys ``'fold'`` and ``'epoch'``; ``'train_acc'``
            and ``'test_acc'`` are only read on epochs that are reported.
    """
    fold = info['fold']
    epoch = info['epoch']
    is_report_epoch = epoch == 1 or epoch % 10 == 0
    if is_report_epoch:
        line = '{:02d}/{:03d}: Train Acc: {:.3f}, Test Accuracy: {:.3f}'.format(
            fold, epoch, info['train_acc'], info['test_acc'])
        print(line)
    sys.stdout.flush()
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/datasets.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import re
 3 | 
 4 | import torch
 5 | from torch_geometric.datasets import MNISTSuperpixels
 6 | from torch_geometric.utils import degree
 7 | import torch_geometric.transforms as T
 8 | from feature_expansion import FeatureExpander
 9 | from image_dataset import ImageDataset
10 | from tu_dataset import TUDatasetExt
11 | 
12 | 
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None, aug=None, aug_ratio=None):
    """Build the dataset selected by ``name`` with features from ``feat_str``.

    ``feat_str`` is scanned for feature tokens:

    * ``deg``            - enable the degree feature
    * ``odeg<N>``        - one-hot degree with maximum ``N``
    * ``ak<N>``/``ank<N>`` - ``AK`` value ``N``
    * ``groupd<N>``      - group-degree value ``N``
    * ``re<word>``       - edge-removal mode ``<word>`` (``'none'`` if absent)
    * ``randa<float>`` / ``randd<float>`` - edge noise added / deleted
    * ``cent``           - enable centrality
    * ``coord``          - append coordinates (image datasets only)

    Args:
        name: Dataset name. Names containing ``MNIST`` or ``CIFAR`` select the
            image datasets, anything else is loaded through ``TUDatasetExt``.
        sparse: Not used by this function.
        feat_str: Feature specification string, see above.
        root: Directory under which ``name`` is stored. ``None`` or ``''``
            falls back to ``~/pyG_data``.
        aug: Forwarded to ``TUDatasetExt`` (not used for image datasets).
        aug_ratio: Forwarded to ``TUDatasetExt`` (not used for image
            datasets).

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for image datasets, otherwise
        a single ``TUDatasetExt`` whose ``data.edge_attr`` has been cleared.
    """
    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    def _first(pattern, convert, default):
        # First capture of `pattern` in feat_str, converted; `default` if absent.
        found = re.findall(pattern, feat_str)
        return convert(found[0]) if found else default

    # Patterns are raw strings: "\d" / "\w" in a normal string literal are
    # invalid escape sequences and trigger warnings on recent Python versions.
    # The flag is not called `degree` so it does not shadow the imported
    # torch_geometric.utils.degree.
    use_degree = "deg" in feat_str
    onehot_maxdeg = _first(r"odeg(\d+)", int, None)
    k = _first(r"an{0,1}k(\d+)", int, 0)
    groupd = _first(r"groupd(\d+)", int, 0)
    remove_edges = _first(r"re(\w+)", str, 'none')
    edge_noises_add = _first(r"randa([\d\.]+)", float, 0)
    edge_noises_delete = _first(r"randd([\d\.]+)", float, 0)
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    print(aug, aug_ratio)
    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            # The processed file name embeds feat_str, so each feature
            # configuration gets its own processed files.
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str, aug=aug, aug_ratio=aug_ratio)

        dataset.data.edge_attr = None

    return dataset
64 | 


--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/gcn_conv.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.nn import Parameter
  3 | from torch_scatter import scatter_add
  4 | from torch_geometric.nn.conv import MessagePassing
  5 | from torch_geometric.utils import remove_self_loops, add_self_loops
  6 | from torch_geometric.nn.inits import glorot, zeros
  7 | 
  8 | 
  9 | class GCNConv(MessagePassing):
 10 |     r"""The graph convolutional operator from the `"Semi-supervised
 11 |     Classfication with Graph Convolutional Networks"
 12 |     <https://arxiv.org/abs/1609.02907>`_ paper
 13 | 
 14 |     .. math::
 15 |         \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
 16 |         \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},
 17 | 
 18 |     where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
 19 |     adjacency matrix with inserted self-loops and
 20 |     :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.
 21 | 
 22 |     Args:
 23 |         in_channels (int): Size of each input sample.
 24 |         out_channels (int): Size of each output sample.
 25 |         improved (bool, optional): If set to :obj:`True`, the layer computes
 26 |             :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
 27 |             (default: :obj:`False`)
 28 |         cached (bool, optional): If set to :obj:`True`, the layer will cache
 29 |             the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2}
 30 |             \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}`.
 31 |             (default: :obj:`False`)
 32 |         bias (bool, optional): If set to :obj:`False`, the layer will not learn
 33 |             an additive bias. (default: :obj:`True`)
 34 |         edge_norm (bool, optional): whether or not to normalize adj matrix.
 35 |             (default: :obj:`True`)
 36 |         gfn (bool, optional): If `True`, only linear transform (1x1 conv) is
 37 |             applied to every nodes. (default: :obj:`False`)
 38 |     """
 39 | 
 40 |     def __init__(self,
 41 |                  in_channels,
 42 |                  out_channels,
 43 |                  improved=False,
 44 |                  cached=False,
 45 |                  bias=True,
 46 |                  edge_norm=True,
 47 |                  gfn=False):
 48 |         super(GCNConv, self).__init__('add')
 49 | 
 50 |         self.in_channels = in_channels
 51 |         self.out_channels = out_channels
 52 |         self.improved = improved
 53 |         self.cached = cached
 54 |         self.cached_result = None
 55 |         self.edge_norm = edge_norm
 56 |         self.gfn = gfn
 57 | 
 58 |         self.weight = Parameter(torch.Tensor(in_channels, out_channels))
 59 | 
 60 |         if bias:
 61 |             self.bias = Parameter(torch.Tensor(out_channels))
 62 |         else:
 63 |             self.register_parameter('bias', None)
 64 | 
 65 |         self.reset_parameters()
 66 | 
 67 |     def reset_parameters(self):
 68 |         glorot(self.weight)
 69 |         zeros(self.bias)
 70 |         self.cached_result = None
 71 | 
    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        """Replace self-loops and compute degree-normalised edge weights.

        Args:
            edge_index: Edge index tensor; ``edge_index.size(1)`` is taken as
                the number of edges and it is unpacked into ``row, col``.
            num_nodes: Number of nodes, used to size the self-loop weights and
                the degree vector.
            edge_weight: Optional per-edge weights. When ``None`` every edge
                gets weight one. Flattened with ``view(-1)``.
            improved: When true, self-loops get weight 2 instead of 1.
            dtype: dtype of the default all-ones edge weights.

        Returns:
            A tuple ``(edge_index, weight)`` where ``edge_index`` includes the
            added self-loops and ``weight`` is
            ``deg^-1/2[row] * edge_weight * deg^-1/2[col]``.
        """
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)

        # Drop existing self-loops first so each node ends up with exactly the
        # loop added below.
        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        # NOTE(review): this assumes add_self_loops returns only the edge index
        # and appends the loops after the existing edges -- confirm against the
        # installed torch_geometric version.
        edge_index = add_self_loops(edge_index, num_nodes=num_nodes)
        # Add edge_weight for loop edges.
        loop_weight = torch.full((num_nodes, ),
                                 1 if not improved else 2,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

        row, col = edge_index
        # Weighted degree of each node, summed over the edges indexed by `row`.
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # A zero degree yields inf after pow(-0.5); zero it out instead.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
 96 | 
 97 |     def forward(self, x, edge_index, edge_weight=None):
 98 |         """"""
 99 |         x = torch.matmul(x, self.weight)
100 |         if self.gfn:
101 |             return x
102 | 
103 |         if not self.cached or self.cached_result is None:
104 |             if self.edge_norm:
105 |                 edge_index, norm = GCNConv.norm(
106 |                     edge_index, x.size(0), edge_weight, self.improved, x.dtype)
107 |             else:
108 |                 norm = None
109 |             self.cached_result = edge_index, norm
110 | 
111 |         edge_index, norm = self.cached_result
112 |         return self.propagate(edge_index, x=x, norm=norm)
113 | 
114 |     def message(self, x_j, norm):
115 |         if self.edge_norm:
116 |             return norm.view(-1, 1) * x_j
117 |         else:
118 |             return x_j
119 | 
120 |     def update(self, aggr_out):
121 |         if self.bias is not None:
122 |             aggr_out = aggr_out + self.bias
123 |         return aggr_out
124 | 
125 |     def __repr__(self):
126 |         return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
127 |                                    self.out_channels)
128 | 


--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/utils.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | def print_weights(model):
 4 |     for name, param in model.named_parameters():
 5 |         if param.requires_grad:
 6 |             print(name, param.shape)
 7 |     sys.stdout.flush()
 8 | 
 9 | 
def logger(info):
    """Print train/test accuracy for the first epoch and every tenth epoch.

    Args:
        info: Mapping with the keys ``'fold'`` and ``'epoch'``; the keys
            ``'train_acc'`` and ``'test_acc'`` are read only when a line is
            actually printed.
    """
    fold = info['fold']
    epoch = info['epoch']
    should_report = epoch == 1 or epoch % 10 == 0
    if should_report:
        message = '{:02d}/{:03d}: Train Acc: {:.3f}, Test Accuracy: {:.3f}'.format(
            fold, epoch, info['train_acc'], info['test_acc'])
        print(message)
    sys.stdout.flush()
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/simgrace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/simgrace.png


--------------------------------------------------------------------------------
/transfer_learning/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies & Dataset
 2 | 
 3 | Please refer to https://github.com/snap-stanford/pretrain-gnns#installation for environment setup and to https://github.com/snap-stanford/pretrain-gnns#dataset-download to download the datasets.
 4 | 
 5 | ## Training & Evaluation
 6 | ### Step 1: Pre-training: ###
 7 | ```
 8 | cd ./bio
 9 | python pretrain_simgrace.py --eta 0.1
10 | cd ../chem
11 | python pretrain_simgrace.py --eta 0.1
12 | ```
13 | ### Step 2: Finetuning: ###
14 | ```
15 | cd ./bio
16 | ./finetune.sh
17 | cd ../chem
18 | ./run.sh
19 | ```
20 | Results will be recorded in ```result.log```.
21 | 
22 | 
23 | ## Acknowledgements
24 | 
25 | * https://github.com/snap-stanford/pretrain-gnns.
26 | * https://github.com/Shen-Lab/GraphCL/tree/master/transferLearning_MoleculeNet_PPI.
27 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/dataloader.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data
 2 | from torch.utils.data.dataloader import default_collate
 3 | 
 4 | from batch import BatchFinetune, BatchMasking, BatchAE, BatchSubstructContext
 5 | 
 6 | class DataLoaderFinetune(torch.utils.data.DataLoader):
 7 |     r"""Data loader which merges data objects from a
 8 |     :class:`torch_geometric.data.dataset` to a mini-batch.
 9 |     Args:
10 |         dataset (Dataset): The dataset from which to load the data.
11 |         batch_size (int, optional): How may samples per batch to load.
12 |             (default: :obj:`1`)
13 |         shuffle (bool, optional): If set to :obj:`True`, the data will be
14 |             reshuffled at every epoch (default: :obj:`True`)
15 |     """
16 | 
17 |     def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
18 |         super(DataLoaderFinetune, self).__init__(
19 |             dataset,
20 |             batch_size,
21 |             shuffle,
22 |             collate_fn=lambda data_list: BatchFinetune.from_data_list(data_list),
23 |             **kwargs)
24 | 
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchMasking.from_data_list`.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate callable itself rather than a lambda wrapper:
        # lambdas cannot be pickled, which breaks ``num_workers > 0`` when
        # worker processes are started with the "spawn" method.
        super(DataLoaderMasking, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
43 | 
44 | 
class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchAE.from_data_list`.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate callable itself rather than a lambda wrapper:
        # lambdas cannot be pickled, which breaks ``num_workers > 0`` when
        # worker processes are started with the "spawn" method.
        super(DataLoaderAE, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
63 | 
64 | 
class DataLoaderSubstructContext(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchSubstructContext.from_data_list`.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate callable itself rather than a lambda wrapper:
        # lambdas cannot be pickled, which breaks ``num_workers > 0`` when
        # worker processes are started with the "spawn" method.
        super(DataLoaderSubstructContext, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchSubstructContext.from_data_list,
            **kwargs)
83 | 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/finetune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Fine-tunes the SimGRACE pre-trained GIN encoder on the species split,
# once for each of ten run seeds.
split=species
### for GIN
for runseed in 0 1 2 3 4 5 6 7 8 9
do
# The checkpoints shipped in models_simgrace/ are named simgrace_<epoch>.pth.
python finetune.py --model_file models_simgrace/simgrace_80.pth --split "$split" --epochs 10 --device 0 --runseed "$runseed" --gnn_type gin --lr 1e-3
done
8 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/finetune_tune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Usage: finetune_tune.sh <runseed> <device>
#   $1 -- value passed to finetune.py as --runseed
#   $2 -- value passed to finetune.py as --device
runseed=$1
device=$2
split=species

### for GIN
# For every pre-training strategy, fine-tune both the plain checkpoint and
# its "supervised_" counterpart from model_gin/.
for unsup in contextpred infomax edgepred masking
do
model_file=${unsup}
python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin

model_file=supervised_${unsup}
python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin
done

# Baselines: no --model_file at all, and the supervised-only checkpoint.
python finetune.py --split $split --filename gin_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type gin
python finetune.py --split $split --model_file model_gin/supervised.pth --filename gin_supervised --epochs 50 --device $device --runseed $runseed --gnn_type gin


### for other GNNs
# Each architecture is run without --model_file and with its
# <gnn_type>_supervised_masking checkpoint from model_architecture/.
for gnn_type in gcn gat graphsage
do
python finetune.py --split $split --filename ${gnn_type}_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type

model_file=${gnn_type}_supervised_masking
python finetune.py --model_file model_architecture/${model_file}.pth --split $split --filename ${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type

done


--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_100.pth


--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_20.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_20.pth


--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_40.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_40.pth


--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_60.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_60.pth


--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_80.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_80.pth


--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_deepgraphinfomax.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import BioDataset
  4 | from torch_geometric.data import DataLoader
  5 | from torch_geometric.nn.inits import uniform
  6 | from torch_geometric.nn import global_mean_pool
  7 | 
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | import torch.optim as optim
 12 | 
 13 | from tqdm import tqdm
 14 | import numpy as np
 15 | 
 16 | from model import GNN
 17 | from sklearn.metrics import roc_auc_score
 18 | 
 19 | import pandas as pd
 20 | 
 21 | 
 22 | def cycle_index(num, shift):
 23 |     arr = torch.arange(num) + shift
 24 |     arr[-shift:] = torch.arange(shift)
 25 |     return arr
 26 | 
 27 | class Discriminator(nn.Module):
 28 |     def __init__(self, hidden_dim):
 29 |         super(Discriminator, self).__init__()
 30 |         self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim))
 31 |         self.reset_parameters()
 32 | 
 33 |     def reset_parameters(self):
 34 |         size = self.weight.size(0)
 35 |         uniform(size, self.weight)
 36 | 
 37 |     def forward(self, x, summary):
 38 |         h = torch.matmul(summary, self.weight)
 39 |         return torch.sum(x*h, dim = 1)
 40 | 
 41 | class Infomax(nn.Module):
 42 |     def __init__(self, gnn, discriminator):
 43 |         super(Infomax, self).__init__()
 44 |         self.gnn = gnn
 45 |         self.discriminator = discriminator
 46 |         self.loss = nn.BCEWithLogitsLoss()
 47 |         self.pool = global_mean_pool
 48 | 
 49 | 
 50 | def train(args, model, device, loader, optimizer):
 51 |     model.train()
 52 | 
 53 |     train_acc_accum = 0
 54 |     train_loss_accum = 0
 55 | 
 56 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 57 |         batch = batch.to(device)
 58 |         node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
 59 |         summary_emb = torch.sigmoid(model.pool(node_emb, batch.batch))
 60 | 
 61 |         positive_expanded_summary_emb = summary_emb[batch.batch]
 62 | 
 63 |         shifted_summary_emb = summary_emb[cycle_index(len(summary_emb), 1)]
 64 |         negative_expanded_summary_emb = shifted_summary_emb[batch.batch]
 65 | 
 66 |         positive_score = model.discriminator(node_emb, positive_expanded_summary_emb)
 67 |         negative_score = model.discriminator(node_emb, negative_expanded_summary_emb)      
 68 | 
 69 |         optimizer.zero_grad()
 70 |         loss = model.loss(positive_score, torch.ones_like(positive_score)) + model.loss(negative_score, torch.zeros_like(negative_score))
 71 |         loss.backward()
 72 | 
 73 |         optimizer.step()
 74 | 
 75 |         train_loss_accum += float(loss.detach().cpu().item())
 76 |         acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
 77 |         train_acc_accum += float(acc.detach().cpu().item())
 78 | 
 79 |     return train_acc_accum/(step+1), train_loss_accum/(step+1)
 80 | 
 81 | 
 82 | def main():
 83 |     # Training settings
 84 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 85 |     parser.add_argument('--device', type=int, default=0,
 86 |                         help='which gpu to use if any (default: 0)')
 87 |     parser.add_argument('--batch_size', type=int, default=256,
 88 |                         help='input batch size for training (default: 256)')
 89 |     parser.add_argument('--epochs', type=int, default=100,
 90 |                         help='number of epochs to train (default: 100)')
 91 |     parser.add_argument('--lr', type=float, default=0.001,
 92 |                         help='learning rate (default: 0.001)')
 93 |     parser.add_argument('--decay', type=float, default=0,
 94 |                         help='weight decay (default: 0)')
 95 |     parser.add_argument('--num_layer', type=int, default=5,
 96 |                         help='number of GNN message passing layers (default: 5).')
 97 |     parser.add_argument('--emb_dim', type=int, default=300,
 98 |                         help='embedding dimensions (default: 300)')
 99 |     parser.add_argument('--dropout_ratio', type=float, default=0,
100 |                         help='dropout ratio (default: 0)')
101 |     parser.add_argument('--JK', type=str, default="last",
102 |                         help='how the node features across layers are combined. last, sum, max or concat')
103 |     parser.add_argument('--gnn_type', type=str, default="gin")
104 |     parser.add_argument('--model_file', type = str, default = '', help='filename to output the pre-trained model')
105 |     parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.")
106 |     parser.add_argument('--num_workers', type=int, default = 4, help='number of workers for dataset loading')
107 |     args = parser.parse_args()
108 | 
109 | 
110 |     torch.manual_seed(0)
111 |     np.random.seed(0)
112 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
113 |     if torch.cuda.is_available():
114 |         torch.cuda.manual_seed_all(0)
115 | 
116 |     #set up dataset
117 |     root_unsupervised = 'dataset/unsupervised'
118 |     dataset = BioDataset(root_unsupervised, data_type='unsupervised')
119 | 
120 |     print(dataset)
121 | 
122 |     loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
123 | 
124 |     #set up model
125 |     gnn = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type)
126 | 
127 |     discriminator = Discriminator(args.emb_dim)
128 | 
129 |     model = Infomax(gnn, discriminator)
130 |     
131 |     model.to(device)
132 | 
133 |     #set up optimizer
134 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
135 |     print(optimizer)
136 | 
137 | 
138 |     for epoch in range(1, args.epochs+1):
139 |         print("====epoch " + str(epoch))
140 |     
141 |         train_acc, train_loss = train(args, model, device, loader, optimizer)
142 | 
143 |         print(train_acc)
144 |         print(train_loss)
145 | 
146 | 
147 |     if not args.model_file == "":
148 |         torch.save(model.gnn.state_dict(), args.model_file + ".pth")
149 | 
150 | if __name__ == "__main__":
151 |     main()
152 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_edgepred.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import BioDataset
  4 | from dataloader import DataLoaderAE
  5 | from util import NegativeEdge
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | import torch.optim as optim
 11 | 
 12 | from tqdm import tqdm
 13 | import numpy as np
 14 | 
 15 | from model import GNN
 16 | import pandas as pd
 17 | 
# Module-level loss used by train(): binary cross-entropy on raw logits.
criterion = nn.BCEWithLogitsLoss()
 19 | 
 20 | def train(args, model, device, loader, optimizer):
 21 |     model.train()
 22 | 
 23 |     train_acc_accum = 0
 24 |     train_loss_accum = 0
 25 | 
 26 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 27 |         batch = batch.to(device)
 28 |         node_emb = model(batch.x, batch.edge_index, batch.edge_attr)
 29 | 
 30 |         positive_score = torch.sum(node_emb[batch.edge_index[0, ::2]] * node_emb[batch.edge_index[1, ::2]], dim = 1)
 31 |         negative_score = torch.sum(node_emb[batch.negative_edge_index[0]] * node_emb[batch.negative_edge_index[1]], dim = 1)
 32 | 
 33 |         optimizer.zero_grad()
 34 |         loss = criterion(positive_score, torch.ones_like(positive_score)) + criterion(negative_score, torch.zeros_like(negative_score))
 35 |         loss.backward()
 36 |         optimizer.step()
 37 | 
 38 |         train_loss_accum += float(loss.detach().cpu().item())
 39 |         acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
 40 |         train_acc_accum += float(acc.detach().cpu().item())
 41 | 
 42 |     return train_acc_accum/(step+1), train_loss_accum/(step + 1)
 43 | 
 44 | 
 45 | def main():
 46 |     # Training settings
 47 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 48 |     parser.add_argument('--device', type=int, default=0,
 49 |                         help='which gpu to use if any (default: 0)')
 50 |     parser.add_argument('--batch_size', type=int, default=256,
 51 |                         help='input batch size for training (default: 256)')
 52 |     parser.add_argument('--epochs', type=int, default=100,
 53 |                         help='number of epochs to train (default: 100)')
 54 |     parser.add_argument('--lr', type=float, default=0.001,
 55 |                         help='learning rate (default: 0.001)')
 56 |     parser.add_argument('--decay', type=float, default=0,
 57 |                         help='weight decay (default: 0)')
 58 |     parser.add_argument('--num_layer', type=int, default=5,
 59 |                         help='number of GNN message passing layers (default: 5).')
 60 |     parser.add_argument('--emb_dim', type=int, default=300,
 61 |                         help='embedding dimensions (default: 300)')
 62 |     parser.add_argument('--dropout_ratio', type=float, default=0,
 63 |                         help='dropout ratio (default: 0)')
 64 |     parser.add_argument('--JK', type=str, default="last",
 65 |                         help='how the node features across layers are combined. last, sum, max or concat')
 66 |     parser.add_argument('--gnn_type', type=str, default="gin")
 67 |     parser.add_argument('--model_file', type = str, default = '', help='filename to output the pre-trained model')
 68 |     parser.add_argument('--num_workers', type=int, default = 12, help='number of workers for dataset loading')
 69 |     args = parser.parse_args()
 70 | 
 71 | 
 72 |     torch.manual_seed(0)
 73 |     np.random.seed(0)
 74 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
 75 |     if torch.cuda.is_available():
 76 |         torch.cuda.manual_seed_all(0)
 77 | 
 78 |     #set up dataset
 79 |     root_unsupervised = 'dataset/unsupervised'
 80 |     dataset = BioDataset(root_unsupervised, data_type='unsupervised', transform = NegativeEdge())
 81 |     dataset.data.to(device)
 82 | 
 83 |     print(dataset)
 84 | 
 85 |     loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
 86 | 
 87 |     #set up model
 88 |     model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type).to(device)
 89 |     
 90 |     model.to(device)
 91 | 
 92 |     #set up optimizer
 93 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
 94 |     #optimizer = optim.Adam(model.graph_pred_linear.parameters(), lr=args.lr, weight_decay=args.decay)    
 95 |     print(optimizer)
 96 | 
 97 | 
 98 |     for epoch in range(1, args.epochs+1):
 99 |         print("====epoch " + str(epoch))
100 |     
101 |         train_acc, train_loss = train(args, model, device, loader, optimizer)
102 | 
103 |         print(train_acc)
104 |         print(train_loss)
105 | 
106 |     if not args.model_file == "":
107 |         torch.save(model.state_dict(), args.model_file + ".pth")
108 | 
109 | 
110 | 
111 | if __name__ == "__main__":
112 |     main()


--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_masking.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import BioDataset
  4 | from dataloader import DataLoaderMasking 
  5 | 
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | import torch.optim as optim
 10 | 
 11 | from tqdm import tqdm
 12 | import numpy as np
 13 | 
 14 | from model import GNN, GNN_graphpred
 15 | 
 16 | import pandas as pd
 17 | 
 18 | from util import MaskEdge
 19 | 
 20 | from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool
 21 | 
# Module-level loss used by train(). Edge-type prediction is treated as a
# multi-class problem, hence cross-entropy rather than the binary
# alternative kept below for reference.
#criterion = nn.BCEWithLogitsLoss()
criterion = nn.CrossEntropyLoss()
 24 | 
 25 | def compute_accuracy(pred, target):
 26 |     #return float(torch.sum((pred.detach() > 0) == target.to(torch.uint8)).cpu().item())/(pred.shape[0]*pred.shape[1])
 27 |     return float(torch.sum(torch.max(pred.detach(), dim = 1)[1] == target).cpu().item())/len(pred)
 28 | 
 29 | def train(args, model_list, loader, optimizer_list, device):
 30 |     model, linear_pred_edges = model_list
 31 |     optimizer_model, optimizer_linear_pred_edges = optimizer_list
 32 | 
 33 |     model.train()
 34 |     linear_pred_edges.train()
 35 | 
 36 |     loss_accum = 0
 37 |     acc_accum = 0
 38 | 
 39 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 40 |         batch = batch.to(device)
 41 | 
 42 |         node_rep = model(batch.x, batch.edge_index, batch.edge_attr)
 43 | 
 44 |         ### predict the edge types.
 45 |         masked_edge_index = batch.edge_index[:, batch.masked_edge_idx]
 46 |         edge_rep = node_rep[masked_edge_index[0]] + node_rep[masked_edge_index[1]]
 47 |         pred_edge = linear_pred_edges(edge_rep)
 48 | 
 49 |         #converting the binary classification to multiclass classification
 50 |         edge_label = torch.argmax(batch.mask_edge_label, dim = 1)
 51 | 
 52 |         acc_edge = compute_accuracy(pred_edge, edge_label)
 53 |         acc_accum += acc_edge
 54 | 
 55 |         optimizer_model.zero_grad()
 56 |         optimizer_linear_pred_edges.zero_grad()
 57 | 
 58 |         loss = criterion(pred_edge, edge_label)
 59 |         loss.backward()
 60 | 
 61 |         optimizer_model.step()
 62 |         optimizer_linear_pred_edges.step()
 63 | 
 64 |         loss_accum += float(loss.cpu().item())
 65 | 
 66 |     return loss_accum/(step + 1), acc_accum/(step + 1)
 67 | 
 68 | def main():
 69 |     # Training settings
 70 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 71 |     parser.add_argument('--device', type=int, default=0,
 72 |                         help='which gpu to use if any (default: 0)')
 73 |     parser.add_argument('--batch_size', type=int, default=256,
 74 |                         help='input batch size for training (default: 256)')
 75 |     parser.add_argument('--epochs', type=int, default=100,
 76 |                         help='number of epochs to train (default: 100)')
 77 |     parser.add_argument('--lr', type=float, default=0.001,
 78 |                         help='learning rate (default: 0.001)')
 79 |     parser.add_argument('--decay', type=float, default=0,
 80 |                         help='weight decay (default: 0)')
 81 |     parser.add_argument('--num_layer', type=int, default=5,
 82 |                         help='number of GNN message passing layers (default: 5).')
 83 |     parser.add_argument('--emb_dim', type=int, default=300,
 84 |                         help='embedding dimensions (default: 300)')
 85 |     parser.add_argument('--dropout_ratio', type=float, default=0,
 86 |                         help='dropout ratio (default: 0)')
 87 |     parser.add_argument('--mask_rate', type=float, default=0.15,
 88 |                         help='dropout ratio (default: 0.15)')
 89 |     parser.add_argument('--JK', type=str, default="last",
 90 |                         help='how the node features are combined across layers. last, sum, max or concat')
 91 |     parser.add_argument('--gnn_type', type=str, default="gin")
 92 |     parser.add_argument('--model_file', type=str, default = '', help='filename to output the model')
 93 |     parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.")
 94 |     parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
 95 |     args = parser.parse_args()
 96 | 
 97 |     torch.manual_seed(0)
 98 |     np.random.seed(0)
 99 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
100 |     if torch.cuda.is_available():
101 |         torch.cuda.manual_seed_all(0)
102 | 
103 |     print("num layer: %d mask rate: %f" %(args.num_layer, args.mask_rate))
104 | 
105 |     #set up dataset
106 |     root_unsupervised = 'dataset/unsupervised'
107 |     dataset = BioDataset(root_unsupervised, data_type='unsupervised', transform = MaskEdge(mask_rate = args.mask_rate))
108 | 
109 |     print(dataset)
110 | 
111 |     loader = DataLoaderMasking(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
112 | 
113 | 
114 |     #set up models, one for pre-training and one for context embeddings
115 |     model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type).to(device)
116 |     #Linear layer for classifying different edge types
117 |     linear_pred_edges = torch.nn.Linear(args.emb_dim, 7).to(device)
118 | 
119 |     model_list = [model, linear_pred_edges]
120 | 
121 |     #set up optimizers
122 |     optimizer_model = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
123 |     optimizer_linear_pred_edges = optim.Adam(linear_pred_edges.parameters(), lr=args.lr, weight_decay=args.decay)
124 | 
125 |     optimizer_list = [optimizer_model, optimizer_linear_pred_edges]
126 | 
127 |     for epoch in range(1, args.epochs+1):
128 |         print("====epoch " + str(epoch))
129 |         
130 |         train_loss, train_acc = train(args, model_list, loader, optimizer_list, device)
131 |         print(train_loss, train_acc)
132 | 
133 |     if not args.model_file == "":
134 |         torch.save(model.state_dict(), args.model_file + ".pth")
135 | 
136 | 
137 | if __name__ == "__main__":
138 |     main()
139 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_supervised.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from splitters import random_split, species_split
  4 | from loader import BioDataset
  5 | from torch_geometric.data import DataLoader
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | import torch.optim as optim
 11 | 
 12 | from tqdm import tqdm
 13 | import numpy as np
 14 | 
 15 | from model import GNN, GNN_graphpred
 16 | from sklearn.metrics import roc_auc_score
 17 | 
 18 | import pandas as pd
 19 | 
 20 | from util import combine_dataset
 21 | 
# Module-level loss used by train(): binary cross-entropy on raw logits.
criterion = nn.BCEWithLogitsLoss()
 23 | 
 24 | def train(args, model, device, loader, optimizer):
 25 |     model.train()
 26 | 
 27 |     loss_accum = 0
 28 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 29 |         batch = batch.to(device)
 30 |         pred = model(batch)
 31 |         y = batch.go_target_pretrain.view(pred.shape).to(torch.float64)
 32 | 
 33 |         optimizer.zero_grad()
 34 |         loss = criterion(pred.double(), y)
 35 |         loss.backward()
 36 | 
 37 |         optimizer.step()
 38 | 
 39 |         loss_accum += loss.detach().cpu()
 40 | 
 41 |     return loss_accum / (step + 1)
 42 | 
 43 | 
 44 | def main():
 45 |     # Training settings
 46 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 47 |     parser.add_argument('--device', type=int, default=0,
 48 |                         help='which gpu to use if any (default: 0)')
 49 |     parser.add_argument('--batch_size', type=int, default=32,
 50 |                         help='input batch size for training (default: 32)')
 51 |     parser.add_argument('--epochs', type=int, default=100,
 52 |                         help='number of epochs to train (default: 100)')
 53 |     parser.add_argument('--lr', type=float, default=0.001,
 54 |                         help='learning rate (default: 0.001)')
 55 |     parser.add_argument('--decay', type=float, default=0,
 56 |                         help='weight decay (default: 0)')
 57 |     parser.add_argument('--num_layer', type=int, default=5,
 58 |                         help='number of GNN message passing layers (default: 5).')
 59 |     parser.add_argument('--emb_dim', type=int, default=300,
 60 |                         help='embedding dimensions (default: 300)')
 61 |     parser.add_argument('--dropout_ratio', type=float, default=0.2,
 62 |                         help='dropout ratio (default: 0.2)')
 63 |     parser.add_argument('--graph_pooling', type=str, default="mean",
 64 |                         help='graph level pooling (sum, mean, max, set2set, attention)')
 65 |     parser.add_argument('--JK', type=str, default="last",
 66 |                         help='how the node features across layers are combined. last, sum, max or concat')
 67 |     parser.add_argument('--input_model_file', type=str, default = '', help='filename to read the model (if there is any)')
 68 |     parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
 69 |     parser.add_argument('--gnn_type', type=str, default="gin")
 70 |     parser.add_argument('--num_workers', type=int, default = 0, help='number of workers for dataset loading')
 71 |     parser.add_argument('--seed', type=int, default=42, help = "Seed for splitting dataset.")
 72 |     parser.add_argument('--split', type=str, default = "species", help='Random or species split')
 73 |     args = parser.parse_args()
 74 | 
 75 | 
 76 |     torch.manual_seed(0)
 77 |     np.random.seed(0)
 78 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
 79 |     if torch.cuda.is_available():
 80 |         torch.cuda.manual_seed_all(0)
 81 | 
 82 |     root_supervised = 'dataset/supervised'
 83 | 
 84 |     dataset = BioDataset(root_supervised, data_type='supervised')
 85 | 
 86 |     if args.split == "random":
 87 |         print("random splitting")
 88 |         train_dataset, valid_dataset, test_dataset = random_split(dataset, seed = args.seed)
 89 |         print(train_dataset)
 90 |         print(valid_dataset)
 91 |         pretrain_dataset = combine_dataset(train_dataset, valid_dataset)
 92 |         print(pretrain_dataset)
 93 |     elif args.split == "species":
 94 |         print("species splitting")
 95 |         trainval_dataset, test_dataset = species_split(dataset)
 96 |         test_dataset_broad, test_dataset_none, _ = random_split(test_dataset, seed = args.seed, frac_train=0.5, frac_valid=0.5, frac_test=0)
 97 |         print(trainval_dataset)
 98 |         print(test_dataset_broad)
 99 |         pretrain_dataset = combine_dataset(trainval_dataset, test_dataset_broad)            
100 |         print(pretrain_dataset)
101 |         #train_dataset, valid_dataset, _ = random_split(trainval_dataset, seed = args.seed, frac_train=0.85, frac_valid=0.15, frac_test=0)
102 |     else:
103 |         raise ValueError("Unknown split name.")
104 | 
105 | 
106 |     train_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
107 | 
108 |     num_tasks = len(pretrain_dataset[0].go_target_pretrain)
109 | 
110 |     #set up model
111 |     model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK = args.JK, drop_ratio = args.dropout_ratio, graph_pooling = args.graph_pooling, gnn_type = args.gnn_type)
112 |     if not args.input_model_file == "":
113 |         model.from_pretrained(args.input_model_file + ".pth")
114 |     
115 |     model.to(device)
116 | 
117 |     #set up optimizer
118 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)   
119 |     print(optimizer)
120 | 
121 |     for epoch in range(1, args.epochs+1):
122 |         print("====epoch " + str(epoch))
123 |     
124 |         train_loss = train(args, model, device, train_loader, optimizer)
125 | 
126 |     if not args.output_model_file == "":
127 |         torch.save(model.gnn.state_dict(), args.output_model_file + ".pth")
128 | 
129 | 
130 | 
131 | if __name__ == "__main__":
132 |     main()
133 | 


--------------------------------------------------------------------------------
/transfer_learning/bio/splitters.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import random
 3 | import numpy as np
 4 | 
 5 | def random_split(dataset, frac_train=0.8, frac_valid=0.1, frac_test=0.1,
 6 |                  seed=0):
 7 |     """
 8 |     Adapted from graph-pretrain
 9 |     :param dataset:
10 |     :param task_idx:
11 |     :param null_value:
12 |     :param frac_train:
13 |     :param frac_valid:
14 |     :param frac_test:
15 |     :param seed:
16 |     :return: train, valid, test slices of the input dataset obj.
17 |     """
18 |     np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)
19 | 
20 |     num_mols = len(dataset)
21 |     random.seed(seed)
22 |     all_idx = list(range(num_mols))
23 |     random.shuffle(all_idx)
24 | 
25 |     train_idx = all_idx[:int(frac_train * num_mols)]
26 |     valid_idx = all_idx[int(frac_train * num_mols):int(frac_valid * num_mols)
27 |                                                    + int(frac_train * num_mols)]
28 |     test_idx = all_idx[int(frac_valid * num_mols) + int(frac_train * num_mols):]
29 | 
30 |     assert len(set(train_idx).intersection(set(valid_idx))) == 0
31 |     assert len(set(valid_idx).intersection(set(test_idx))) == 0
32 |     assert len(train_idx) + len(valid_idx) + len(test_idx) == num_mols
33 | 
34 |     train_dataset = dataset[torch.tensor(train_idx)]
35 |     valid_dataset = dataset[torch.tensor(valid_idx)]
36 |     if frac_test == 0:
37 |         test_dataset = None
38 |     else:
39 |         test_dataset = dataset[torch.tensor(test_idx)]
40 | 
41 |     return train_dataset, valid_dataset, test_dataset
42 | 
def species_split(dataset, train_valid_species_id_list=[3702, 6239, 511145,
                                                        7227, 10090, 4932, 7955],
                  test_species_id_list=[9606]):
    """
    Partition ``dataset`` by the ``species_id`` stored in ``dataset.data``.

    :param dataset: dataset exposing ``data.species_id`` and supporting
        indexing with a uint8 mask tensor
    :param train_valid_species_id_list: species ids placed in the first subset
    :param test_species_id_list: species ids placed in the second subset
    :return: train_valid dataset, test dataset
    :raises AssertionError: if some item is not covered by exactly one of the
        two id lists
    """
    species_ids = dataset.data.species_id
    num_items = len(dataset)

    def _membership_mask(wanted_ids):
        # uint8 (byte) mask holding 1 where the item's species is in wanted_ids.
        mask = torch.zeros(num_items, dtype=torch.uint8)
        for wanted in wanted_ids:
            mask += (species_ids == wanted)
        return mask

    in_train_valid = _membership_mask(train_valid_species_id_list)
    in_test = _membership_mask(test_species_id_list)

    # Every item must belong to exactly one of the two groups.
    assert ((in_train_valid + in_test) == 1).all()

    return dataset[in_train_valid], dataset[in_test]
70 | 
if __name__ == "__main__":
    # Running this module as a script only imports Counter; nothing else is
    # executed in the lines visible here.
    from collections import Counter
73 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/dataloader.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data
 2 | from torch.utils.data.dataloader import default_collate
 3 | 
 4 | from batch import BatchSubstructContext, BatchMasking, BatchAE
 5 | 
 6 | class DataLoaderSubstructContext(torch.utils.data.DataLoader):
 7 |     r"""Data loader which merges data objects from a
 8 |     :class:`torch_geometric.data.dataset` to a mini-batch.
 9 |     Args:
10 |         dataset (Dataset): The dataset from which to load the data.
11 |         batch_size (int, optional): How may samples per batch to load.
12 |             (default: :obj:`1`)
13 |         shuffle (bool, optional): If set to :obj:`True`, the data will be
14 |             reshuffled at every epoch (default: :obj:`True`)
15 |     """
16 | 
17 |     def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
18 |         super(DataLoaderSubstructContext, self).__init__(
19 |             dataset,
20 |             batch_size,
21 |             shuffle,
22 |             collate_fn=lambda data_list: BatchSubstructContext.from_data_list(data_list),
23 |             **kwargs)
24 | 
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchMasking.from_data_list`.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`;
            must not contain ``collate_fn``.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Hand the collate function over directly rather than through a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # launched with the "spawn" start method when num_workers > 0.
        super().__init__(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
43 | 
44 | 
class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchAE.from_data_list`.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`;
            must not contain ``collate_fn``.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Hand the collate function over directly rather than through a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # launched with the "spawn" start method when num_workers > 0.
        super().__init__(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/finetune.sh:
--------------------------------------------------------------------------------
 1 | #### GIN fine-tuning
 2 | split=scaffold
 3 | dataset=$1
 4 | 
 5 | CUDA_VISIBLE_DEVICES=0
 6 | for runseed in 0 1 2 3 4 5 6 7 8 9
 7 | do
 8 | model_file=${unsup}
 9 | python finetune.py --input_model_file models_simgrace/simgrace_80.pth --split $split --runseed $runseed --gnn_type gin --dataset $dataset --lr 1e-3 --epochs 100
10 | done
11 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/finetune_tune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Positional arguments: $1 = run seed, $2 = device index passed to --device.
runseed=$1
device=$2
split=scaffold

### for GIN
# For every dataset, fine-tune from each unsupervised checkpoint and from its
# "supervised_<unsup>" counterpart.
for dataset in bbbp sider toxcast
do
for unsup in contextpred infomax edgepred masking
do
model_file=${unsup}
python finetune.py --input_model_file model_gin/${model_file}.pth --split $split --filename ${dataset}/gin_${model_file} --device $device --runseed $runseed --gnn_type gin --dataset $dataset

model_file=supervised_${unsup}
python finetune.py --input_model_file model_gin/${model_file}.pth --split $split --filename ${dataset}/gin_${model_file} --device $device --runseed $runseed --gnn_type gin --dataset $dataset
done

# Two extra GIN runs: no input checkpoint at all, and the supervised.pth checkpoint.
python finetune.py --split $split --filename ${dataset}/gin_nopretrain --device $device --runseed $runseed --gnn_type gin --dataset $dataset
python finetune.py --split $split --input_model_file model_gin/supervised.pth --filename ${dataset}/gin_supervised --device $device --runseed $runseed --gnn_type gin --dataset $dataset


### for other GNNs
# Each architecture is run without a checkpoint and with its
# <gnn_type>_supervised_contextpred checkpoint.
for gnn_type in gcn gat graphsage
do
python finetune.py --split $split --filename ${dataset}/${gnn_type}_nopretrain --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset

model_file=${gnn_type}_supervised_contextpred
python finetune.py --input_model_file model_architecture/${model_file}.pth --split $split --filename ${dataset}/${model_file} --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset

done
done


# NOTE: fold_idx reads the same positional argument ($1) as runseed above.
fold_idx=$1

# Grid over batch size x dropout ratio x dataset for finetune_mutag_ptc.py.
for batch_size in 8 64
do
for drop_ratio in 0 0.2 0.5
do
for dataset in ptc_mr mutag
do
for unsup in contextpred edgepred masking infomax
do

model_file=${unsup}
python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size


model_file=supervised_${unsup}
python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size

done

# Supervised-only checkpoint, then a run with no input checkpoint.
model_file=supervised
python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size

python finetune_mutag_ptc.py --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/nopretrain --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size

done
done
done


--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_100.pth


--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_20.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_20.pth


--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_40.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_40.pth


--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_60.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_60.pth


--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_80.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_80.pth


--------------------------------------------------------------------------------
/transfer_learning/chem/parse_result.py:
--------------------------------------------------------------------------------
 1 | ### Parsing the result!
 2 | import tensorflow as tf
 3 | import os
 4 | import numpy as np
 5 | import pickle
 6 | 
 7 | def get_test_acc(event_file):
 8 |     val_auc_list = np.zeros(100)
 9 |     test_auc_list = np.zeros(100)
10 |     for e in list(tf.train.summary_iterator(event_file)):
11 |         if len(e.summary.value) == 0:
12 |             continue
13 |         if e.summary.value[0].tag == "data/val_auc":
14 |             val_auc_list[e.step-1] = e.summary.value[0].simple_value
15 |         if e.summary.value[0].tag == "data/test_auc":
16 |             test_auc_list[e.step-1] = e.summary.value[0].simple_value
17 |     
18 |     best_epoch = np.argmax(val_auc_list)
19 | 
20 |     return test_auc_list[best_epoch]
21 | 
if __name__ == "__main__":
    # Collect the selected test AUC of every (seed, config, dataset) run under
    # runs/ and pickle the resulting matrix together with its axis labels.

    dataset_list = ["bbbp", "sider", "toxcast"]
    # 10 random seeds
    seed_list = list(range(10))
    config_list = [
        "gin_nopretrain",
        "gin_infomax",
        "gin_edgepred",
        "gin_masking",
        "gin_contextpred",
        "gin_supervised",
        "gin_supervised_infomax",
        "gin_supervised_edgepred",
        "gin_supervised_masking",
        "gin_supervised_contextpred",
        "gcn_nopretrain",
        "gcn_supervised_contextpred",
        "graphsage_nopretrain",
        "graphsage_supervised_contextpred",
        "gat_nopretrain",
        "gat_supervised_contextpred",
    ]

    # Axes: seed x config x dataset.
    result_mat = np.zeros((len(seed_list), len(config_list), len(dataset_list)))

    for seed_pos, seed in enumerate(seed_list):
        for config_pos, config in enumerate(config_list):
            for dataset_pos, dataset in enumerate(dataset_list):
                dir_name = f"runs/finetune_cls_runseed{seed}/{dataset}/{config}"
                print(dir_name)

                # Use the first directory entry whose name contains "events".
                event_file_list = [
                    entry for entry in os.listdir(dir_name) if "events" in entry
                ]
                event_file = event_file_list[0]

                result_mat[seed_pos, config_pos, dataset_pos] = get_test_acc(
                    dir_name + "/" + event_file)

    summary = {
        "result_mat": result_mat,
        "seed_list": seed_list,
        "config_list": config_list,
        "dataset_list": dataset_list,
    }
    with open("result_summary", "wb") as out_file:
        pickle.dump(summary, out_file)
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_deepgraphinfomax.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import MoleculeDataset
  4 | from torch_geometric.data import DataLoader
  5 | from torch_geometric.nn.inits import uniform
  6 | from torch_geometric.nn import global_mean_pool
  7 | 
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.nn.functional as F
 11 | import torch.optim as optim
 12 | 
 13 | from tqdm import tqdm
 14 | import numpy as np
 15 | 
 16 | from model import GNN
 17 | from sklearn.metrics import roc_auc_score
 18 | 
 19 | from splitters import scaffold_split, random_split, random_scaffold_split
 20 | import pandas as pd
 21 | 
 22 | from tensorboardX import SummaryWriter
 23 | 
 24 | 
 25 | def cycle_index(num, shift):
 26 |     arr = torch.arange(num) + shift
 27 |     arr[-shift:] = torch.arange(shift)
 28 |     return arr
 29 | 
 30 | class Discriminator(nn.Module):
 31 |     def __init__(self, hidden_dim):
 32 |         super(Discriminator, self).__init__()
 33 |         self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim))
 34 |         self.reset_parameters()
 35 | 
 36 |     def reset_parameters(self):
 37 |         size = self.weight.size(0)
 38 |         uniform(size, self.weight)
 39 | 
 40 |     def forward(self, x, summary):
 41 |         h = torch.matmul(summary, self.weight)
 42 |         return torch.sum(x*h, dim = 1)
 43 | 
 44 | class Infomax(nn.Module):
 45 |     def __init__(self, gnn, discriminator):
 46 |         super(Infomax, self).__init__()
 47 |         self.gnn = gnn
 48 |         self.discriminator = discriminator
 49 |         self.loss = nn.BCEWithLogitsLoss()
 50 |         self.pool = global_mean_pool
 51 | 
 52 | 
 53 | def train(args, model, device, loader, optimizer):
 54 |     model.train()
 55 | 
 56 |     train_acc_accum = 0
 57 |     train_loss_accum = 0
 58 | 
 59 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 60 |         batch = batch.to(device)
 61 |         node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
 62 |         summary_emb = torch.sigmoid(model.pool(node_emb, batch.batch))
 63 | 
 64 |         positive_expanded_summary_emb = summary_emb[batch.batch]
 65 | 
 66 |         shifted_summary_emb = summary_emb[cycle_index(len(summary_emb), 1)]
 67 |         negative_expanded_summary_emb = shifted_summary_emb[batch.batch]
 68 | 
 69 |         positive_score = model.discriminator(node_emb, positive_expanded_summary_emb)
 70 |         negative_score = model.discriminator(node_emb, negative_expanded_summary_emb)      
 71 | 
 72 |         optimizer.zero_grad()
 73 |         loss = model.loss(positive_score, torch.ones_like(positive_score)) + model.loss(negative_score, torch.zeros_like(negative_score))
 74 |         loss.backward()
 75 | 
 76 |         optimizer.step()
 77 | 
 78 |         train_loss_accum += float(loss.detach().cpu().item())
 79 |         acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
 80 |         train_acc_accum += float(acc.detach().cpu().item())
 81 | 
 82 |     return train_acc_accum/step, train_loss_accum/step
 83 | 
 84 | 
 85 | def main():
 86 |     # Training settings
 87 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 88 |     parser.add_argument('--device', type=int, default=0,
 89 |                         help='which gpu to use if any (default: 0)')
 90 |     parser.add_argument('--batch_size', type=int, default=256,
 91 |                         help='input batch size for training (default: 256)')
 92 |     parser.add_argument('--epochs', type=int, default=100,
 93 |                         help='number of epochs to train (default: 100)')
 94 |     parser.add_argument('--lr', type=float, default=0.001,
 95 |                         help='learning rate (default: 0.001)')
 96 |     parser.add_argument('--decay', type=float, default=0,
 97 |                         help='weight decay (default: 0)')
 98 |     parser.add_argument('--num_layer', type=int, default=5,
 99 |                         help='number of GNN message passing layers (default: 5).')
100 |     parser.add_argument('--emb_dim', type=int, default=300,
101 |                         help='embedding dimensions (default: 300)')
102 |     parser.add_argument('--dropout_ratio', type=float, default=0,
103 |                         help='dropout ratio (default: 0)')
104 |     parser.add_argument('--JK', type=str, default="last",
105 |                         help='how the node features across layers are combined. last, sum, max or concat')
106 |     parser.add_argument('--dataset', type=str, default = 'zinc_standard_agent', help='root directory of dataset. For now, only classification.')
107 |     parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
108 |     parser.add_argument('--gnn_type', type=str, default="gin")
109 |     parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.")
110 |     parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
111 |     args = parser.parse_args()
112 | 
113 | 
114 |     torch.manual_seed(0)
115 |     np.random.seed(0)
116 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
117 |     if torch.cuda.is_available():
118 |         torch.cuda.manual_seed_all(0)
119 | 
120 | 
121 |     #set up dataset
122 |     dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset)
123 | 
124 |     print(dataset)
125 | 
126 |     loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
127 | 
128 |     #set up model
129 |     gnn = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type)
130 | 
131 |     discriminator = Discriminator(args.emb_dim)
132 | 
133 |     model = Infomax(gnn, discriminator)
134 |     
135 |     model.to(device)
136 | 
137 |     #set up optimizer
138 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
139 |     print(optimizer)
140 | 
141 |     for epoch in range(1, args.epochs+1):
142 |         print("====epoch " + str(epoch))
143 |     
144 |         train_acc, train_loss = train(args, model, device, loader, optimizer)
145 | 
146 |         print(train_acc)
147 |         print(train_loss)
148 | 
149 | 
150 |     if not args.output_model_file == "":
151 |         torch.save(gnn.state_dict(), args.output_model_file + ".pth")
152 | 
153 | if __name__ == "__main__":
154 |     main()
155 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_edgepred.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import MoleculeDataset
  4 | from dataloader import DataLoaderAE
  5 | from util import NegativeEdge
  6 | 
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | import torch.optim as optim
 11 | 
 12 | from tqdm import tqdm
 13 | import numpy as np
 14 | 
 15 | from model import GNN, GNN_graphpred
 16 | from sklearn.metrics import roc_auc_score
 17 | 
 18 | from splitters import scaffold_split, random_split, random_scaffold_split
 19 | import pandas as pd
 20 | 
 21 | from tensorboardX import SummaryWriter
 22 | 
# Shared loss used by train(): binary cross-entropy computed directly on raw
# logits (the sigmoid is applied inside the loss), with the default reduction.
criterion = nn.BCEWithLogitsLoss()
 24 | 
 25 | def train(args, model, device, loader, optimizer):
 26 |     model.train()
 27 | 
 28 |     train_acc_accum = 0
 29 |     train_loss_accum = 0
 30 | 
 31 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 32 |         batch = batch.to(device)
 33 |         node_emb = model(batch.x, batch.edge_index, batch.edge_attr)
 34 | 
 35 |         positive_score = torch.sum(node_emb[batch.edge_index[0, ::2]] * node_emb[batch.edge_index[1, ::2]], dim = 1)
 36 |         negative_score = torch.sum(node_emb[batch.negative_edge_index[0]] * node_emb[batch.negative_edge_index[1]], dim = 1)
 37 | 
 38 |         optimizer.zero_grad()
 39 |         loss = criterion(positive_score, torch.ones_like(positive_score)) + criterion(negative_score, torch.zeros_like(negative_score))
 40 |         loss.backward()
 41 |         optimizer.step()
 42 | 
 43 |         train_loss_accum += float(loss.detach().cpu().item())
 44 |         acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
 45 |         train_acc_accum += float(acc.detach().cpu().item())
 46 | 
 47 |     return train_acc_accum/step, train_loss_accum/step
 48 | 
 49 | 
 50 | def main():
 51 |     # Training settings
 52 |     parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
 53 |     parser.add_argument('--device', type=int, default=0,
 54 |                         help='which gpu to use if any (default: 0)')
 55 |     parser.add_argument('--batch_size', type=int, default=256,
 56 |                         help='input batch size for training (default: 256)')
 57 |     parser.add_argument('--epochs', type=int, default=100,
 58 |                         help='number of epochs to train (default: 100)')
 59 |     parser.add_argument('--lr', type=float, default=0.001,
 60 |                         help='learning rate (default: 0.001)')
 61 |     parser.add_argument('--decay', type=float, default=0,
 62 |                         help='weight decay (default: 0)')
 63 |     parser.add_argument('--num_layer', type=int, default=5,
 64 |                         help='number of GNN message passing layers (default: 5).')
 65 |     parser.add_argument('--emb_dim', type=int, default=300,
 66 |                         help='embedding dimensions (default: 300)')
 67 |     parser.add_argument('--dropout_ratio', type=float, default=0,
 68 |                         help='dropout ratio (default: 0)')
 69 |     parser.add_argument('--JK', type=str, default="last",
 70 |                         help='how the node features across layers are combined. last, sum, max or concat')
 71 |     parser.add_argument('--dataset', type=str, default = 'zinc_standard_agent', help='root directory of dataset. For now, only classification.')
 72 |     parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
 73 |     parser.add_argument('--gnn_type', type=str, default="gin")
 74 |     parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
 75 |     args = parser.parse_args()
 76 | 
 77 | 
 78 |     torch.manual_seed(0)
 79 |     np.random.seed(0)
 80 |     device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
 81 |     if torch.cuda.is_available():
 82 |         torch.cuda.manual_seed_all(0)
 83 | 
 84 |     #set up dataset
 85 |     dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset, transform = NegativeEdge())
 86 | 
 87 |     print(dataset[0])
 88 | 
 89 |     loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
 90 | 
 91 |     #set up model
 92 |     model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type)
 93 |     
 94 |     model.to(device)
 95 | 
 96 |     #set up optimizer
 97 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)   
 98 |     print(optimizer)
 99 | 
100 |     for epoch in range(1, args.epochs+1):
101 |         print("====epoch " + str(epoch))
102 |     
103 |         train_acc, train_loss = train(args, model, device, loader, optimizer)
104 | 
105 |         print(train_acc)
106 |         print(train_loss)
107 | 
108 |     if not args.output_model_file == "":
109 |         torch.save(model.state_dict(), args.output_model_file + ".pth")
110 | 
111 | if __name__ == "__main__":
112 |     main()
113 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_supervised.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | 
  3 | from loader import MoleculeDataset
  4 | from torch_geometric.data import DataLoader
  5 | 
  6 | import torch
  7 | import torch.nn as nn
  8 | import torch.nn.functional as F
  9 | import torch.optim as optim
 10 | 
 11 | from tqdm import tqdm
 12 | import numpy as np
 13 | 
 14 | from model import GNN, GNN_graphpred
 15 | # from model_extra import GNN, GNN_graphpred
 16 | from sklearn.metrics import roc_auc_score
 17 | 
 18 | from splitters import scaffold_split, random_split, random_scaffold_split
 19 | import pandas as pd
 20 | 
 21 | from tensorboardX import SummaryWriter
 22 | 
# Binary cross-entropy on raw logits with reduction="none": the loss is kept
# per element so train() can zero out entries with a null target before
# averaging.
criterion = nn.BCEWithLogitsLoss(reduction = "none")
 24 | 
 25 | def train(args, model, device, loader, optimizer):
 26 |     model.train()
 27 | 
 28 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 29 |         batch = batch.to(device)
 30 |         pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
 31 |         y = batch.y.view(pred.shape).to(torch.float64)
 32 | 
 33 |         #Whether y is non-null or not.
 34 |         is_valid = y**2 > 0
 35 |         #Loss matrix
 36 |         loss_mat = criterion(pred.double(), (y+1)/2)
 37 |         #loss matrix after removing null target
 38 |         loss_mat = torch.where(is_valid, loss_mat, torch.zeros(loss_mat.shape).to(loss_mat.device).to(loss_mat.dtype))
 39 |             
 40 |         optimizer.zero_grad()
 41 |         loss = torch.sum(loss_mat)/torch.sum(is_valid)
 42 |         loss.backward()
 43 | 
 44 |         optimizer.step()
 45 | 
 46 | 
 47 | def eval(args, model, device, loader, normalized_weight):
 48 |     model.eval()
 49 |     y_true = []
 50 |     y_scores = []
 51 | 
 52 |     for step, batch in enumerate(tqdm(loader, desc="Iteration")):
 53 |         batch = batch.to(device)
 54 | 
 55 |         with torch.no_grad():
 56 |             pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
 57 | 
 58 |         y_true.append(batch.y.view(pred.shape).cpu())
 59 |         y_scores.append(pred.cpu())
 60 | 
 61 |     y_true = torch.cat(y_true, dim = 0).numpy()
 62 |     y_scores = torch.cat(y_scores, dim = 0).numpy()
 63 | 
 64 |     roc_list = []
 65 |     weight = []
 66 |     for i in range(y_true.shape[1]):
 67 |         #AUC is only defined when there is at least one positive data.
 68 |         if np.sum(y_true[:,i] == 1) > 0 and np.sum(y_true[:,i] == -1) > 0:
 69 |             is_valid = y_true[:,i]**2 > 0
 70 |             roc_list.append(roc_auc_score((y_true[is_valid,i] + 1)/2, y_scores[is_valid,i]))
 71 |             weight.append(normalized_weight[i])
 72 | 
 73 |     if len(roc_list) < y_true.shape[1]:
 74 |         print("Some target is missing!")
 75 |         print("Missing ratio: %f" %(1 - float(len(roc_list))/y_true.shape[1]))
 76 | 
 77 |     weight = np.array(weight)
 78 |     roc_list = np.array(roc_list)
 79 | 
 80 |     return weight.dot(roc_list)
 81 | 
 82 | 
def main():
    """Pre-train a graph-level GNN classifier and optionally save its encoder.

    Parses the command-line options, seeds torch/numpy, builds the dataset,
    loader, model and Adam optimizer, runs ``train`` for ``args.epochs``
    epochs, and finally writes ``model.gnn``'s state dict to
    ``<output_model_file>.pth`` when an output file name was given.

    Raises:
        ValueError: If ``--dataset`` is anything other than "chembl_filtered".
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0.2,
                        help='dropout ratio (default: 0.2)')
    parser.add_argument('--graph_pooling', type=str, default="mean",
                        help='graph level pooling (sum, mean, max, set2set, attention)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features across layers are combined. last, sum, max or concat')
    parser.add_argument('--dataset', type=str, default = 'chembl_filtered', help='root directory of dataset. For now, only classification.')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--input_model_file', type=str, default = '', help='filename to read the model (if there is any)')
    parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
    parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
    args = parser.parse_args()


    # Fixed seeds for torch and numpy; fall back to CPU when CUDA is unavailable.
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    # Number of classification tasks; only chembl_filtered is supported here.
    if args.dataset == "chembl_filtered":
        num_tasks = 1310
    else:
        raise ValueError("Invalid dataset name.")

    # Set up dataset (read from "dataset/<name>").
    dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset)

    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)

    # Set up model, optionally initialised from "<input_model_file>.pth".
    model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK = args.JK, drop_ratio = args.dropout_ratio, graph_pooling = args.graph_pooling, gnn_type = args.gnn_type)
    if not args.input_model_file == "":
        model.from_pretrained(args.input_model_file + ".pth")

    model.to(device)

    # Set up optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    print(optimizer)


    for epoch in range(1, args.epochs+1):
        print("====epoch " + str(epoch))

        train(args, model, device, loader, optimizer)

    # Only the ``gnn`` sub-module's weights are saved, not the whole model.
    if not args.output_model_file == "":
        torch.save(model.gnn.state_dict(), args.output_model_file + ".pth")
150 | 
151 | 
# Script entry point: only run main() when executed directly, not on import.
if __name__ == "__main__":
    main()
154 | 


--------------------------------------------------------------------------------
/transfer_learning/chem/run.sh:
--------------------------------------------------------------------------------
1 | #### GIN fine-tuning
2 | 
3 | nohup ./finetune.sh bbbp > log_bbbp &
4 | nohup ./finetune.sh sider > log_sider &
5 | nohup ./finetune.sh toxcast > log_toxcast &


--------------------------------------------------------------------------------
/unsupervised_TU/Accuracy.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/Accuracy.txt


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/arguments.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/arguments.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/aug.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/aug.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/gin.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/gin.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/losses.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/losses.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/model.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/arguments.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | def arg_parse():
 4 |     parser = argparse.ArgumentParser(description='GcnInformax Arguments.')
 5 |     parser.add_argument('--DS', dest='DS', default='NCI1', help='NCI1,PTC_MR,IMDB-BINARY,IMDB-MULTI,REDDIT-BINARY')
 6 |     parser.add_argument('--local', dest='local', action='store_const', 
 7 |             const=True, default=False)
 8 |     parser.add_argument('--glob', dest='glob', action='store_const', 
 9 |             const=True, default=False)
10 |     parser.add_argument('--prior', dest='prior', action='store_const', 
11 |             const=True, default=False)
12 |     parser.add_argument('--device', default='cuda:6', type=str, help='gpu device ids')
13 |     parser.add_argument('--lr', dest='lr', type=float, default= 0.01,
14 |             help='Learning rate.')
15 |     parser.add_argument('--alpha', default=1.2, type=float, help='stregnth for regularization')
16 |     parser.add_argument('--num-gc-layers', dest='num_gc_layers', type=int, default=5,
17 |             help='Number of graph convolution layers before each pooling')
18 |     parser.add_argument('--hidden-dim', dest='hidden_dim', type=int, default=32, help='')
19 |     parser.add_argument('--seed', type=int, default=0)
20 |     parser.add_argument('--epochs', type=int, default=20)
21 |     # Random
22 |     parser.add_argument('--eta', type=float, default=1.0, help='0.1, 1.0, 10, 100, 1000')
23 |     parser.add_argument('--batch_size', type=int, default=128, help='128, 256, 512, 1024')     
24 | 
25 |     return parser.parse_args()
26 | 
27 | 


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/configs/convnets.py:
--------------------------------------------------------------------------------
'''Basic convnet hyperparameters.

conv_args are in format (dim_h, f_size, stride, pad, batch_norm, dropout, nonlinearity, pool)
fc_args are in format (dim_h, batch_norm, dropout, nonlinearity)

Each private dict below bundles an encoder class with its layer arguments and
the layer indices passed to that encoder; ``configs`` maps public names to them.

'''

from cortex_DIM.nn_modules.encoder import ConvnetEncoder, FoldedConvnetEncoder


# Basic DCGAN-like encoders

# NOTE(review): this fc_args tuple has 5 elements (trailing None) while the
# format documented above, and every other config here, uses 4 -- confirm.
_basic28x28 = dict(
    Encoder=ConvnetEncoder,
    conv_args=[(64, 5, 2, 2, True, False, 'ReLU', None),
               (128, 5, 2, 2, True, False, 'ReLU', None)],
    fc_args=[(1024, True, False, 'ReLU', None)],
    local_idx=1,
    fc_idx=0
)

_basic32x32 = dict(
    Encoder=ConvnetEncoder,
    conv_args=[(64, 4, 2, 1, True, False, 'ReLU', None),
               (128, 4, 2, 1, True, False, 'ReLU', None),
               (256, 4, 2, 1, True, False, 'ReLU', None)],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=1,
    conv_idx=2,
    fc_idx=0
)

_basic64x64 = dict(
    Encoder=ConvnetEncoder,
    conv_args=[(64, 4, 2, 1, True, False, 'ReLU', None),
               (128, 4, 2, 1, True, False, 'ReLU', None),
               (256, 4, 2, 1, True, False, 'ReLU', None),
               (512, 4, 2, 1, True, False, 'ReLU', None)],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=2,
    conv_idx=3,
    fc_idx=0
)

# Alexnet-like encoders

_alex64x64 = dict(
    Encoder=ConvnetEncoder,
    conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))],
    fc_args=[(4096, True, False, 'ReLU'),
             (4096, True, False, 'ReLU')],
    local_idx=2,
    conv_idx=4,
    fc_idx=1
)

# Folded variant: same layers as _alex64x64, plus a crop_size and no conv_idx.
_foldalex64x64 = dict(
    Encoder=FoldedConvnetEncoder,
    crop_size=16,
    conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))],
    fc_args=[(4096, True, False, 'ReLU'),
             (4096, True, False, 'ReLU')],
    local_idx=4,
    fc_idx=1
)

# NOTE(review): fc_idx=1 but fc_args lists a single layer -- confirm that the
# encoder's fully-connected output list has an entry at index 1.
_foldmultialex64x64 = dict(
    Encoder=FoldedConvnetEncoder,
    crop_size=16,
    conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (384, 3, 1, 1, True, False, 'ReLU', None),
               (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
               (192, 3, 1, 0, True, False, 'ReLU', None),
               (192, 1, 1, 0, True, False, 'ReLU', None)],
    fc_args=[(4096, True, False, 'ReLU')],
    local_idx=4,
    multi_idx=6,
    fc_idx=1
)

# Public lookup table: configuration name -> configuration dict.
configs = dict(
    basic28x28=_basic28x28,
    basic32x32=_basic32x32,
    basic64x64=_basic64x64,
    alex64x64=_alex64x64,
    foldalex64x64=_foldalex64x64,
    foldmultialex64x64=_foldmultialex64x64
)


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/configs/resnets.py:
--------------------------------------------------------------------------------
  1 | """Configurations for ResNets
  2 | 
  3 | """
  4 | 
  5 | from cortex_DIM.nn_modules.encoder import ResnetEncoder, FoldedResnetEncoder
  6 | 
  7 | 
# ResnetEncoder configuration: one stem conv, six residual entries, one FC layer.
# Each res_args entry is (list of conv-layer tuples, int); the int is presumably
# a repeat count for that block -- TODO confirm against the ResNet module.
_resnet19_32x32 = dict(
    Encoder=ResnetEncoder,
    conv_before_args=[(64, 3, 2, 1, True, False, 'ReLU', None)],
    res_args=[
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 2, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 1, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 2, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 1, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1)
    ],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=4,
    fc_idx=0
)
 41 | 
# FoldedResnetEncoder counterpart of _resnet19_32x32: identical layer lists,
# plus crop_size=8 and a different local_idx (6 instead of 4).
_foldresnet19_32x32 = dict(
    Encoder=FoldedResnetEncoder,
    crop_size=8,
    conv_before_args=[(64, 3, 2, 1, True, False, 'ReLU', None)],
    res_args=[
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 2, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 1, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 2, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 1, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1)
    ],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=6,
    fc_idx=0
)
 76 | 
# ResnetEncoder configuration with the same conv tuples as _resnet19_32x32 but
# larger per-entry integers (1, 2, 1, 5, 1, 2) -- presumably repeat counts.
# NOTE(review): local_idx=2 here vs 12 in the folded variant below -- confirm.
_resnet34_32x32 = dict(
    Encoder=ResnetEncoder,
    conv_before_args=[(64, 3, 2, 1, True, False, 'ReLU', None)],
    res_args=[
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         2),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 2, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 1, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         5),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 2, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 1, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         2)
    ],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=2,
    fc_idx=0
)
110 | 
# FoldedResnetEncoder counterpart of _resnet34_32x32: identical layer lists,
# plus crop_size=8 and local_idx=12.
_foldresnet34_32x32 = dict(
    Encoder=FoldedResnetEncoder,
    crop_size=8,
    conv_before_args=[(64, 3, 2, 1, True, False, 'ReLU', None)],
    res_args=[
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(64, 1, 1, 0, True, False, 'ReLU', None),
          (64, 3, 1, 1, True, False, 'ReLU', None),
          (64 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         2),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 2, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(128, 1, 1, 0, True, False, 'ReLU', None),
          (128, 3, 1, 1, True, False, 'ReLU', None),
          (128 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         5),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 2, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         1),
        ([(256, 1, 1, 0, True, False, 'ReLU', None),
          (256, 3, 1, 1, True, False, 'ReLU', None),
          (256 * 4, 1, 1, 0, True, False, 'ReLU', None)],
         2)
    ],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=12,
    fc_idx=0
)
145 | 
# Public lookup table: configuration name -> ResNet configuration dict.
configs = dict(
    resnet19_32x32=_resnet19_32x32,
    resnet34_32x32=_resnet34_32x32,
    foldresnet19_32x32=_foldresnet19_32x32,
    foldresnet34_32x32=_foldresnet34_32x32
)


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/__pycache__/gan_losses.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/functions/__pycache__/gan_losses.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/__pycache__/misc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/functions/__pycache__/misc.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/gan_losses.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | """
 4 | 
 5 | import math
 6 | 
 7 | import torch
 8 | import torch.nn.functional as F
 9 | 
10 | from cortex_DIM.functions.misc import log_sum_exp
11 | 
12 | 
def raise_measure_error(measure):
    """Always raise ``NotImplementedError`` naming the unsupported measure.

    Args:
        measure: The measure name that was requested.

    Raises:
        NotImplementedError: Unconditionally; the message lists the
            supported measure names.
    """
    known = ['GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1']
    message = 'Measure `{}` not supported. Supported: {}'.format(measure, known)
    raise NotImplementedError(message)
18 | 
19 | 
def get_positive_expectation(p_samples, measure, average=True):
    """Evaluate the positive-sample term of a divergence / difference.

    Args:
        p_samples: Tensor of scores for positive samples.
        measure: One of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1'.
        average: If true, return the mean over all elements; otherwise the
            element-wise values.

    Returns:
        torch.Tensor

    """
    log_2 = math.log(2.)

    # (measure name, element-wise transform), tried in order.
    transforms = (
        ('GAN', lambda s: - F.softplus(-s)),
        ('JSD', lambda s: log_2 - F.softplus(- s)),
        ('X2', lambda s: s ** 2),
        ('KL', lambda s: s + 1.),
        ('RKL', lambda s: -torch.exp(-s)),
        ('DV', lambda s: s),
        ('H2', lambda s: 1. - torch.exp(-s)),
        ('W1', lambda s: s),
    )

    for name, transform in transforms:
        if measure == name:
            Ep = transform(p_samples)
            break
    else:
        raise_measure_error(measure)

    return Ep.mean() if average else Ep
57 | 
58 | 
def get_negative_expectation(q_samples, measure, average=True):
    """Evaluate the negative-sample term of a divergence / difference.

    Args:
        q_samples: Tensor of scores for negative samples.
        measure: One of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1'.
        average: If true, return the mean over all elements; otherwise the
            values as computed for the measure.

    Returns:
        torch.Tensor

    """
    log_2 = math.log(2.)

    # (measure name, transform), tried in order.
    transforms = (
        ('GAN', lambda s: F.softplus(-s) + s),
        ('JSD', lambda s: F.softplus(-s) + s - log_2),
        ('X2', lambda s: -0.5 * ((torch.sqrt(s ** 2) + 1.) ** 2)),
        ('KL', lambda s: torch.exp(s)),
        ('RKL', lambda s: s - 1.),
        # DV reduces over dimension 0 instead of acting element-wise.
        ('DV', lambda s: log_sum_exp(s, 0) - math.log(s.size(0))),
        ('H2', lambda s: torch.exp(s) - 1.),
        ('W1', lambda s: s),
    )

    for name, transform in transforms:
        if measure == name:
            Eq = transform(q_samples)
            break
    else:
        raise_measure_error(measure)

    return Eq.mean() if average else Eq


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/misc.py:
--------------------------------------------------------------------------------
 1 | """Miscellaneous functions.
 2 | 
 3 | """
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | def log_sum_exp(x, axis=None):
 9 |     """Log sum exp function
10 | 
11 |     Args:
12 |         x: Input.
13 |         axis: Axis over which to perform sum.
14 | 
15 |     Returns:
16 |         torch.Tensor: log sum exp
17 | 
18 |     """
19 |     x_max = torch.max(x, axis)[0]
20 |     y = torch.log((torch.exp(x - x_max)).sum(axis)) + x_max
21 |     return y
22 | 
23 | 
def random_permute(X):
    """Randomly permute a 3-D tensor along its first dimension.

    For every index along the last dimension an independent random
    permutation of the first dimension is drawn and applied, so entries are
    shuffled across dimension 0 while staying at the same position in
    dimensions 1 and 2.

    Args:
        X: Input tensor with three dimensions.

    Returns:
        torch.Tensor: Tensor with the same shape and device as ``X``.

    """
    X = X.transpose(1, 2)
    # Sorting i.i.d. uniform noise along dim 0 yields one random permutation
    # of the first dimension per column. The noise is drawn on the CPU and
    # moved to X's device, so this also works without CUDA.
    b = torch.rand((X.size(0), X.size(1))).to(X.device)
    idx = b.sort(0)[1]
    # Column indices on the same device as ``idx``.
    adx = torch.arange(X.size(1), device=X.device)
    X = X[idx, adx[None, :]].transpose(1, 2)
    return X
40 | 


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/mi_networks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/mi_networks.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/misc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/misc.cpython-37.pyc


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/encoder.py:
--------------------------------------------------------------------------------
 1 | '''Basic cortex_DIM encoder.
 2 | 
 3 | '''
 4 | 
 5 | import torch
 6 | 
 7 | from cortex_DIM.nn_modules.convnet import Convnet, FoldedConvnet
 8 | from cortex_DIM.nn_modules.resnet import ResNet, FoldedResNet
 9 | 
10 | 
def create_encoder(Module):
    '''Build an ``Encoder`` class that subclasses ``Module``.

    Args:
        Module: Base network class. Its ``forward`` must accept
            ``return_full_list=True`` and return either
            ``(conv_outs, fc_outs)`` or
            ``(conv_before_outs, res_outs, conv_after_outs, fc_outs)``,
            each element being a list of per-layer outputs.

    Returns:
        The new ``Encoder`` class.

    '''
    class Encoder(Module):
        '''Encoder used for cortex_DIM.

        '''

        def __init__(self, *args, local_idx=None, multi_idx=None, conv_idx=None, fc_idx=None, **kwargs):
            '''

            Args:
                args: Arguments for parent class.
                local_idx: Index in list of convolutional layers for local features.
                multi_idx: Index in list of convolutional layers for multiple globals.
                conv_idx: Index in list of convolutional layers for intermediate features.
                    Defaults to ``local_idx`` when not given.
                fc_idx: Index in list of fully-connected layers for intermediate features.
                kwargs: Keyword arguments for the parent class.

            Raises:
                ValueError: If ``local_idx`` is not provided.
            '''

            super().__init__(*args, **kwargs)

            if local_idx is None:
                raise ValueError('`local_idx` must be set')

            # Compare against None explicitly: ``conv_idx or local_idx`` would
            # silently replace the valid index 0 with ``local_idx``.
            if conv_idx is None:
                conv_idx = local_idx

            self.local_idx = local_idx
            self.multi_idx = multi_idx
            self.conv_idx = conv_idx
            self.fc_idx = fc_idx

        def forward(self, x: torch.Tensor):
            '''

            Args:
                x: Input tensor.

            Returns:
                local_out, conv_out, multi_out, hidden_out, global_out

            '''

            outs = super().forward(x, return_full_list=True)
            if len(outs) == 2:
                conv_out, fc_out = outs
            else:
                # Four-part output: concatenate all convolutional stages into
                # one list so the indices address them uniformly.
                conv_before_out, res_out, conv_after_out, fc_out = outs
                conv_out = conv_before_out + res_out + conv_after_out

            local_out = conv_out[self.local_idx]

            if self.multi_idx is not None:
                multi_out = conv_out[self.multi_idx]
            else:
                multi_out = None

            if len(fc_out) > 0:
                if self.fc_idx is not None:
                    hidden_out = fc_out[self.fc_idx]
                else:
                    hidden_out = None
                # The last fully-connected output is the global feature.
                global_out = fc_out[-1]
            else:
                hidden_out = None
                global_out = None

            conv_out = conv_out[self.conv_idx]

            return local_out, conv_out, multi_out, hidden_out, global_out

    return Encoder
81 | 
82 | 
class ConvnetEncoder(create_encoder(Convnet)):
    '''Encoder class produced by ``create_encoder`` with ``Convnet`` as base.'''
    pass
85 | 
86 | 
class FoldedConvnetEncoder(create_encoder(FoldedConvnet)):
    '''Encoder class produced by ``create_encoder`` with ``FoldedConvnet`` as base.'''
    pass
89 | 
90 | 
class ResnetEncoder(create_encoder(ResNet)):
    '''Encoder class produced by ``create_encoder`` with ``ResNet`` as base.'''
    pass
93 | 
94 | 
class FoldedResnetEncoder(create_encoder(FoldedResNet)):
    '''Encoder class produced by ``create_encoder`` with ``FoldedResNet`` as base.'''
    pass
97 | 


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/mi_networks.py:
--------------------------------------------------------------------------------
  1 | """Module for networks used for computing MI.
  2 | 
  3 | """
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import torch.nn as nn
  8 | 
  9 | from cortex_DIM.nn_modules.misc import Permute
 10 | 
 11 | 
 12 | class MIFCNet(nn.Module):
 13 |     """Simple custom network for computing MI.
 14 | 
 15 |     """
 16 |     def __init__(self, n_input, n_units):
 17 |         """
 18 | 
 19 |         Args:
 20 |             n_input: Number of input units.
 21 |             n_units: Number of output units.
 22 |         """
 23 |         super().__init__()
 24 | 
 25 |         assert(n_units >= n_input)
 26 | 
 27 |         self.linear_shortcut = nn.Linear(n_input, n_units)
 28 |         self.block_nonlinear = nn.Sequential(
 29 |             nn.Linear(n_input, n_units),
 30 |             nn.BatchNorm1d(n_units),
 31 |             nn.ReLU(),
 32 |             nn.Linear(n_units, n_units)
 33 |         )
 34 | 
 35 |         # initialize the initial projection to a sort of noisy copy
 36 |         eye_mask = np.zeros((n_units, n_input), dtype=np.uint8)
 37 |         for i in range(n_input):
 38 |             eye_mask[i, i] = 1
 39 | 
 40 |         self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
 41 |         self.linear_shortcut.weight.data.masked_fill_(torch.tensor(eye_mask), 1.)
 42 | 
 43 |     def forward(self, x):
 44 |         """
 45 | 
 46 |         Args:
 47 |             x: Input tensor.
 48 | 
 49 |         Returns:
 50 |             torch.Tensor: network output.
 51 | 
 52 |         """
 53 |         h = self.block_nonlinear(x) + self.linear_shortcut(x)
 54 |         return h
 55 | 
 56 | 
 57 | class MI1x1ConvNet(nn.Module):
 58 |     """Simple custorm 1x1 convnet.
 59 | 
 60 |     """
 61 |     def __init__(self, n_input, n_units):
 62 |         """
 63 | 
 64 |         Args:
 65 |             n_input: Number of input units.
 66 |             n_units: Number of output units.
 67 |         """
 68 | 
 69 |         super().__init__()
 70 | 
 71 |         self.block_nonlinear = nn.Sequential(
 72 |             nn.Conv1d(n_input, n_units, kernel_size=1, stride=1, padding=0, bias=False),
 73 |             nn.BatchNorm1d(n_units),
 74 |             nn.ReLU(),
 75 |             nn.Conv1d(n_units, n_units, kernel_size=1, stride=1, padding=0, bias=True),
 76 |         )
 77 | 
 78 |         self.block_ln = nn.Sequential(
 79 |             Permute(0, 2, 1),
 80 |             nn.LayerNorm(n_units),
 81 |             Permute(0, 2, 1)
 82 |         )
 83 | 
 84 |         self.linear_shortcut = nn.Conv1d(n_input, n_units, kernel_size=1,
 85 |                                          stride=1, padding=0, bias=False)
 86 | 
 87 |         # initialize shortcut to be like identity (if possible)
 88 |         if n_units >= n_input:
 89 |             eye_mask = np.zeros((n_units, n_input, 1), dtype=np.uint8)
 90 |             for i in range(n_input):
 91 |                 eye_mask[i, i, 0] = 1
 92 |             self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
 93 |             self.linear_shortcut.weight.data.masked_fill_(torch.tensor(eye_mask), 1.)
 94 | 
 95 |     def forward(self, x):
 96 |         """
 97 | 
 98 |             Args:
 99 |                 x: Input tensor.
100 | 
101 |             Returns:
102 |                 torch.Tensor: network output.
103 | 
104 |         """
105 |         h = self.block_ln(self.block_nonlinear(x) + self.linear_shortcut(x))
106 |         return h
107 | 


--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/misc.py:
--------------------------------------------------------------------------------
  1 | '''Various miscellaneous modules
  2 | 
  3 | '''
  4 | 
  5 | import torch
  6 | 
  7 | 
  8 | class View(torch.nn.Module):
  9 |     """Basic reshape module.
 10 | 
 11 |     """
 12 |     def __init__(self, *shape):
 13 |         """
 14 | 
 15 |         Args:
 16 |             *shape: Input shape.
 17 |         """
 18 |         super().__init__()
 19 |         self.shape = shape
 20 | 
 21 |     def forward(self, input):
 22 |         """Reshapes tensor.
 23 | 
 24 |         Args:
 25 |             input: Input tensor.
 26 | 
 27 |         Returns:
 28 |             torch.Tensor: Flattened tensor.
 29 | 
 30 |         """
 31 |         return input.view(*self.shape)
 32 | 
 33 | 
 34 | class Unfold(torch.nn.Module):
 35 |     """Module for unfolding tensor.
 36 | 
 37 |     Performs strided crops on 2d (image) tensors. Stride is assumed to be half the crop size.
 38 | 
 39 |     """
 40 |     def __init__(self, img_size, fold_size):
 41 |         """
 42 | 
 43 |         Args:
 44 |             img_size: Input size.
 45 |             fold_size: Crop size.
 46 |         """
 47 |         super().__init__()
 48 | 
 49 |         fold_stride = fold_size // 2
 50 |         self.fold_size = fold_size
 51 |         self.fold_stride = fold_stride
 52 |         self.n_locs = 2 * (img_size // fold_size) - 1
 53 |         self.unfold = torch.nn.Unfold((self.fold_size, self.fold_size),
 54 |                                       stride=(self.fold_stride, self.fold_stride))
 55 | 
 56 |     def forward(self, x):
 57 |         """Unfolds tensor.
 58 | 
 59 |         Args:
 60 |             x: Input tensor.
 61 | 
 62 |         Returns:
 63 |             torch.Tensor: Unfolded tensor.
 64 | 
 65 |         """
 66 |         N = x.size(0)
 67 |         x = self.unfold(x).reshape(N, -1, self.fold_size, self.fold_size, self.n_locs * self.n_locs)\
 68 |             .permute(0, 4, 1, 2, 3)\
 69 |             .reshape(N * self.n_locs * self.n_locs, -1, self.fold_size, self.fold_size)
 70 |         return x
 71 | 
 72 | 
 73 | class Fold(torch.nn.Module):
 74 |     """Module (re)folding tensor.
 75 | 
 76 |     Undoes the strided crops above. Works only on 1x1.
 77 | 
 78 |     """
 79 |     def __init__(self, img_size, fold_size):
 80 |         """
 81 | 
 82 |         Args:
 83 |             img_size: Images size.
 84 |             fold_size: Crop size.
 85 |         """
 86 |         super().__init__()
 87 |         self.n_locs = 2 * (img_size // fold_size) - 1
 88 | 
 89 |     def forward(self, x):
 90 |         """(Re)folds tensor.
 91 | 
 92 |         Args:
 93 |             x: Input tensor.
 94 | 
 95 |         Returns:
 96 |             torch.Tensor: Refolded tensor.
 97 | 
 98 |         """
 99 |         dim_c, dim_x, dim_y = x.size()[1:]
100 |         x = x.reshape(-1, self.n_locs * self.n_locs, dim_c, dim_x * dim_y)
101 |         x = x.reshape(-1, self.n_locs * self.n_locs, dim_c, dim_x * dim_y)\
102 |             .permute(0, 2, 3, 1)\
103 |             .reshape(-1, dim_c * dim_x * dim_y, self.n_locs, self.n_locs).contiguous()
104 |         return x
105 | 
106 | 
class Permute(torch.nn.Module):
    """Module wrapper around ``Tensor.permute``.

    """
    def __init__(self, *perm):
        """

        Args:
            *perm: Axis order passed to ``Tensor.permute`` on every call.
        """
        super().__init__()
        self.perm = perm

    def forward(self, input):
        """Permutes axes of tensor.

        Args:
            input: Input tensor.

        Returns:
            torch.Tensor: permuted tensor.

        """
        axis_order = self.perm
        permuted = input.permute(*axis_order)
        return permuted
131 | 


--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/data.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/data.pt


--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt


--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt


--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/raw/README.txt:
--------------------------------------------------------------------------------
 1 | README for dataset NCI1
 2 | 
 3 | 
 4 | === Usage ===
 5 | 
 6 | This folder contains the following comma separated text files 
 7 | (replace DS by the name of the dataset):
 8 | 
 9 | n = total number of nodes
10 | m = total number of edges
11 | N = number of graphs
12 | 
13 | (1) 	DS_A.txt (m lines) 
14 | 	sparse (block diagonal) adjacency matrix for all graphs,
15 | 	each line corresponds to (row, col) resp. (node_id, node_id)
16 | 
17 | (2) 	DS_graph_indicator.txt (n lines)
18 | 	column vector of graph identifiers for all nodes of all graphs,
19 | 	the value in the i-th line is the graph_id of the node with node_id i
20 | 
21 | (3) 	DS_graph_labels.txt (N lines) 
22 | 	class labels for all graphs in the dataset,
23 | 	the value in the i-th line is the class label of the graph with graph_id i
24 | 
25 | (4) 	DS_node_labels.txt (n lines)
26 | 	column vector of node labels,
27 | 	the value in the i-th line corresponds to the node with node_id i
28 | 
29 | There are OPTIONAL files if the respective information is available:
30 | 
31 | (5) 	DS_edge_labels.txt (m lines; same size as DS_A.txt)
32 | 	labels for the edges in DS_A.txt 
33 | 
34 | (6) 	DS_edge_attributes.txt (m lines; same size as DS_A.txt)
35 | 	attributes for the edges in DS_A.txt 
36 | 
37 | (7) 	DS_node_attributes.txt (n lines) 
38 | 	matrix of node attributes,
39 | 	the comma separated values in the i-th line are the attribute vector of the node with node_id i
40 | 
41 | (8) 	DS_graph_attributes.txt (N lines) 
42 | 	regression values for all graphs in the dataset,
43 | 	the value in the i-th line is the attribute of the graph with graph_id i
44 | 
45 | 
46 | === Description ===
47 | 
48 | NCI1 and NCI109 represent two balanced subsets of datasets of chemical compounds screened 
49 | for activity against non-small cell lung cancer and ovarian cancer cell lines respectively
50 | (Wale and Karypis (2006) and http://pubchem.ncbi.nlm.nih.gov). 
51 | 
52 | 
53 | === Previous Use of the Dataset ===
54 | 
55 | Neumann, M., Garnett R., Bauckhage Ch., Kersting K.: Propagation Kernels: Efficient Graph 
56 | Kernels from Propagated Information. Under review at MLJ.
57 | 
58 | Neumann, M., Patricia, N., Garnett, R., Kersting, K.: Efficient Graph Kernels by 
59 | Randomization. In: P.A. Flach, T.D. Bie, N. Cristianini (eds.) ECML/PKDD, Notes in 
60 | Computer Science, vol. 7523, pp. 378-393. Springer (2012).
61 | 
62 | Shervashidze, N., Schweitzer, P., van Leeuwen, E., Mehlhorn, K., Borgwardt, K.:
63 | Weisfeiler-Lehman Graph Kernels. Journal of Machine Learning Research 12, 2539-2561 (2011)
64 | 
65 | 
66 | === References ===
67 | 
68 | N. Wale and G. Karypis. Comparison of descriptor spaces for chemical compound retrieval and 
69 | classification. In Proc. of ICDM, pages 678–689, Hong Kong, 2006.
70 | 
71 | 


--------------------------------------------------------------------------------
/unsupervised_TU/deepinfomax.py:
--------------------------------------------------------------------------------
  1 | import os.path as osp
  2 | import torch
  3 | from torch.autograd import Variable
  4 | import torch.nn as nn
  5 | import torch.nn.functional as F
  6 | import numpy as np
  7 | import json
  8 | # from core.encoders import *
  9 | 
 10 | from torch_geometric.datasets import TUDataset
 11 | from torch_geometric.data import DataLoader
 12 | import sys
 13 | import json
 14 | from torch import optim
 15 | 
 16 | from cortex_DIM.nn_modules.mi_networks import MIFCNet, MI1x1ConvNet
 17 | from losses import *
 18 | from gin import Encoder
 19 | from evaluate_embedding import evaluate_embedding
 20 | from model import *
 21 | 
 22 | from arguments import arg_parse
 23 | 
 24 | class GcnInfomax(nn.Module):
 25 |   def __init__(self, hidden_dim, num_gc_layers, alpha=0.5, beta=1., gamma=.1):
 26 |     super(GcnInfomax, self).__init__()
 27 | 
 28 |     self.alpha = alpha
 29 |     self.beta = beta
 30 |     self.gamma = gamma
 31 |     self.prior = args.prior
 32 | 
 33 |     self.embedding_dim = mi_units = hidden_dim * num_gc_layers
 34 |     self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)
 35 | 
 36 |     self.local_d = FF(self.embedding_dim)
 37 |     self.global_d = FF(self.embedding_dim)
 38 |     # self.local_d = MI1x1ConvNet(self.embedding_dim, mi_units)
 39 |     # self.global_d = MIFCNet(self.embedding_dim, mi_units)
 40 | 
 41 |     if self.prior:
 42 |         self.prior_d = PriorDiscriminator(self.embedding_dim)
 43 | 
 44 |     self.init_emb()
 45 | 
 46 |   def init_emb(self):
 47 |     initrange = -1.5 / self.embedding_dim
 48 |     for m in self.modules():
 49 |         if isinstance(m, nn.Linear):
 50 |             torch.nn.init.xavier_uniform_(m.weight.data)
 51 |             if m.bias is not None:
 52 |                 m.bias.data.fill_(0.0)
 53 | 
 54 | 
 55 |   def forward(self, x, edge_index, batch, num_graphs):
 56 | 
 57 |     # batch_size = data.num_graphs
 58 |     if x is None:
 59 |         x = torch.ones(batch.shape[0]).to(device)
 60 | 
 61 |     y, M = self.encoder(x, edge_index, batch)
 62 |     
 63 |     g_enc = self.global_d(y)
 64 |     l_enc = self.local_d(M)
 65 | 
 66 |     mode='fd'
 67 |     measure='JSD'
 68 |     local_global_loss = local_global_loss_(l_enc, g_enc, edge_index, batch, measure)
 69 |  
 70 |     if self.prior:
 71 |         prior = torch.rand_like(y)
 72 |         term_a = torch.log(self.prior_d(prior)).mean()
 73 |         term_b = torch.log(1.0 - self.prior_d(y)).mean()
 74 |         PRIOR = - (term_a + term_b) * self.gamma
 75 |     else:
 76 |         PRIOR = 0
 77 |     
 78 |     return local_global_loss + PRIOR
 79 | 
 80 | if __name__ == '__main__':
 81 |     
 82 |     args = arg_parse()
 83 |     # accuracies = {'logreg':[], 'svc':[], 'linearsvc':[], 'randomforest':[]}
 84 |     accuracies = accuracies = {'val':[], 'test':[]}
 85 |     epochs = 20
 86 |     log_interval = 1
 87 |     batch_size = 128
 88 |     lr = args.lr
 89 |     DS = args.DS
 90 |     path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', DS)
 91 |     # kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
 92 | 
 93 |     dataset = TUDataset(path, name=DS).shuffle()
 94 |     try:
 95 |         dataset_num_features = dataset.num_features
 96 |     except:
 97 |         dataset_num_features = 1
 98 | 
 99 |     dataloader = DataLoader(dataset, batch_size=batch_size)
100 | 
101 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
102 |     model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
103 |     optimizer = torch.optim.Adam(model.parameters(), lr=lr)
104 | 
105 |     print('================')
106 |     print('lr: {}'.format(lr))
107 |     print('num_features: {}'.format(dataset_num_features))
108 |     print('hidden_dim: {}'.format(args.hidden_dim))
109 |     print('num_gc_layers: {}'.format(args.num_gc_layers))
110 |     print('================')
111 | 
112 |     
113 |     model.eval()
114 |     emb, y = model.encoder.get_embeddings(dataloader)
115 |     res = evaluate_embedding(emb, y)
116 |     # accuracies['logreg'].append(res[0])
117 |     # accuracies['svc'].append(res[1])
118 |     # accuracies['linearsvc'].append(res[2])
119 |     # accuracies['randomforest'].append(res[3])
120 |     accuracies['val'].append(res[0])
121 |     accuracies['test'].append(res[1])
122 | 
123 | 
124 |     for epoch in range(1, epochs+1):
125 |         loss_all = 0
126 |         model.train()
127 |         for data in dataloader:
128 |             data = data.to(device)
129 |             optimizer.zero_grad()
130 |             loss = model(data.x, data.edge_index, data.batch, data.num_graphs)
131 |             loss_all += loss.item() * data.num_graphs
132 |             loss.backward()
133 |             optimizer.step()
134 |         print('Epoch {}, Loss {}'.format(epoch, loss_all / len(dataloader)))
135 | 
136 |         if epoch % log_interval == 0:
137 |             model.eval()
138 |             emb, y = model.encoder.get_embeddings(dataloader)
139 |             res = evaluate_embedding(emb, y)
140 |             # accuracies['logreg'].append(res[0])
141 |             # accuracies['svc'].append(res[1])
142 |             # accuracies['linearsvc'].append(res[2])
143 |             # accuracies['randomforest'].append(res[3])
144 |             accuracies['val'].append(res[0])
145 |             accuracies['test'].append(res[1])
146 | 
147 |             print(accuracies)
148 | 
149 |     tpe  = ('local' if args.local else '') + ('prior' if args.prior else '')
150 |     with open('new_log', 'a+') as f:
151 |         s = json.dumps(accuracies)
152 |         f.write('{},{},{},{},{},{},{}\n'.format(args.DS, tpe, args.num_gc_layers, epochs, log_interval, lr, s))
153 | 


--------------------------------------------------------------------------------
/unsupervised_TU/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex

# Usage: ./go.sh GPU_ID DATASET_NAME ETA
# Runs simgrace.py once per seed (0-4) on the given GPU.
for seed in 0 1 2 3 4
do
  # "--eta" and its value must be separate words; "--eta$3" would reach the
  # argument parser as one unknown option such as "--eta1.0".
  CUDA_VISIBLE_DEVICES="$1" python simgrace.py --DS "$2" --lr 0.01 --local --num-gc-layers 5 --eta "$3" --seed "$seed"
done
7 | 
8 | 


--------------------------------------------------------------------------------
/unsupervised_TU/losses.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from cortex_DIM.functions.gan_losses import get_positive_expectation, get_negative_expectation
 5 | 
 6 | def local_global_loss_(l_enc, g_enc, edge_index, batch, measure):
 7 |     '''
 8 |     Args:
 9 |         l: Local feature map.
10 |         g: Global features.
11 |         measure: Type of f-divergence. For use with mode `fd`
12 |         mode: Loss mode. Fenchel-dual `fd`, NCE `nce`, or Donsker-Vadadhan `dv`.
13 |     Returns:
14 |         torch.Tensor: Loss.
15 |     '''
16 |     num_graphs = g_enc.shape[0]
17 |     num_nodes = l_enc.shape[0]
18 | 
19 |     pos_mask = torch.zeros((num_nodes, num_graphs)).cuda()
20 |     neg_mask = torch.ones((num_nodes, num_graphs)).cuda()
21 |     for nodeidx, graphidx in enumerate(batch):
22 |         pos_mask[nodeidx][graphidx] = 1.
23 |         neg_mask[nodeidx][graphidx] = 0.
24 | 
25 |     res = torch.mm(l_enc, g_enc.t())
26 | 
27 |     E_pos = get_positive_expectation(res * pos_mask, measure, average=False).sum()
28 |     E_pos = E_pos / num_nodes
29 |     E_neg = get_negative_expectation(res * neg_mask, measure, average=False).sum()
30 |     E_neg = E_neg / (num_nodes * (num_graphs - 1))
31 | 
32 |     return E_neg - E_pos
33 | 
def adj_loss_(l_enc, g_enc, edge_index, batch):
    '''Binary cross-entropy between predicted and actual adjacency.

    The prediction for a node pair is the sigmoid of the dot product of the
    two node encodings; diagonal entries are forced to zero.

    Args:
        l_enc: Local (per-node) encodings, one row per node.
        g_enc: Unused; kept for interface compatibility.
        edge_index: Tensor whose first two rows hold the endpoints of every
            edge. Edges are treated as undirected.
        batch: Unused; kept for interface compatibility.
    Returns:
        torch.Tensor: Scalar BCE loss.
    '''
    num_nodes = l_enc.shape[0]

    # Allocate on the encodings' device instead of forcing CUDA.
    device = l_enc.device
    adj = torch.zeros((num_nodes, num_nodes), device=device)
    mask = torch.eye(num_nodes, device=device)

    # Symmetric adjacency, filled with indexed assignments instead of a
    # per-edge Python loop.
    src, dst = edge_index[0], edge_index[1]
    adj[src, dst] = 1.
    adj[dst, src] = 1.

    res = torch.sigmoid((torch.mm(l_enc, l_enc.t())))
    res = (1-mask) * res

    loss = nn.BCELoss()(res, adj)
    return loss
51 | 


--------------------------------------------------------------------------------
/unsupervised_TU/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Variable
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | import numpy as np
 6 | # from core.encoders import *
 7 | import json
 8 | from torch import optim
 9 | 
10 | from cortex_DIM.nn_modules.mi_networks import MIFCNet, MI1x1ConvNet
11 | from losses import *
12 | 
13 | 
class GlobalDiscriminator(nn.Module):
    """Discriminator over a global encoding ``y`` concatenated with ``M``.

    NOTE(review): this class looks unfinished and is probably unused:
      * ``forward`` calls ``self.encoder``, which ``__init__`` never sets;
      * ``l1`` outputs 32 features but ``l2`` expects 512;
      * the constructor arguments ``args`` and ``input_dim`` are ignored;
      * ``forward`` moves data with ``.cuda()`` and so requires a GPU.
    Confirm before relying on it.
    """
    def __init__(self, args, input_dim):
        # Neither `args` nor `input_dim` is read; layer sizes are hard-coded.
        super().__init__()
        
        self.l0 = nn.Linear(32, 32)
        self.l1 = nn.Linear(32, 32)

        self.l2 = nn.Linear(512, 1)
    def forward(self, y, M, data):
        # `data` is indexed with the keys 'adj' and 'num_nodes'.

        adj = Variable(data['adj'].float(), requires_grad=False).cuda()
        # h0 = Variable(data['feats'].float()).cuda()
        batch_num_nodes = data['num_nodes'].int().numpy()
        # NOTE(review): `self.encoder` is not defined anywhere in this class.
        M, _ = self.encoder(M, adj, batch_num_nodes)
        # h = F.relu(self.c0(M))
        # h = self.c1(h)
        # h = h.view(y.shape[0], -1)
        h = torch.cat((y, M), dim=1)
        h = F.relu(self.l0(h))
        h = F.relu(self.l1(h))
        return self.l2(h)
35 | 
class PriorDiscriminator(nn.Module):
    """Three-layer MLP that maps an encoding to a probability in (0, 1)."""

    def __init__(self, input_dim):
        """
        Args:
            input_dim: Number of input features; also the hidden width.
        """
        super().__init__()
        # Layers are created in l0, l1, l2 order so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.l0 = nn.Linear(input_dim, input_dim)
        self.l1 = nn.Linear(input_dim, input_dim)
        self.l2 = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the sigmoid score for each row of ``x``."""
        hidden = x
        for layer in (self.l0, self.l1):
            hidden = F.relu(layer(hidden))
        logits = self.l2(hidden)
        return torch.sigmoid(logits)
47 | 
class FF(nn.Module):
    """Feed-forward block with a linear shortcut.

    The output is ``block(x) + linear_shortcut(x)``, where ``block`` is three
    ``Linear -> ReLU`` stages that all keep the width at ``input_dim``.
    """

    def __init__(self, input_dim):
        """
        Args:
            input_dim: Width of the input, the hidden stages and the output.
        """
        super().__init__()
        stages = []
        for _ in range(3):
            stages.append(nn.Linear(input_dim, input_dim))
            stages.append(nn.ReLU())
        self.block = nn.Sequential(*stages)
        self.linear_shortcut = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        """Return the sum of the non-linear branch and the linear shortcut."""
        nonlinear = self.block(x)
        shortcut = self.linear_shortcut(x)
        return nonlinear + shortcut
69 | 
70 | 


--------------------------------------------------------------------------------
/unsupervised_TU/readme.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies
 2 | * [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric#installation)==1.7.0
 3 | 
 4 | ## Training & Evaluation
 5 | 
 6 | ```
 7 | ./go.sh $GPU_ID $DATASET_NAME $ETA
 8 | ```
 9 | 
10 | ```$DATASET_NAME``` is the dataset name (please refer to https://chrsmrrs.github.io/datasets/docs/datasets/), ```$GPU_ID``` is the ID of the GPU to launch on and ```$ETA``` can be tuned among {0.1, 1.0, 10.0, 100.0}.
11 | 
12 | ## Acknowledgements
13 | - https://github.com/Shen-Lab/GraphCL/tree/master/unsupervised_TU
14 | 
15 | - https://github.com/fanyun-sun/InfoGraph/tree/master/unsupervised
16 | 


--------------------------------------------------------------------------------
/unsupervised_TU/test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import json
 4 | import pandas as pd
 5 | import collections
 6 | 
 7 | if __name__ == '__main__':
 8 | 
 9 |     for epoch in [20, 100]:
10 |         print(epoch)
11 |         real_res = {'logreg':[-1], 'svc':[-1], 'linearsvc':[-1], 'randomforest':[-1]}
12 |         for gc in [3, 5, 8, 16]:
13 |             for lr in [0.01, 0.1, 0.001]:
14 |                 for tpe in ['local', 'localprior']:
15 |                     res = collections.defaultdict(lambda :collections.defaultdict(list))
16 |                     with open(sys.argv[1], 'r') as f:
17 |                         for line in f:
18 |                             x = line.strip().split(',', 6)
19 |                             if x[1] != tpe:
20 |                                 continue
21 |                             if x[2] != str(gc):
22 |                                 continue
23 |                             if x[3] != str(epoch):
24 |                                 continue
25 |                             if x[5] != str(lr):
26 |                                 continue
27 |                             tmp = json.loads(x[-1])
28 | 
29 |                             DS = x[0]
30 |                             res[DS]['logreg'].append(tmp['logreg'])
31 |                             res[DS]['svc'].append(tmp['svc'])
32 |                             res[DS]['linearsvc'].append(tmp['linearsvc'])
33 |                             res[DS]['randomforest'].append(tmp['randomforest'])
34 | 
35 |                     for DS, lst in res.items():
36 |                         if DS != sys.argv[2]:
37 |                             continue
38 |                         # print('====================')
39 |                         # print(DS)
40 |                         for clf, v in lst.items():
41 |                             mn = np.mean(np.array(v[:5]), axis=0)
42 |                             std = np.std(np.array(v[:5]), axis=0)
43 | 
44 |                             idx = np.argmax(mn)
45 |                             if mn[idx] > real_res[clf][0] and len(v) > 1:
46 |                                 real_res[clf] = [mn[idx], std[idx], epoch, lr, gc, idx, len(v)]
47 |                                 # print(epoch, lr, gc, clf, idx, mn[idx], std[idx], len(v))
48 |         print(real_res)
49 | 
50 | 


--------------------------------------------------------------------------------