├── README.md
├── adversarial_robustness
├── README.md
└── code
│ ├── common
│ ├── Makefile
│ ├── _ext
│ │ ├── __init__.pyc
│ │ ├── __pycache__
│ │ │ └── __init__.cpython-37.pyc
│ │ ├── custom_kernel.d
│ │ ├── custom_kernel.o
│ │ └── my_lib
│ │ │ ├── __init__.py
│ │ │ ├── __init__.pyc
│ │ │ ├── __pycache__
│ │ │ └── __init__.cpython-37.pyc
│ │ │ ├── _my_lib.so
│ │ │ └── ffiex.py
│ ├── build.py
│ ├── cmd_args.py
│ ├── cmd_args.pyc
│ ├── dnn.py
│ ├── dnn.pyc
│ ├── functions
│ │ ├── __init__.pyc
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-37.pyc
│ │ │ └── custom_func.cpython-37.pyc
│ │ ├── custom_func.py
│ │ └── custom_func.pyc
│ ├── graph_embedding.py
│ ├── graph_embedding.pyc
│ ├── modules
│ │ ├── __init__.pyc
│ │ ├── __pycache__
│ │ │ ├── __init__.cpython-37.pyc
│ │ │ └── custom_mod.cpython-37.pyc
│ │ ├── custom_mod.py
│ │ └── custom_mod.pyc
│ ├── src
│ │ ├── custom_kernel.cu
│ │ ├── custom_kernel.h
│ │ ├── my_lib.c
│ │ ├── my_lib.h
│ │ ├── my_lib_cuda.c
│ │ └── my_lib_cuda.h
│ └── test.py
│ ├── data_generator
│ ├── data_util.py
│ ├── data_util.pyc
│ ├── gen_er_components.py
│ └── pkl_dump.sh
│ ├── graph_attack
│ ├── collect_rl_results.py
│ ├── dqn.py
│ ├── er_trivial_attack.py
│ ├── genetic_algorithm.py
│ ├── grad_attack.py
│ ├── nstep_replay_mem.py
│ ├── nstep_replay_mem.pyc
│ ├── plot_dqn.py
│ ├── plot_dqn.sh
│ ├── q_net.py
│ ├── q_net.pyc
│ ├── rl_common.py
│ ├── run_dqn.sh
│ ├── run_ga.sh
│ ├── run_grad.sh
│ └── run_trivial.sh
│ └── graph_classification
│ ├── er_components.py
│ ├── graph_common.py
│ ├── run_er_components.sh
│ └── test_er_comp.sh
├── semisupervised_TU
├── README.md
├── environment.yml
├── finetuning
│ ├── datasets.py
│ ├── feature_expansion.py
│ ├── gcn_conv.py
│ ├── image_dataset.py
│ ├── main.py
│ ├── main_cl.py
│ ├── net_cl.py
│ ├── net_gae.py
│ ├── net_infomax.py
│ ├── train_eval.py
│ ├── tu_dataset.py
│ └── utils.py
└── pre-training
│ ├── datasets.py
│ ├── feature_expansion.py
│ ├── gcn_conv.py
│ ├── main.py
│ ├── res_gcn.py
│ ├── train_eval.py
│ ├── tu_dataset.py
│ └── utils.py
├── simgrace.png
├── transfer_learning
├── README.md
├── bio
│ ├── batch.py
│ ├── dataloader.py
│ ├── finetune.py
│ ├── finetune.sh
│ ├── finetune_tune.sh
│ ├── loader.py
│ ├── model.py
│ ├── models_simgrace
│ │ ├── simgrace_100.pth
│ │ ├── simgrace_20.pth
│ │ ├── simgrace_40.pth
│ │ ├── simgrace_60.pth
│ │ └── simgrace_80.pth
│ ├── pretrain_contextpred.py
│ ├── pretrain_deepgraphinfomax.py
│ ├── pretrain_edgepred.py
│ ├── pretrain_masking.py
│ ├── pretrain_simgrace.py
│ ├── pretrain_supervised.py
│ ├── result_analysis.py
│ ├── splitters.py
│ └── util.py
└── chem
│ ├── batch.py
│ ├── dataloader.py
│ ├── finetune.py
│ ├── finetune.sh
│ ├── finetune_mutag_ptc.py
│ ├── finetune_tune.sh
│ ├── loader.py
│ ├── model.py
│ ├── models_simgrace
│ ├── simgrace_100.pth
│ ├── simgrace_20.pth
│ ├── simgrace_40.pth
│ ├── simgrace_60.pth
│ └── simgrace_80.pth
│ ├── parse_result.py
│ ├── pretrain_contextpred.py
│ ├── pretrain_deepgraphinfomax.py
│ ├── pretrain_edgepred.py
│ ├── pretrain_masking.py
│ ├── pretrain_simgrace.py
│ ├── pretrain_supervised.py
│ ├── run.sh
│ ├── splitters.py
│ └── util.py
└── unsupervised_TU
├── Accuracy.txt
├── __pycache__
├── arguments.cpython-37.pyc
├── aug.cpython-37.pyc
├── evaluate_embedding.cpython-37.pyc
├── gin.cpython-37.pyc
├── losses.cpython-37.pyc
└── model.cpython-37.pyc
├── arguments.py
├── aug.py
├── cortex_DIM
├── configs
│ ├── convnets.py
│ └── resnets.py
├── functions
│ ├── __pycache__
│ │ ├── gan_losses.cpython-37.pyc
│ │ └── misc.cpython-37.pyc
│ ├── dim_losses.py
│ ├── gan_losses.py
│ └── misc.py
└── nn_modules
│ ├── __pycache__
│ ├── mi_networks.cpython-37.pyc
│ └── misc.cpython-37.pyc
│ ├── convnet.py
│ ├── encoder.py
│ ├── mi_networks.py
│ ├── misc.py
│ └── resnet.py
├── data
└── NCI1
│ └── NCI1
│ ├── processed
│ ├── data.pt
│ ├── pre_filter.pt
│ └── pre_transform.pt
│ └── raw
│ ├── NCI1_A.txt
│ ├── NCI1_graph_indicator.txt
│ ├── NCI1_graph_labels.txt
│ ├── NCI1_node_labels.txt
│ └── README.txt
├── deepinfomax.py
├── evaluate_embedding.py
├── gin.py
├── go.sh
├── losses.py
├── model.py
├── readme.md
├── simgrace.py
└── test.py
/README.md:
--------------------------------------------------------------------------------
1 | # SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation (WWW 2022)
2 | PyTorch implementation for [SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation](https://arxiv.org/pdf/2202.03104.pdf) accepted by The Web Conference 2022 (WWW 2022).
3 | ## Overview
4 | In this repository, we provide the code of SimGRACE to evaluate its performance in terms of generalizability (unsupervised & semi-supervised learning), transferability (transfer learning) and robustness (adversarial robustness).
5 | 
6 | ## Dataset download
7 | * Semi-supervised learning & Unsupervised representation learning [TU Datasets](https://chrsmrrs.github.io/datasets/docs/datasets/) (social and biochemical graphs)
8 | * Transfer learning [chem data](http://snap.stanford.edu/gnn-pretrain/data/chem_dataset.zip) (2.5GB); [bio data](http://snap.stanford.edu/gnn-pretrain/data/bio_dataset.zip) (2GB)
9 | * Adversarial robustness [synthetic data](https://www.dropbox.com/sh/mu8odkd36x54rl3/AABg8ABiMqwcMEM5qKIY97nla?dl=0)
10 |
11 | ## Citation
12 | ```
13 | @inproceedings{10.1145/3485447.3512156,
14 | author = {Xia, Jun and Wu, Lirong and Chen, Jintao and Hu, Bozhen and Li, Stan Z.},
15 | title = {SimGRACE: A Simple Framework for Graph Contrastive Learning without Data Augmentation},
16 | year = {2022},
17 | isbn = {9781450390965},
18 | publisher = {Association for Computing Machinery},
19 | address = {New York, NY, USA},
20 | url = {https://doi.org/10.1145/3485447.3512156},
21 | doi = {10.1145/3485447.3512156},
22 | booktitle = {Proceedings of the ACM Web Conference 2022},
23 | pages = {1070–1079},
24 | numpages = {10},
25 | keywords = {graph representation learning, contrastive learning, Graph neural networks, robustness, graph self-supervised learning},
26 | location = {Virtual Event, Lyon, France},
27 | series = {WWW '22}
28 | }
29 | ```
30 | ## Useful resources for Pretrained Graphs Models (PGMs)
31 | * The first comprehensive survey for PGMs: [A Survey of Pretraining on Graphs: Taxonomy, Methods, and Applications](https://arxiv.org/abs/2202.07893v1)
32 | * [A curated list of must-read papers, open-source pretrained models and pretraining datasets.](https://github.com/junxia97/awesome-pretrain-on-graphs)
33 |
34 | ## Reference
35 | 1. [Graph Contrastive Learning Automated (ICML 2021)](https://github.com/Shen-Lab/GraphCL_Automated)
36 | 2. [Graph Contrastive Learning with Augmentations (NeurIPS 2020)](https://github.com/Shen-Lab/GraphCL)
37 | 3. [Strategies for Pre-training Graph Neural Networks (ICLR 2020)](https://github.com/snap-stanford/pretrain-gnns/)
38 | 4. [Adversarial Attack on Graph Structured Data (ICML 2018)](https://github.com/Hanjun-Dai/graph_adversarial_attack)
39 |
--------------------------------------------------------------------------------
/adversarial_robustness/README.md:
--------------------------------------------------------------------------------
1 | ## Dependencies & Dataset
2 |
3 | Please refer to https://github.com/Hanjun-Dai/graph_adversarial_attack for environment setup and to download the dataset.
4 | After the configuration, you should have three directories: ```./code/```, ```./dropbox/``` and ```./pytorch_structure2vec/```.
5 |
6 | ## Training & Evaluation
7 | ### Pre-training + finetuning: ###
8 | ```
9 | cd ./code/graph_classification
10 | ./run_er_components.sh 15 20 0.15 2 -phase train
11 | ./run_er_components.sh 15 20 0.15 3 -phase train
12 | ./run_er_components.sh 15 20 0.15 4 -phase train
13 |
14 | ./run_er_components.sh 40 50 0.05 2 -phase train
15 | ./run_er_components.sh 40 50 0.05 3 -phase train
16 | ./run_er_components.sh 40 50 0.05 4 -phase train
17 |
18 | ./run_er_components.sh 90 100 0.02 2 -phase train
19 | ./run_er_components.sh 90 100 0.02 3 -phase train
20 | ./run_er_components.sh 90 100 0.02 4 -phase train
21 | ```
22 |
23 | ### Adversarial attacks: ###
24 | ```
25 | cd ./code/graph_attack
26 | ./run_trivial.sh 15 20 0.15 2 -phase train
27 | ./run_trivial.sh 15 20 0.15 3 -phase train
28 | ./run_trivial.sh 15 20 0.15 4 -phase train
29 | ./run_grad.sh 15 20 0.15 2 -phase train
30 | ./run_grad.sh 15 20 0.15 3 -phase train
31 | ./run_grad.sh 15 20 0.15 4 -phase train
32 | ./run_dqn.sh 15 20 0.15 2 -phase train
33 | ./run_dqn.sh 15 20 0.15 3 -phase train
34 | ./run_dqn.sh 15 20 0.15 4 -phase train
35 |
36 | ./run_trivial.sh 40 50 0.05 2 -phase train
37 | ./run_trivial.sh 40 50 0.05 3 -phase train
38 | ./run_trivial.sh 40 50 0.05 4 -phase train
39 | ./run_grad.sh 40 50 0.05 2 -phase train
40 | ./run_grad.sh 40 50 0.05 3 -phase train
41 | ./run_grad.sh 40 50 0.05 4 -phase train
42 | ./run_dqn.sh 40 50 0.05 2 -phase train
43 | ./run_dqn.sh 40 50 0.05 3 -phase train
44 | ./run_dqn.sh 40 50 0.05 4 -phase train
45 |
46 | ./run_trivial.sh 90 100 0.02 2 -phase train
47 | ./run_trivial.sh 90 100 0.02 3 -phase train
48 | ./run_trivial.sh 90 100 0.02 4 -phase train
49 | ./run_grad.sh 90 100 0.02 2 -phase train
50 | ./run_grad.sh 90 100 0.02 3 -phase train
51 | ./run_grad.sh 90 100 0.02 4 -phase train
52 | ./run_dqn.sh 90 100 0.02 2 -phase train
53 | ./run_dqn.sh 90 100 0.02 3 -phase train
54 | ./run_dqn.sh 90 100 0.02 4 -phase train
55 | ```
56 |
57 | ## Acknowledgements
58 | * https://github.com/Shen-Lab/GraphCL/tree/master/adversarialRobustness_Component
59 | * https://github.com/Hanjun-Dai/graph_adversarial_attack
60 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/Makefile:
--------------------------------------------------------------------------------
# Builds the CUDA kernel object(s) under $(build_root) and then the
# `_ext/my_lib` extension via build.py.
#
#   make / make all   compile every src/*.cu with nvcc, then run build.py
#   make clean        remove the generated objects and byte-code

# `all` and `clean` name actions, not files; without this declaration a stray
# file called `all` or `clean` would silently disable the target.
.PHONY: all clean

# Recipe helper: create the directory of the current target if it is missing.
dir_guard = @mkdir -p $(@D)

# Root of the Intel toolchain.  `?=` keeps a value supplied by the environment
# or the command line; the fallback avoids the bogus `/mkl` and `/tbb` include
# paths that an empty INTEL_ROOT would produce.
INTEL_ROOT ?= /opt/intel
MKL_ROOT = $(INTEL_ROOT)/mkl
TBB_ROOT = $(INTEL_ROOT)/tbb

FIND := find
CXX := g++
CXXFLAGS += -Wall -O3 -std=c++11
LDFLAGS += -lm -lmkl_rt -ltbb

CUDA_HOME := /usr/local/cuda-9.0
NVCC := $(CUDA_HOME)/bin/nvcc
NVCCFLAGS += --default-stream per-thread
LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand -lcusparse

CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
		-gencode arch=compute_35,code=sm_35 \
		-gencode arch=compute_50,code=sm_50 \
		-gencode arch=compute_50,code=compute_50

build_root = _ext
obj_build_root = $(build_root)

include_dirs = $(CUDA_HOME)/include $(MKL_ROOT)/include $(TBB_ROOT)/include include
CXXFLAGS += $(addprefix -I,$(include_dirs))
CXXFLAGS += -fPIC

NVCCFLAGS += $(addprefix -I,$(include_dirs))
NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC'

# One object file under $(obj_build_root) per .cu file found below src/.
cu_files = $(shell $(FIND) src/ -name "*.cu" -printf "%P\n")
cu_obj_files = $(subst .cu,.o,$(cu_files))
objs = $(addprefix $(obj_build_root)/,$(cu_obj_files))

DEPS = ${objs:.o=.d}
mylib = _ext/my_lib/_my_lib.so

all: $(objs) $(mylib)

# First nvcc call writes the dependency file (.d), the second one compiles.
$(obj_build_root)/%.o: src/%.cu
	$(dir_guard)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D)
	$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@

# build.py links the kernel object(s), so they are listed as prerequisites to
# keep the ordering correct under parallel builds (`make -j`).
$(mylib): $(objs) src/*.c src/*.h src/*.cu
	python build.py

clean:
	rm -f $(obj_build_root)/*.o
	rm -f $(obj_build_root)/*.d
	rm -rf _ext
	rm -f functions/*.pyc
	rm -f modules/*.pyc

# Pull in the generated dependency files; the leading `-` ignores missing ones.
-include $(DEPS)
55 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__init__.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/custom_kernel.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/custom_kernel.o
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__init__.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/_ext/my_lib/_my_lib.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/_ext/my_lib/_my_lib.so
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
# Build configuration for the `_ext.my_lib` FFI extension.
#
# The CPU sources are always compiled; the CUDA wrapper sources are added only
# when torch reports an available CUDA device.

# Absolute directory of this script.  (The original computed a throw-away
# `os.path.dirname(__file__)` first and then overwrote it with this value.)
this_file = os.path.dirname(os.path.realpath(__file__))

sources = ['src/my_lib.c']
headers = ['src/my_lib.h']
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/my_lib_cuda.c']
    headers += ['src/my_lib_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

# Prebuilt kernel object produced by the Makefile, referenced by absolute path.
extra_objects = ['_ext/custom_kernel.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

ffi = create_extension(
    '_ext.my_lib',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects,
    extra_compile_args=['-fopenmp'],
    extra_link_args=['-lgomp']
)

if __name__ == '__main__':
    ffi.build()
37 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/cmd_args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | # import cPickle as cp
3 | import pickle as cp
4 |
# Shared command-line options for the adversarial-robustness scripts.
#
# Everything below runs at import time: the parser is built, the process
# arguments are parsed into `cmd_args`, and the parsed namespace is printed.
# NOTE(review): the description string still says "molecule vae"; it looks
# inherited from another project — confirm before changing it.
cmd_opt = argparse.ArgumentParser(description='Argparser for molecule vae')
cmd_opt.add_argument('-data_folder', type=str, default=None, help='data folder')
cmd_opt.add_argument('-saved_model', type=str, default=None, help='saved model')
cmd_opt.add_argument('-save_dir', type=str, default=None, help='save folder')
cmd_opt.add_argument('-ctx', type=str, default='cpu', help='cpu/gpu')
cmd_opt.add_argument('-phase', type=str, default='test', help='train/test')
cmd_opt.add_argument('-logfile', type=str, default=None, help='log')

# graph generation / model size
cmd_opt.add_argument('-batch_size', type=int, default=50, help='minibatch size')
cmd_opt.add_argument('-seed', type=int, default=1, help='seed')
cmd_opt.add_argument('-min_n', type=int, default=0, help='min #nodes')
cmd_opt.add_argument('-max_n', type=int, default=0, help='max #nodes')
cmd_opt.add_argument('-min_c', type=int, default=0, help='min #components')
cmd_opt.add_argument('-max_c', type=int, default=0, help='max #components')
cmd_opt.add_argument('-er_p', type=float, default=0, help='parameter of er graphs')
cmd_opt.add_argument('-n_graphs', type=int, default=0, help='number of graphs')
# no `type=` here, so the value stays a plain string
cmd_opt.add_argument('-gm', default='mean_field', help='mean_field/loopy_bp/gcn')
cmd_opt.add_argument('-latent_dim', type=int, default=64, help='dimension of latent layers')
cmd_opt.add_argument('-out_dim', type=int, default=0, help='s2v output size')
cmd_opt.add_argument('-hidden', type=int, default=32, help='dimension of classification')
cmd_opt.add_argument('-max_lv', type=int, default=2, help='max rounds of message passing')

# optimisation
cmd_opt.add_argument('-num_epochs', type=int, default=1000, help='number of epochs')
cmd_opt.add_argument('-learning_rate', type=float, default=0.001, help='init learning_rate')
cmd_opt.add_argument('-weight_decay', type=float, default=5e-4, help='weight_decay')
cmd_opt.add_argument('-dropout', type=float, default=0.5, help='dropout rate')

# for node classification
cmd_opt.add_argument('-dataset', type=str, default=None, help='citeseer/cora/pubmed')
cmd_opt.add_argument('-feature_dim', type=int, default=None, help='node feature dim')
cmd_opt.add_argument('-num_class', type=int, default=None, help='# classes')
cmd_opt.add_argument('-adj_norm', type=int, default=1, help='normalize the adj or not')

# for bio graph classification
cmd_opt.add_argument('-feat_dim', type=int, default=0, help='dimension of node feature')
cmd_opt.add_argument('-fold', type=int, default=1, help='fold (1..10)')

# for AT-SimGRACE
cmd_opt.add_argument('-lr_inner', type=float, default=0.001, help='lr of inner opt')
# NOTE(review): '-epison' is presumably a misspelling of "epsilon"; renaming it
# would change the CLI and the `cmd_args.epison` attribute, so it is kept.
cmd_opt.add_argument('-epison', type=float, default=0.01, help='radius of perturbation ball')
# NOTE(review): the only flag spelled with a double dash, and the norm is
# parsed as an int — confirm both are intentional.
cmd_opt.add_argument('--clip_norm', type=int, default=50, help='Maximum norm of parameter gradient.')
# for attack

cmd_opt.add_argument('-idx_start', type=int, default=None, help='id of graph or node index')
cmd_opt.add_argument('-num_instances', type=int, default=None, help='num of samples for attack, in genetic algorithm')
cmd_opt.add_argument('-num_steps', type=int, default=100000, help='rl training steps')
cmd_opt.add_argument('-targeted', type=int, default=0, help='0/1 target attack or not')
cmd_opt.add_argument('-frac_meta', type=float, default=0, help='fraction for meta rl learning')
cmd_opt.add_argument('-meta_test', type=int, default=0, help='for meta rl learning')
cmd_opt.add_argument('-rand_att_type', type=str, default=None, help='random/exhaust')
cmd_opt.add_argument('-reward_type', type=str, default=None, help='binary/nll')
cmd_opt.add_argument('-base_model_dump', type=str, default=None, help='saved base model')
cmd_opt.add_argument('-num_mod', type=int, default=1, help='number of modifications allowed')

# for genetic algorithm
cmd_opt.add_argument('-population_size', type=int, default=100, help='population size')
cmd_opt.add_argument('-cross_rate', type=float, default=0.1, help='cross_rate')
cmd_opt.add_argument('-mutate_rate', type=float, default=0.2, help='mutate rate')
cmd_opt.add_argument('-rounds', type=int, default=10, help='rounds of evolution')

# for node attack
cmd_opt.add_argument('-bilin_q', type=int, default=0, help='bilinear q or not')
cmd_opt.add_argument('-mlp_hidden', type=int, default=64, help='mlp hidden layer size')
cmd_opt.add_argument('-n_hops', type=int, default=2, help='attack range')

# for defence
cmd_opt.add_argument('-del_rate', type=float, default=0, help='rate of deleting edge')

# parse_known_args() returns (namespace, leftovers); unrecognised arguments are
# discarded here instead of aborting the program.
cmd_args, _ = cmd_opt.parse_known_args()

# Side effect on import: echo the parsed options to stdout.
print(cmd_args)
76 |
def build_kwargs(keys, arg_dict):
    """Concatenate ``'<key>-<value>'`` fragments for every entry of *keys*.

    The fragments are emitted in the order of *keys* and joined back to back
    with no separator.  Values are looked up in *arg_dict* and converted with
    ``str``; a key that is missing from *arg_dict* raises ``KeyError``.
    """
    return ''.join('%s-%s' % (name, str(arg_dict[name])) for name in keys)
82 |
def save_args(fout, args):
    """Pickle *args* into the file at path *fout*.

    The file is opened in binary write mode (created or truncated) and the
    object is serialised with the highest protocol the pickle module offers.
    """
    with open(fout, 'wb') as sink:
        cp.Pickler(sink, cp.HIGHEST_PROTOCOL).dump(args)
86 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/cmd_args.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/cmd_args.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/dnn.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/dnn.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__init__.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/__pycache__/custom_func.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/custom_func.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from _ext import my_lib
4 | import sys
5 |
class JaggedLogSoftmax(Function):
    """Log-softmax applied independently to each segment of a flat tensor.

    Written as an instance-style autograd ``Function``: callers instantiate it
    and call the instance, ``JaggedLogSoftmax()(logits, prefix_sum)``.

    ``prefix_sum`` must be 1-D.  The CPU kernel treats ``prefix_sum[i]`` as the
    end offset of segment ``i`` (segment 0 starts at offset 0).
    """

    def forward(self, logits, prefix_sum):
        """Return a tensor shaped like ``logits`` holding the per-segment log-softmax."""
        assert len(prefix_sum.size()) == 1
        output = logits.new()
        if not logits.is_cuda:
            my_lib.jagged_log_softmax_forward(logits, prefix_sum, output)
        else:
            my_lib.jagged_log_softmax_forward_cuda(logits, prefix_sum, output)

        # Saved exactly once, after `output` exists.  The original also saved
        # `prefix_sum` alone at the top of the method; that call was redundant
        # because this one replaced it.
        self.save_for_backward(prefix_sum, output)
        return output

    def backward(self, grad_output):
        """Return ``(grad_logits, None)``; ``prefix_sum`` receives no gradient."""
        prefix_sum, output = self.saved_variables
        grad_input = grad_output.new()
        if not grad_output.is_cuda:
            my_lib.jagged_log_softmax_backward(output.data, grad_output, prefix_sum.data, grad_input)
        else:
            my_lib.jagged_log_softmax_backward_cuda(output.data, grad_output, prefix_sum.data, grad_input)
        return grad_input, None
28 |
class JaggedArgmax(Function):
    """Per-segment argmax over a flat tensor (forward only).

    ``prefix_sum`` must be 1-D.  The CPU kernel resizes the result like
    ``prefix_sum`` and stores, for each segment, the position of its largest
    element relative to the start of that segment.
    """

    def forward(self, values, prefix_sum):
        """Return a tensor of per-segment argmax positions."""
        assert len(prefix_sum.size()) == 1
        output = prefix_sum.new()
        if not values.is_cuda:
            my_lib.jagged_argmax_forward(values, prefix_sum, output)
        else:
            my_lib.jagged_argmax_forward_cuda(values, prefix_sum, output)

        return output

    def backward(self, grad_output):
        """Always fail: no gradient is defined for this operation."""
        # Raised explicitly because `assert False` is stripped by `python -O`,
        # which would let backward return None silently.  The exception type
        # is the same one the assert produced.
        raise AssertionError('JaggedArgmax does not support backward')
42 |
class JaggedMax(Function):
    """Per-segment maximum and its position over a flat tensor (forward only).

    ``prefix_sum`` must be 1-D.  The CPU kernel sizes both results to the
    length of ``prefix_sum``; positions are relative to each segment's start.
    """

    def forward(self, values, prefix_sum):
        """Return ``(vmax, idxes)``: per-segment maxima and their positions."""
        assert len(prefix_sum.size()) == 1
        idxes = prefix_sum.new()
        vmax = values.new()
        if not values.is_cuda:
            my_lib.jagged_max_forward(values, prefix_sum, vmax, idxes)
        else:
            my_lib.jagged_max_forward_cuda(values, prefix_sum, vmax, idxes)

        return vmax, idxes

    def backward(self, grad_output):
        """Always fail: no gradient is defined for this operation."""
        # Raised explicitly because `assert False` is stripped by `python -O`,
        # which would let backward return None silently.  The exception type
        # is the same one the assert produced.
        raise AssertionError('JaggedMax does not support backward')
57 |
def GraphLaplacianNorm(raw_adj):
    """Hand ``raw_adj``'s indices/values and its sqrt row sums to the native kernel.

    The row sums come from multiplying ``raw_adj`` by a column of ones; their
    element-wise square root is passed as ``norm``.  Nothing is returned.
    NOTE(review): the kernel presumably rescales ``raw_adj``'s stored values in
    place — its body is not visible here, confirm against my_lib.
    """
    num_rows = raw_adj.size()[0]
    ones = torch.ones(num_rows, 1)
    if raw_adj.is_cuda:
        ones = ones.cuda()
    row_sums = torch.mm(raw_adj, ones)
    norm = row_sums ** 0.5

    indices = raw_adj._indices()
    values = raw_adj._values()
    # Select the CPU or CUDA entry point from where the values live.
    kernel = my_lib.graph_laplacian_norm_cuda if values.is_cuda else my_lib.graph_laplacian_norm
    kernel(indices, values, norm)
69 |
def GraphDegreeNorm(raw_adj):
    """Hand ``raw_adj``'s indices/values and its row sums to the native kernel.

    The row sums come from multiplying ``raw_adj`` by a column of ones and are
    passed unchanged as ``norm``.  Nothing is returned.
    NOTE(review): the kernel presumably rescales ``raw_adj``'s stored values in
    place — its body is not visible here, confirm against my_lib.
    """
    num_rows = raw_adj.size()[0]
    ones = torch.ones(num_rows, 1)
    if raw_adj.is_cuda:
        ones = ones.cuda()
    norm = torch.mm(raw_adj, ones)

    indices = raw_adj._indices()
    values = raw_adj._values()
    # Select the CPU or CUDA entry point from where the values live.
    kernel = my_lib.graph_degree_norm_cuda if values.is_cuda else my_lib.graph_degree_norm
    kernel(indices, values, norm)
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/functions/custom_func.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/functions/custom_func.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/graph_embedding.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/graph_embedding.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__init__.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/__pycache__/custom_mod.cpython-37.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/custom_mod.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from functions.custom_func import JaggedLogSoftmax, JaggedArgmax, JaggedMax
3 | import networkx as nx
4 | import numpy as np
5 |
class JaggedLogSoftmaxModule(Module):
    """Module wrapper that runs a freshly created ``JaggedLogSoftmax`` per call."""

    def forward(self, logits, prefix_sum):
        """Apply ``JaggedLogSoftmax`` to ``logits`` with ``prefix_sum``."""
        op = JaggedLogSoftmax()
        return op(logits, prefix_sum)
9 |
class JaggedArgmaxModule(Module):
    """Module wrapper that runs a freshly created ``JaggedArgmax`` per call."""

    def forward(self, values, prefix_sum):
        """Apply ``JaggedArgmax`` to ``values`` with ``prefix_sum``."""
        op = JaggedArgmax()
        return op(values, prefix_sum)
13 |
class JaggedMaxModule(Module):
    """Module wrapper that runs a freshly created ``JaggedMax`` per call."""

    def forward(self, values, prefix_sum):
        """Apply ``JaggedMax`` to ``values`` with ``prefix_sum``."""
        op = JaggedMax()
        return op(values, prefix_sum)
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/modules/custom_mod.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/common/modules/custom_mod.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/custom_kernel.h:
--------------------------------------------------------------------------------
/*
 * C-linkage declarations of the host-side entry points for the custom CUDA
 * kernels.  Every function takes the CUDA stream to use as its first argument.
 *
 * NOTE(review): cudaStream_t is used but no CUDA header is included here;
 * presumably every includer pulls in the CUDA runtime declarations first --
 * confirm.
 * NOTE(review): by analogy with the CPU routines in my_lib.c, `ps` is
 * presumably the prefix-sum array of segment end offsets and `bsize` the
 * number of segments -- confirm against custom_kernel.cu.
 */
#ifndef JAGGED_SOFTMAX_KERNEL_H
#define JAGGED_SOFTMAX_KERNEL_H

#ifdef __cplusplus
extern "C" {
#endif

/* Forward pass of the jagged (log-)softmax: reads `input`, writes `output`. */
void HostSoftMaxForward(cudaStream_t stream, float *input, float *output, long* ps, int bsize);

/* Backward pass counterpart: takes gradOutput and the forward `output`, fills gradInput. */
void HostSoftMaxBackward(cudaStream_t stream, float *gradOutput, float *gradInput, float *output, long* ps, int bsize);

/* Jagged argmax: the result is written to the long array `output`. */
void HostArgmaxForward(cudaStream_t stream, float *input, long *output, long* ps, int bsize);

/* Jagged max: values go to `vmax`, positions to `idxes`. */
void HostMaxForward(cudaStream_t stream, float *input, float* vmax, long *idxes, long* ps, int bsize);

/* Graph Laplacian normalisation over nnz entries (row/col indices, values, norm). */
void HostGLapNorm(cudaStream_t stream, long* row_indices, long* col_indices, float* p_v, float* p_norm, int nnz);

/* Degree normalisation over nnz entries (row indices, values, norm). */
void HostGDegreeNorm(cudaStream_t stream, long* row_indices, float* p_v, float* p_norm, int nnz);

#ifdef __cplusplus
}
#endif

#endif
25 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output)
5 | {
6 | values = THFloatTensor_newContiguous(values);
7 | THLongTensor_resizeAs(output, prefix_sum);
8 |
9 | float *input_data_base = values->storage->data + values->storageOffset;;
10 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
11 | long *p_out = output->storage->data + output->storageOffset;
12 | long bsize = (long)prefix_sum->size[0];
13 | long i, d;
14 |
15 | #pragma omp parallel for private(i, d)
16 | for (i = 0; i < bsize; i++)
17 | {
18 | long offset = (i == 0) ? 0 : ps[i - 1];
19 | long n_ele = ps[i] - offset;
20 |
21 | float* input_data = input_data_base + offset;
22 |
23 | float max_input = -FLT_MAX;
24 | long max_id = -1;
25 | for (d = 0; d < n_ele; d++)
26 | if (input_data[d] > max_input)
27 | {
28 | max_input = input_data[d];
29 | max_id = d;
30 | }
31 | assert(max_id >= 0);
32 | p_out[i] = max_id;
33 | }
34 |
35 | THFloatTensor_free(values);
36 | return 1;
37 | }
38 |
39 | int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes)
40 | {
41 | int64_t inputsize = prefix_sum->size[0];
42 |
43 | values = THFloatTensor_newContiguous(values);
44 | THLongTensor_resize1d(idxes, inputsize);
45 | THFloatTensor_resize1d(vmax, inputsize);
46 |
47 | float *input_data_base = values->storage->data + values->storageOffset;
48 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
49 | float *p_maxv = vmax->storage->data + vmax->storageOffset;
50 | long *p_i = idxes->storage->data + idxes->storageOffset;
51 |
52 | long bsize = (long)prefix_sum->size[0];
53 | long i, d;
54 |
55 | #pragma omp parallel for private(i, d)
56 | for (i = 0; i < bsize; i++)
57 | {
58 | long offset = (i == 0) ? 0 : ps[i - 1];
59 | long n_ele = ps[i] - offset;
60 |
61 | float* input_data = input_data_base + offset;
62 |
63 | float max_input = -FLT_MAX;
64 | long max_id = -1;
65 | for (d = 0; d < n_ele; d++)
66 | if (input_data[d] > max_input)
67 | {
68 | max_input = input_data[d];
69 | max_id = d;
70 | }
71 | assert(max_id >= 0);
72 | p_i[i] = max_id;
73 | p_maxv[i] = max_input;
74 | }
75 |
76 | THFloatTensor_free(values);
77 | return 1;
78 | }
79 |
80 | int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output)
81 | {
82 | logits = THFloatTensor_newContiguous(logits);
83 | THFloatTensor_resizeAs(output, logits);
84 | float *input_data_base = logits->storage->data + logits->storageOffset;// THTensor_(data)(logits);
85 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
86 | float *output_data_base = output->storage->data + output->storageOffset;
87 | uint64_t bsize = (uint64_t)prefix_sum->size[0];
88 | uint64_t i, d;
89 |
90 | #pragma omp parallel for private(i, d)
91 | for (i = 0; i < bsize; i++)
92 | {
93 | long offset = (i == 0) ? 0 : ps[i - 1];
94 |
95 | float* input_data = input_data_base + offset;
96 | float* output_data = output_data_base + offset;
97 |
98 | long n_ele = ps[i] - offset;
99 | float max_input = -FLT_MAX;
100 | for (d = 0; d < n_ele; d++)
101 | max_input = THMax(max_input, input_data[d]);
102 |
103 | double logsum = 0;
104 | for (d = 0; d < n_ele; d++)
105 | logsum += exp(input_data[d] - max_input);
106 | logsum = max_input + log(logsum);
107 |
108 | for (d = 0; d < n_ele; d++)
109 | output_data[d] = input_data[d] - logsum;
110 | }
111 |
112 | THFloatTensor_free(logits);
113 | return 1;
114 | }
115 |
116 | int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input)
117 | {
118 | grad_output = THFloatTensor_newContiguous(grad_output);
119 | output = THFloatTensor_newContiguous(output);
120 | THFloatTensor_resizeAs(grad_input, grad_output);
121 |
122 | float *output_data_base = output->storage->data + output->storageOffset;
123 | float *gradOutput_data_base = grad_output->storage->data + grad_output->storageOffset;
124 | long *ps = prefix_sum->storage->data + prefix_sum->storageOffset;
125 | float *gradInput_data_base = grad_input->storage->data + grad_input->storageOffset;
126 |
127 | uint64_t bsize = (uint64_t)prefix_sum->size[0];
128 | uint64_t i, d;
129 | #pragma omp parallel for private(i, d)
130 | for (i = 0; i < bsize; i++)
131 | {
132 | long offset = (i == 0) ? 0 : ps[i - 1];
133 | float *gradInput_data = gradInput_data_base + offset;
134 | float *output_data = output_data_base + offset;
135 | float *gradOutput_data = gradOutput_data_base + offset;
136 |
137 | double sum = 0;
138 | long n_ele = ps[i] - offset;
139 | for (d = 0; d < n_ele; d++)
140 | sum += gradOutput_data[d];
141 |
142 | for (d = 0; d < n_ele; d++)
143 | gradInput_data[d] = gradOutput_data[d] - exp(output_data[d]) * sum;
144 | }
145 |
146 | THFloatTensor_free(grad_output);
147 | THFloatTensor_free(output);
148 | return 1;
149 | }
150 |
151 | int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm)
152 | {
153 | uint64_t nnz = (uint64_t)values->size[0];
154 | long *row_indices = indices->storage->data + indices->storageOffset;
155 | long *col_indices = row_indices + indices->stride[0];
156 | float *p_v = values->storage->data + values->storageOffset;
157 | float *p_norm = norm->storage->data + norm->storageOffset;
158 |
159 | uint64_t i;
160 | #pragma omp parallel for private(i)
161 | for (i = 0; i < nnz; i++)
162 | {
163 | float norm = p_norm[ row_indices[i] ] * p_norm[ col_indices[i] ];
164 | p_v[i] /= norm;
165 | }
166 |
167 | return 1;
168 | }
169 |
170 | int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm)
171 | {
172 | uint64_t nnz = (uint64_t)values->size[0];
173 | long *row_indices = indices->storage->data + indices->storageOffset;
174 | float *p_v = values->storage->data + values->storageOffset;
175 | float *p_norm = norm->storage->data + norm->storageOffset;
176 |
177 | uint64_t i;
178 | #pragma omp parallel for private(i)
179 | for (i = 0; i < nnz; i++)
180 | {
181 | float norm = p_norm[ row_indices[i] ];
182 | p_v[i] /= norm;
183 | }
184 |
185 | return 1;
186 | }
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib.h:
--------------------------------------------------------------------------------
1 | int jagged_log_softmax_forward(THFloatTensor *logits, THLongTensor *prefix_sum, THFloatTensor *output);
2 |
3 | int jagged_log_softmax_backward(THFloatTensor *output, THFloatTensor *grad_output, THLongTensor *prefix_sum, THFloatTensor *grad_input);
4 |
5 | int jagged_argmax_forward(THFloatTensor *values, THLongTensor *prefix_sum, THLongTensor *output);
6 |
7 | int jagged_max_forward(THFloatTensor *values, THLongTensor *prefix_sum, THFloatTensor *vmax, THLongTensor *idxes);
8 |
9 | int graph_laplacian_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm);
10 |
11 | int graph_degree_norm(THLongTensor *indices, THFloatTensor *values, THFloatTensor *norm);
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #include "custom_kernel.h"
4 |
5 | // this symbol will be resolved automatically from PyTorch libs
6 | extern THCState *state;
7 |
8 | int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output)
9 | {
10 | logits = THCudaTensor_newContiguous(state, logits);
11 | THCudaTensor_resizeAs(state, output, logits);
12 |
13 | float *input_data_base = THCudaTensor_data(state, logits);
14 | long* ps = THCudaLongTensor_data(state, prefix_sum);
15 | float *output_data_base = THCudaTensor_data(state, output);
16 |
17 | int bsize = (int)prefix_sum->size[0];
18 | cudaStream_t stream = THCState_getCurrentStream(state);
19 | HostSoftMaxForward(stream, input_data_base, output_data_base, ps, bsize);
20 |
21 | THCudaTensor_free(state, logits);
22 | return 1;
23 | }
24 |
25 | int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input)
26 | {
27 | output = THCudaTensor_newContiguous(state, output);
28 | grad_output = THCudaTensor_newContiguous(state, grad_output);
29 |
30 | THCudaTensor_resizeAs(state, grad_input, grad_output);
31 | float *output_data_base = THCudaTensor_data(state, output);
32 | float *gradOutput_data_base = THCudaTensor_data(state, grad_output);
33 | long* ps = THCudaLongTensor_data(state, prefix_sum);
34 | float *gradInput_data_base = THCudaTensor_data(state, grad_input);
35 |
36 | int bsize = (int)prefix_sum->size[0];
37 | cudaStream_t stream = THCState_getCurrentStream(state);
38 | HostSoftMaxBackward(stream, gradOutput_data_base, gradInput_data_base, output_data_base, ps, bsize);
39 | THCudaTensor_free(state, grad_output);
40 | THCudaTensor_free(state, output);
41 | return 1;
42 | }
43 |
44 | int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output)
45 | {
46 | values = THCudaTensor_newContiguous(state, values);
47 | THCudaLongTensor_resizeAs(state, output, prefix_sum);
48 |
49 | float *input_data_base = THCudaTensor_data(state, values);
50 | long* ps = THCudaLongTensor_data(state, prefix_sum);
51 | long *output_data_base = THCudaLongTensor_data(state, output);
52 |
53 | int bsize = (int)prefix_sum->size[0];
54 | cudaStream_t stream = THCState_getCurrentStream(state);
55 | HostArgmaxForward(stream, input_data_base, output_data_base, ps, bsize);
56 |
57 | THCudaTensor_free(state, values);
58 | return 1;
59 | }
60 |
61 | int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes)
62 | {
63 | int64_t inputsize = prefix_sum->size[0];
64 | values = THCudaTensor_newContiguous(state, values);
65 | THCudaLongTensor_resize1d(state, idxes, inputsize);
66 | THCudaTensor_resize1d(state, vmax, inputsize);
67 |
68 | float *input_data_base = THCudaTensor_data(state, values);
69 | long* ps = THCudaLongTensor_data(state, prefix_sum);
70 | long *p_i = THCudaLongTensor_data(state, idxes);
71 | float *p_maxv = THCudaTensor_data(state, vmax);
72 |
73 | int bsize = (int)prefix_sum->size[0];
74 | cudaStream_t stream = THCState_getCurrentStream(state);
75 | HostMaxForward(stream, input_data_base, p_maxv, p_i, ps, bsize);
76 |
77 | THCudaTensor_free(state, values);
78 | return 1;
79 | }
80 |
81 | int graph_laplacian_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm)
82 | {
83 | uint64_t nnz = (uint64_t)values->size[0];
84 | long *row_indices = THCudaLongTensor_data(state, indices);
85 | long *col_indices = row_indices + THCudaLongTensor_stride(state, indices, 0);
86 | float *p_v = THCudaTensor_data(state, values);
87 | float *p_norm = THCudaTensor_data(state, norm);
88 |
89 | cudaStream_t stream = THCState_getCurrentStream(state);
90 | HostGLapNorm(stream, row_indices, col_indices, p_v, p_norm, nnz);
91 | return 1;
92 | }
93 |
94 | int graph_degree_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm)
95 | {
96 | uint64_t nnz = (uint64_t)values->size[0];
97 | long *row_indices = THCudaLongTensor_data(state, indices);
98 | float *p_v = THCudaTensor_data(state, values);
99 | float *p_norm = THCudaTensor_data(state, norm);
100 |
101 | cudaStream_t stream = THCState_getCurrentStream(state);
102 | HostGDegreeNorm(stream, row_indices, p_v, p_norm, nnz);
103 | return 1;
104 | }
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/src/my_lib_cuda.h:
--------------------------------------------------------------------------------
1 | int jagged_log_softmax_forward_cuda(THCudaTensor *logits, THCudaLongTensor *prefix_sum, THCudaTensor *output);
2 |
3 | int jagged_log_softmax_backward_cuda(THCudaTensor *output, THCudaTensor *grad_output, THCudaLongTensor *prefix_sum, THCudaTensor *grad_input);
4 |
5 | int jagged_argmax_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaLongTensor *output);
6 |
7 | int jagged_max_forward_cuda(THCudaTensor *values, THCudaLongTensor *prefix_sum, THCudaTensor *vmax, THCudaLongTensor *idxes);
8 |
9 | int graph_laplacian_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm);
10 |
11 | int graph_degree_norm_cuda(THCudaLongTensor *indices, THCudaTensor *values, THCudaTensor *norm);
--------------------------------------------------------------------------------
/adversarial_robustness/code/common/test.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Function
4 | import torch.nn.functional as F
5 | from torch.autograd import Variable
6 | import numpy as np
7 | from modules.custom_mod import JaggedLogSoftmaxModule, JaggedArgmaxModule, JaggedMaxModule
8 | import sys
9 |
def cpu_test():
    """Compare JaggedLogSoftmaxModule with F.log_softmax on the CPU.

    With equal-length segments (one per matrix row) the jagged module should
    agree with a row-wise ``F.log_softmax``. The summed absolute difference of
    the forward outputs is printed ten times, then once for the input
    gradients.
    """
    jagged = JaggedLogSoftmaxModule()

    # Forward agreement on fresh random inputs.
    for _ in range(10):
        logits = torch.rand(10000, 10)
        n_rows, n_cols = logits.size()[0], int(logits.size()[1])
        ends = torch.from_numpy(np.array([(k + 1) * n_cols for k in range(n_rows)]))
        out_jagged = jagged(Variable(logits), Variable(ends))
        out_ref = F.log_softmax(Variable(logits), dim=1)
        print(torch.sum(torch.abs(out_jagged - out_ref)))

    # Backward agreement: gradient of mean(log_softmax) w.r.t. the input.
    logits = torch.rand(100, 30)
    ends = torch.from_numpy(np.array([(k + 1) * 30 for k in range(100)]))

    leaf = Variable(logits, requires_grad=True)
    torch.mean(jagged(leaf, Variable(ends))).backward()
    grad_jagged = leaf.grad

    leaf = Variable(logits, requires_grad=True)
    torch.mean(F.log_softmax(leaf, dim=1)).backward()
    grad_ref = leaf.grad

    print(torch.sum(torch.abs(grad_jagged - grad_ref)))
35 |
def gpu_test():
    """Compare JaggedLogSoftmaxModule with F.log_softmax on the GPU.

    Three results are compared pairwise: the jagged module on CUDA,
    ``F.log_softmax`` on CUDA, and ``F.log_softmax`` on the CPU (moved to CUDA
    afterwards). The three summed absolute differences are printed for the
    forward outputs (ten random inputs) and once for the input gradients.
    Requires a CUDA device.
    """
    def abs_diff(lhs, rhs):
        # .item() replaces the legacy `.data[0]`, which raises on 0-dim
        # tensors in current PyTorch releases.
        return torch.sum(torch.abs(lhs - rhs)).item()

    mod = JaggedLogSoftmaxModule()
    for _ in range(10):
        a = torch.rand(10000, 10).cuda()
        b = torch.from_numpy(np.array([(k + 1) * int(a.size()[1]) for k in range(a.size()[0])])).cuda()
        c1 = mod(Variable(a), Variable(b))
        c2 = F.log_softmax(Variable(a), dim=1)
        c3 = F.log_softmax(Variable(a.cpu()), dim=1).cuda()
        print(abs_diff(c3, c2), abs_diff(c3, c1), abs_diff(c2, c1))

    a = torch.rand(1000, 100).cuda()
    b = torch.from_numpy(np.array([(k + 1) * int(a.size()[1]) for k in range(a.size()[0])])).cuda()

    # Gradient through the jagged module.
    va = Variable(a, requires_grad=True)
    vb = Variable(b)
    c = mod(va, vb)
    t = torch.sum(c)
    t.backward()
    b1 = va.grad

    # Gradient through the built-in log_softmax on the GPU.
    va = Variable(a, requires_grad=True)
    c = F.log_softmax(va, dim=1)
    t = torch.sum(c)
    t.backward()
    b2 = va.grad

    # Gradient through the built-in log_softmax on the CPU.
    va = Variable(a.cpu(), requires_grad=True)
    c = F.log_softmax(va, dim=1)
    t = torch.sum(c)
    t.backward()
    b3 = va.grad.cuda()

    print(abs_diff(b3, b2), abs_diff(b3, b1), abs_diff(b2, b1))
67 |
def argmax():
    """Print the results of JaggedArgmaxModule and JaggedMaxModule on CUDA inputs.

    Each module is run on a 10x4 random matrix split into one segment per row
    and on a length-10 random vector split at offsets [2, 5, 9, 10]. Inputs
    and outputs are printed for visual inspection; nothing is asserted.
    Requires a CUDA device.
    """
    def row_ends(mat):
        # Cumulative end offset of every row when `mat` is viewed as flat.
        n_rows, n_cols = mat.size()[0], int(mat.size()[1])
        return torch.from_numpy(np.array([(r + 1) * n_cols for r in range(n_rows)])).cuda()

    # --- argmax only ---
    torch.manual_seed(1)
    arg_mod = JaggedArgmaxModule()

    mat = torch.rand(10, 4).cuda()
    print(mat)
    print(arg_mod(Variable(mat), Variable(row_ends(mat))))

    vec = torch.randn(10).cuda()
    print(vec)
    ragged_ends = torch.LongTensor([2, 5, 9, 10]).cuda()
    print(arg_mod(Variable(vec), Variable(ragged_ends)))

    # --- max: two outputs ---
    torch.manual_seed(1)
    max_mod = JaggedMaxModule()

    mat = torch.rand(10, 4).cuda()
    print(mat)
    first, second = max_mod(Variable(mat), Variable(row_ends(mat)))
    print(first)
    print(second)

    vec = torch.randn(10).cuda()
    print(vec)
    ragged_ends = torch.LongTensor([2, 5, 9, 10]).cuda()
    result = max_mod(Variable(vec), Variable(ragged_ends))
    print(result[0], result[1])
--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/data_util.py:
--------------------------------------------------------------------------------
1 | # import cPickle as cp
2 | import pickle as cp
3 | import networkx as nx
4 |
def load_pkl(fname, num_graph):
    """Read ``num_graph`` consecutively pickled objects from ``fname``.

    The file is expected to hold at least ``num_graph`` pickle records written
    back to back; they are returned as a list in file order.

    NOTE: unpickling executes arbitrary code, so only load trusted files.
    """
    with open(fname, 'rb') as stream:
        return [cp.load(stream) for _ in range(num_graph)]
12 |
def g2txt(g, label, fid):
    """Write graph ``g`` to the text stream ``fid`` as an adjacency list.

    Output format: a header line ``"<num_nodes> <label>"`` followed by one line
    per node ``i`` in ``range(len(g))``: ``"<degree> <nbr_1> <nbr_2> ..."``.
    Nodes are therefore expected to be the integers ``0 .. len(g) - 1``.

    ``g.neighbors(i)`` is materialised into a list first because it returns a
    list in networkx 1.x but an iterator (which has no ``len``) in 2.x+.
    """
    fid.write('%d %d\n' % (len(g), label))
    for node in range(len(g)):
        nbrs = list(g.neighbors(node))
        fid.write('%d' % len(nbrs))
        for nbr in nbrs:
            fid.write(' %d' % nbr)
        fid.write('\n')
--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/data_util.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/data_generator/data_util.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/gen_er_components.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | # import cPickle as cp
4 | import pickle as cp
5 | import random
6 | import numpy as np
7 | import networkx as nx
8 | import time
9 | from tqdm import tqdm
10 |
11 |
def get_component():
    """Sample one connected Erdos-Renyi graph.

    Reads the module-level settings ``min_n``, ``max_n`` and ``p`` (assigned in
    the ``__main__`` block): the node count is drawn uniformly from
    ``[min_n, max_n]`` and edges are sampled with probability ``p``. If the
    sample is disconnected, its components are shuffled and each consecutive
    pair is joined by one edge between randomly chosen nodes.

    Returns:
        The resulting connected networkx graph.
    """
    cur_n = np.random.randint(max_n - min_n + 1) + min_n
    g = nx.erdos_renyi_graph(n=cur_n, p=p)

    # nx.connected_component_subgraphs was removed in networkx 2.4, and
    # random.choice on a 2.x NodeView indexes by node key rather than position.
    # Plain node lists from connected_components work on old and new releases.
    comps = [list(nodes) for nodes in nx.connected_components(g)]
    random.shuffle(comps)
    for prev_nodes, cur_nodes in zip(comps, comps[1:]):
        x = random.choice(prev_nodes)
        y = random.choice(cur_nodes)
        g.add_edge(x, y)
    assert nx.is_connected(g)
    return g
24 |
if __name__ == '__main__':
    # Command line: "-flag value" pairs. Every flag below is mandatory.
    converters = {
        '-save_dir': str,
        '-max_n': int,
        '-min_n': int,
        '-num_graph': int,
        '-p': float,
        '-n_comp': int,
    }
    parsed = dict.fromkeys(converters)
    for pos in range(1, len(sys.argv), 2):
        flag = sys.argv[pos]
        if flag in converters:
            parsed[flag] = converters[flag](sys.argv[pos + 1])

    # These stay module-level names: get_component() reads min_n, max_n and p.
    save_dir = parsed['-save_dir']
    max_n = parsed['-max_n']
    min_n = parsed['-min_n']
    num_graph = parsed['-num_graph']
    p = parsed['-p']
    n_comp = parsed['-n_comp']

    assert save_dir is not None
    assert max_n is not None
    assert min_n is not None
    assert num_graph is not None
    assert p is not None
    assert n_comp is not None

    # The file name records the size range as given on the command line ...
    fout_name = '%s/ncomp-%d-nrange-%d-%d-n_graph-%d-p-%.2f.pkl' % (save_dir, n_comp, min_n, max_n, num_graph, p)
    print('Final Output: ' + fout_name)
    print("Generating graphs...")
    # ... while each component only receives its share of that range.
    min_n = min_n // n_comp
    max_n = max_n // n_comp

    for _ in tqdm(range(num_graph)):
        for comp_idx in range(n_comp):
            component = get_component()
            g_all = component if comp_idx == 0 else nx.disjoint_union(g_all, component)
        assert nx.number_connected_components(g_all) == n_comp

        # One pickle record is appended per graph.
        with open(fout_name, 'ab') as fout:
            cp.dump(g_all, fout, cp.HIGHEST_PROTOCOL)
72 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/data_generator/pkl_dump.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Generate the Erdos-Renyi "components" datasets: one pickle file per
# component count (1..5), 5000 graphs each, written below $output_root.

min_n=90
max_n=100
p=0.02
output_root=../../dropbox/data/components

# Create the output directory only when nothing exists at that path yet.
[ -e "$output_root" ] || mkdir -p "$output_root"

for n_comp in 1 2 3 4 5; do
    python gen_er_components.py \
        -save_dir "$output_root" \
        -max_n "$max_n" \
        -min_n "$min_n" \
        -num_graph 5000 \
        -p "$p" \
        -n_comp "$n_comp"
done
26 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/collect_rl_results.py:
--------------------------------------------------------------------------------
1 | import os
2 |
if __name__ == '__main__':
    # For every target directory below result_root, report the configuration
    # whose epoch-best.txt starts with the smallest number.
    result_root = '../../dropbox/scratch/results/graph_classification/components'
    for fname in sorted(os.listdir(result_root)):
        if fname[0] == '.':
            continue
        target_dir = os.path.join(result_root, fname)
        best_num = 100
        best_config = None

        for config in os.listdir(target_dir):
            # Skip hidden entries and stray epoch-best files at this level.
            if config[0] == '.' or 'epoch-best' in config:
                continue
            # Configurations whose name contains '0.1' are excluded.
            if '0.1' in config:
                continue
            result = os.path.join(target_dir, config, 'epoch-best.txt')
            with open(result, 'r') as f:
                num = float(f.readline().strip())
            if num < best_num:
                best_config = config
                best_num = num
        # print() call instead of the Python 2 print statement, which is a
        # SyntaxError under Python 3.
        print(fname, best_config, best_num)
26 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/er_trivial_attack.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import numpy as np
6 | import torch
7 | import networkx as nx
8 | import random
9 | from torch.autograd import Variable
10 | from torch.nn.parameter import Parameter
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 | from tqdm import tqdm
15 | from copy import deepcopy
16 |
17 | from q_net import NStepQNet, QNet, greedy_actions
18 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
19 | from cmd_args import cmd_args
20 | from graph_embedding import S2VGraph
21 |
22 | from rl_common import GraphEdgeEnv, load_graphs, test_graphs, attackable, load_base_model
23 |
24 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
25 | from graph_common import loop_dataset, load_er_data
26 |
def propose_attack(model, s2v_g, num_added=1):
    """Propose a perturbed copy of ``s2v_g`` by adding intra-component edges.

    Candidate edges are all node pairs ``(i, j)`` with ``i < j`` that lie in
    the same connected component, so adding one never changes the number of
    components. ``cmd_args.rand_att_type`` selects the strategy:

    * ``'random'``  - add ``num_added`` candidates drawn with replacement.
    * ``'exhaust'`` - try every single candidate edge and return the first
      graph the model misclassifies; if none is found, the graph built from
      the first candidate is returned. ``num_added`` is ignored here.

    Args:
        model: classifier; called on a list of S2VGraph, its third return
            value is read as per-graph correctness.
        s2v_g: graph to attack (provides ``to_networkx()`` and ``label``).
        num_added: number of edges for the ``'random'`` strategy.

    Returns:
        A new S2VGraph carrying the original label.

    Raises:
        NotImplementedError: for any other ``cmd_args.rand_att_type``.
    """
    g = s2v_g.to_networkx()

    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # connected_components yields the same node partition on all releases.
    set_id = {}
    for comp_id, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_id

    n_nodes = len(g)
    cand = [(i, j)
            for i in range(n_nodes - 1)
            for j in range(i + 1, n_nodes)
            if set_id[i] == set_id[j]]

    if cmd_args.rand_att_type == 'random':
        # Draw positions rather than the pairs themselves: np.random.choice
        # needs a 1-D population.
        picks = np.random.choice(len(cand), num_added)
        g.add_edges_from([cand[int(k)] for k in picks])
        return S2VGraph(g, s2v_g.label)
    elif cmd_args.rand_att_type == 'exhaust':
        g_list = []
        for x, y in cand:
            g2 = g.copy()
            g2.add_edge(x, y)
            g_list.append(S2VGraph(g2, s2v_g.label))
        _, _, acc = model(g_list)
        # Convert once; .cpu() keeps this working when the model runs on GPU.
        acc = acc.cpu().numpy()
        for attacked, correct in zip(g_list, acc):
            if correct < 1:
                return attacked
        return g_list[0]
    else:
        raise NotImplementedError
64 |
if __name__ == '__main__':
    # Seed every RNG in use so a run is reproducible.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(cmd_args.seed)

    label_map, train_glist, test_glist = load_er_data()

    base_classifier = load_base_model(label_map, test_glist)

    # Attack each test graph, then evaluate the classifier on the results.
    new_test_list = [propose_attack(base_classifier, g) for g in tqdm(test_glist)]

    test_graphs(base_classifier, new_test_list)
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/grad_attack.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import sys
4 | import numpy as np
5 | import torch
6 | import networkx as nx
7 | import random
8 | from torch.autograd import Variable
9 | from torch.nn.parameter import Parameter
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import torch.optim as optim
13 | from tqdm import tqdm
14 | from copy import deepcopy
15 |
16 | from q_net import NStepQNet, QNet, greedy_actions
17 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
18 | from cmd_args import cmd_args
19 | from graph_embedding import S2VGraph
20 |
21 | from rl_common import GraphEdgeEnv, load_graphs, test_graphs, attackable, load_base_model
22 |
23 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
24 | from graph_common import loop_dataset, load_er_data
25 |
def propose_attack(model, s2v_g, num_added=1):
    """Gradient-guided edge-addition attack on a single graph.

    For every label other than the true one, the loss towards that label is
    back-propagated to the node-to-node matrix returned by ``model.s2v`` under
    the ``'n2n'`` key. Entries are visited in ascending gradient order and the
    first ``num_added`` pairs that lie in the same connected component, are
    not self-loops and have a non-positive gradient are added as edges. Each
    such perturbed graph becomes a candidate.

    Args:
        model: classifier exposing ``PrepareFeatureLabel``, ``s2v``, ``mlp``,
            ``label_map``, ``zero_grad`` and a forward call whose third return
            value is read as per-graph correctness.
        s2v_g: graph to attack.
        num_added: maximum number of edges added per candidate.

    Returns:
        The first candidate the model misclassifies. The unmodified input
        graph is candidate 0 and is also the fallback when no candidate fools
        the model.
    """
    g = s2v_g.to_networkx()

    # nx.connected_component_subgraphs was removed in networkx 2.4;
    # connected_components yields the same node partition on all releases.
    set_id = {}
    for comp_id, comp_nodes in enumerate(nx.connected_components(g)):
        for node in comp_nodes:
            set_id[node] = comp_id

    node_feat, edge_feat, labels = model.PrepareFeatureLabel([s2v_g])
    if cmd_args.ctx == 'gpu':
        node_feat = node_feat.cuda()
        labels = labels.cuda()

    cand_list = [s2v_g]
    for l in range(len(model.label_map)):
        if l == s2v_g.label:
            continue
        labels[0] = l
        model.zero_grad()
        (_, embed), sp_dict = model.s2v([s2v_g], node_feat, edge_feat, pool_global=True, n2n_grad=True)
        _, loss, _ = model.mlp(embed, labels)
        loss.backward()
        # .cpu() is needed before .numpy() when running with ctx == 'gpu'.
        grad = sp_dict['n2n'].grad.data.cpu().numpy().flatten()
        idxes = np.argsort(grad)
        added = []

        for p in idxes:
            # Flat position -> (row, col) of the num_nodes x num_nodes matrix.
            x = int(p // s2v_g.num_nodes)
            y = int(p % s2v_g.num_nodes)
            if set_id[x] != set_id[y] or x == y or grad[p] > 0:
                continue
            added.append((x, y))
            if len(added) >= num_added:
                break
        if not added:
            continue
        g2 = g.copy()
        g2.add_edges_from(added)

        cand_list.append(S2VGraph(g2, s2v_g.label))

    _, _, acc = model(cand_list)
    acc = acc.double().cpu().numpy()
    for cand, correct in zip(cand_list, acc):
        if correct < 1.0:
            return cand
    return cand_list[0]
76 |
if __name__ == '__main__':
    # Seed every RNG in use so a run is reproducible.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(cmd_args.seed)

    label_map, train_glist, test_glist = load_er_data()

    base_classifier = load_base_model(label_map, test_glist)

    # Attack each test graph, then evaluate the classifier on the results.
    new_test_list = [propose_attack(base_classifier, g) for g in tqdm(test_glist)]

    test_graphs(base_classifier, new_test_list)
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/nstep_replay_mem.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 |
4 | class NstepReplaySubMemCell(object):
5 | def __init__(self, memory_size):
6 | self.memory_size = memory_size
7 |
8 | self.actions = [None] * self.memory_size
9 | self.rewards = [None] * self.memory_size
10 | self.states = [None] * self.memory_size
11 | self.s_primes = [None] * self.memory_size
12 | self.terminals = [None] * self.memory_size
13 |
14 | self.count = 0
15 | self.current = 0
16 |
17 | def add(self, s_t, a_t, r_t, s_prime, terminal):
18 | self.actions[self.current] = a_t
19 | self.rewards[self.current] = r_t
20 | self.states[self.current] = s_t
21 | self.s_primes[self.current] = s_prime
22 | self.terminals[self.current] = terminal
23 |
24 | self.count = max(self.count, self.current + 1)
25 | self.current = (self.current + 1) % self.memory_size
26 |
27 | def add_list(self, list_st, list_at, list_rt, list_sp, list_term):
28 | for i in range(len(list_st)):
29 | if list_sp is None:
30 | sp = (None, None, None)
31 | else:
32 | sp = list_sp[i]
33 | self.add(list_st[i], list_at[i], list_rt[i], sp, list_term[i])
34 |
35 | def sample(self, batch_size):
36 | assert self.count >= batch_size
37 |
38 | list_st = []
39 | list_at = []
40 | list_rt = []
41 | list_s_primes = []
42 | list_term = []
43 |
44 | for i in range(batch_size):
45 | idx = random.randint(0, self.count - 1)
46 | list_st.append(self.states[idx])
47 | list_at.append(self.actions[idx])
48 | list_rt.append(float(self.rewards[idx]))
49 | list_s_primes.append(self.s_primes[idx])
50 | list_term.append(self.terminals[idx])
51 |
52 | return list_st, list_at, list_rt, list_s_primes, list_term
53 |
def hash_state_action(s_t, a_t):
    """Hash a (state, action) pair to an integer in ``[0, 179424673)``.

    ``s_t`` is indexed as ``(s_t[0], graph, s_t[2])`` where ``graph`` exposes
    ``directed_edges`` (an iterable of integer pairs) and ``s_t[2]`` is an
    integer or None. ``a_t`` is the integer action.

    The key is a polynomial rolling hash over ``s_t[0]``, every edge endpoint,
    ``s_t[2]`` and ``a_t``. The multiplier must differ from the modulus: with
    both equal, ``(key * base + x) % base`` collapses to ``x % base`` and the
    result would depend on ``a_t`` alone.
    """
    modulus = 179424673
    multiplier = 15485863

    key = s_t[0] % modulus
    for e in s_t[1].directed_edges:
        key = (key * multiplier + e[0]) % modulus
        key = (key * multiplier + e[1]) % modulus

    # Shift real values by one so that None and 0 hash differently.
    extra = 0 if s_t[2] is None else s_t[2] + 1
    key = (key * multiplier + extra) % modulus

    key = (key * multiplier + a_t) % modulus
    return key
67 |
class NstepReplayMemCell(object):
    """Replay memory for one time step, optionally split by reward sign.

    Without ``balance_sample`` there is a single buffer. With it, transitions
    with negative reward go to ``sub_list[0]`` and those with positive reward
    to ``sub_list[1]``; positive ones are de-duplicated through
    ``hash_state_action``.
    """

    def __init__(self, memory_size, balance_sample = False):
        self.balance_sample = balance_sample
        self.sub_list = [NstepReplaySubMemCell(memory_size)]
        if balance_sample:
            self.sub_list.append(NstepReplaySubMemCell(memory_size))
            # Keys of the positive transitions stored so far.
            self.state_set = set()

    def add(self, s_t, a_t, r_t, s_prime, terminal):
        """Store one transition in the buffer matching its reward sign."""
        if not self.balance_sample or r_t < 0:
            self.sub_list[0].add(s_t, a_t, r_t, s_prime, terminal)
            return

        # Balanced mode only accepts strictly positive rewards here.
        assert r_t > 0
        key = hash_state_action(s_t, a_t)
        if key not in self.state_set:
            self.state_set.add(key)
            self.sub_list[1].add(s_t, a_t, r_t, s_prime, terminal)

    def sample(self, batch_size):
        """Sample ``batch_size`` transitions as five parallel lists.

        In balanced mode, once the positive buffer holds at least
        ``batch_size`` entries, half of the batch (rounded down) comes from
        ``sub_list[0]`` and the rest from ``sub_list[1]``; otherwise everything
        is drawn from ``sub_list[0]``.
        """
        if not self.balance_sample or self.sub_list[1].count < batch_size:
            return self.sub_list[0].sample(batch_size)

        half = batch_size // 2
        first_part = self.sub_list[0].sample(half)
        second_part = self.sub_list[1].sample(batch_size - half)

        # Concatenate field by field.
        return tuple(lhs + rhs for lhs, rhs in zip(first_part, second_part))
96 |
class NstepReplayMem(object):
    """N-step replay memory: one NstepReplayMemCell per time step.

    Only the cell of the last step honours ``balance_sample``; all earlier
    steps use a single unbalanced buffer.
    """

    def __init__(self, memory_size, n_steps, balance_sample = False):
        self.n_steps = n_steps
        self.memory_size = memory_size

        self.mem_cells = [NstepReplayMemCell(memory_size, False) for _ in range(n_steps - 1)]
        self.mem_cells.append(NstepReplayMemCell(memory_size, balance_sample))

    def add(self, s_t, a_t, r_t, s_prime, terminal, t):
        """Store one transition observed at step ``t``.

        A transition must be terminal exactly when ``t`` is the last step.
        """
        assert t >= 0 and t < self.n_steps
        is_last_step = (t == self.n_steps - 1)
        assert terminal if is_last_step else not terminal
        self.mem_cells[t].add(s_t, a_t, r_t, s_prime, terminal)

    def add_list(self, list_st, list_at, list_rt, list_sp, list_term, t):
        """Store a batch of step-``t`` transitions.

        When ``list_sp`` is None every transition gets the placeholder next
        state ``(None, None, None)``.
        """
        for i, s_t in enumerate(list_st):
            sp = (None, None, None) if list_sp is None else list_sp[i]
            self.add(s_t, list_at[i], list_rt[i], sp, list_term[i], t)

    def sample(self, batch_size, t = None):
        """Sample a batch from step ``t`` (a uniformly random step when None).

        Returns ``(t, states, actions, rewards, next_states, terminals)``.
        """
        if t is None:
            t = np.random.randint(self.n_steps)
        batch = self.mem_cells[t].sample(batch_size)
        list_st, list_at, list_rt, list_s_primes, list_term = batch
        return t, list_st, list_at, list_rt, list_s_primes, list_term
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/nstep_replay_mem.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/graph_attack/nstep_replay_mem.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/plot_dqn.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import numpy as np
6 | import torch
7 | import networkx as nx
8 | import random
9 | from torch.autograd import Variable
10 | from torch.nn.parameter import Parameter
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 | from tqdm import tqdm
15 | from copy import deepcopy
16 |
17 | from q_net import NStepQNet, QNet, greedy_actions
18 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
19 | from cmd_args import cmd_args
20 |
21 | from rl_common import GraphEdgeEnv, local_args, load_graphs, test_graphs, load_base_model, attackable, get_supervision
22 | from nstep_replay_mem import NstepReplayMem
23 |
24 | sys.path.append('%s/../graph_classification' % os.path.dirname(os.path.realpath(__file__)))
25 | from graph_common import loop_dataset
26 |
class Agent(object):
    """Q-learning agent used to replay and evaluate a trained edge-attack policy.

    The agent owns a two-step Q network (plus a snapshot copy), a two-step
    replay memory and the graph-edge environment it acts in. Every method
    talks to the environment through ``self.env``; nothing depends on a
    module-level ``env`` variable.
    """

    def __init__(self, g_list, test_g_list, env):
        """
        Args:
            g_list: graphs used by ``run_simulation``.
            test_g_list: graphs used by ``eval``; falls back to ``g_list``
                when None.
            env: environment providing ``setup``, ``step``, ``isTerminal``,
                ``cloneState``, ``getStateRef``, ``uniformRandActions`` and
                the result attributes read in ``eval``.
        """
        self.g_list = g_list
        if test_g_list is None:
            self.test_g_list = g_list
        else:
            self.test_g_list = test_g_list
        self.mem_pool = NstepReplayMem(memory_size=50000, n_steps=2)
        self.env = env
        # self.net = QNet()
        self.net = NStepQNet(2)
        self.old_net = NStepQNet(2)
        if cmd_args.ctx == 'gpu':
            self.net = self.net.cuda()
            self.old_net = self.old_net.cuda()
        # Epsilon schedule: start and end are both 1.0, so non-greedy action
        # selection is always uniformly random.
        self.eps_start = 1.0
        self.eps_end = 1.0
        self.eps_step = 10000
        self.burn_in = 100
        self.step = 0

        self.best_eval = None
        self.pos = 0
        self.sample_idxes = list(range(len(g_list)))
        random.shuffle(self.sample_idxes)
        self.take_snapshot()

    def take_snapshot(self):
        """Copy the current network weights into ``old_net``."""
        self.old_net.load_state_dict(self.net.state_dict())

    def make_actions(self, time_t, greedy=False):
        """Pick one action per graph in the environment for step ``time_t``.

        With probability ``eps`` (and unless ``greedy``) the actions are
        uniformly random; otherwise they are the network's greedy actions.
        """
        self.eps = self.eps_end + max(0., (self.eps_start - self.eps_end)
                * (self.eps_step - max(0., self.step)) / self.eps_step)

        if random.random() < self.eps and not greedy:
            actions = self.env.uniformRandActions()
        else:
            cur_state = self.env.getStateRef()
            actions, _, _ = self.net(time_t, cur_state, None, greedy_acts=True)
            actions = list(actions.cpu().numpy())

        return actions

    def run_simulation(self):
        """Roll out one mini-batch of graphs and store every step in the replay memory."""
        if (self.pos + 1) * cmd_args.batch_size > len(self.sample_idxes):
            # Not enough indices left for a full batch: reshuffle and restart.
            self.pos = 0
            random.shuffle(self.sample_idxes)

        selected_idx = self.sample_idxes[self.pos * cmd_args.batch_size : (self.pos + 1) * cmd_args.batch_size]
        self.pos += 1
        self.env.setup([self.g_list[idx] for idx in selected_idx])

        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t)
            list_st = self.env.cloneState()
            self.env.step(list_at)

            # Rewards are only available on the terminal step.
            assert (self.env.rewards is not None) == self.env.isTerminal()
            if self.env.isTerminal():
                rewards = self.env.rewards
                s_prime = None
            else:
                rewards = np.zeros(len(list_at), dtype=np.float32)
                s_prime = self.env.cloneState()

            self.mem_pool.add_list(list_st, list_at, rewards, s_prime, [self.env.isTerminal()] * len(list_at), t)
            t += 1

    def eval(self):
        """Run the greedy policy on the test graphs and report the outcome.

        Prints the classifier's loss/accuracy on the attacked graphs, writes
        one line per test graph to ``<save_dir>/edge_added.txt`` (true label,
        ``pred + 1``, and the added edge) and prints the mean reward.

        Returns:
            ``(mean reward, accuracy)``.
        """
        self.env.setup(deepcopy(self.test_g_list))
        t = 0
        while not self.env.isTerminal():
            list_at = self.make_actions(t, greedy=True)
            self.env.step(list_at)
            t += 1
        test_loss = loop_dataset(self.env.g_list, self.env.classifier, list(range(len(self.env.g_list))), epoch=101)
        print('\033[93m average test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))
        with open('%s/edge_added.txt' % cmd_args.save_dir, 'w') as f:
            for i in range(len(self.test_g_list)):
                f.write('%d %d ' % (self.test_g_list[i].label, self.env.pred[i] + 1))
                f.write('%d %d\n' % self.env.added_edges[i])
        reward = np.mean(self.env.rewards)
        print(reward)
        return reward, test_loss[1]
112 |
if __name__ == '__main__':
    # Seed all three random number generators from the same command-line seed.
    seed = cmd_args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    label_map, _, g_list = load_graphs()
    # random.shuffle(g_list)
    base_classifier = load_base_model(label_map, g_list)
    env = GraphEdgeEnv(base_classifier, n_edges = 1)

    # With frac_meta > 0 the tail of g_list is held out as the test split;
    # otherwise the agent evaluates on the graphs it was given.
    train_graphs, held_out_graphs = g_list, None
    if cmd_args.frac_meta > 0:
        num_train = int( len(g_list) * (1 - cmd_args.frac_meta) )
        train_graphs, held_out_graphs = g_list[:num_train], g_list[num_train:]
    agent = Agent(train_graphs, held_out_graphs, env)

    # This script only evaluates a previously saved model.
    assert cmd_args.phase == 'test'
    checkpoint_path = cmd_args.save_dir + '/epoch-best.model'
    agent.net.load_state_dict(torch.load(checkpoint_path))
    agent.eval()
132 | # env.setup([g_list[idx] for idx in selected_idx])
133 | # t = 0
134 | # while not env.isTerminal():
135 | # policy_net = net_list[t]
136 | # t += 1
137 | # batch_graph, picked_nodes = env.getState()
138 | # log_probs, prefix_sum = policy_net(batch_graph, picked_nodes)
139 | # actions = env.sampleActions(torch.exp(log_probs).data.cpu().numpy(), prefix_sum.data.cpu().numpy(), greedy=True)
140 | # env.step(actions)
141 |
142 | # test_loss = loop_dataset(env.g_list, base_classifier, list(range(len(env.g_list))))
143 | # print('\033[93maverage test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))
144 |
145 | # print(np.mean(avg_rewards), np.mean(env.rewards))
146 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/plot_dqn.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run plot_dqn.py against the fixed 40-50 node / p=0.05 components setup.
# Extra command-line arguments are forwarded to plot_dqn.py unchanged.

dropbox=../../dropbox

min_n=40
max_n=50
p=0.05
min_c=1
max_c=3
base_lv=4
data_folder=$dropbox/data/components
save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best

lr=0.001
max_lv=5
frac_meta=0

output_base=$dropbox/scratch/results/graph_classification/components/$save_fold

output_root=$output_base/lv-${max_lv}-frac-${frac_meta}

# Quote every expansion so paths or arguments with spaces stay intact.
python plot_dqn.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -max_lv "$max_lv" \
    -frac_meta "$frac_meta" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -n_graphs 5000 \
    -er_p "$p" \
    -learning_rate "$lr" \
    -base_model_dump "$base_model_dump" \
    -logfile "$output_root/log.txt" \
    "$@"
38 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/q_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import sys
4 | import numpy as np
5 | import torch
6 | import networkx as nx
7 | import random
8 | from torch.autograd import Variable
9 | from torch.nn.parameter import Parameter
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import torch.optim as optim
13 | from tqdm import tqdm
14 | sys.path.append('%s/../../pytorch_structure2vec/s2v_lib' % os.path.dirname(os.path.realpath(__file__)))
15 | from pytorch_util import weights_init
16 |
17 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
18 | from graph_embedding import EmbedMeanField, EmbedLoopyBP
19 | from cmd_args import cmd_args
20 | from modules.custom_mod import JaggedMaxModule
21 | from rl_common import local_args
22 |
def greedy_actions(q_values, v_p, banned_list):
    """Pick the highest-valued action per graph, skipping banned nodes.

    Args:
        q_values: Q-value predictions, one row per node of the batch.
        v_p: cumulative node counts, one entry per graph in the batch.
        banned_list: per-graph iterables of banned local node indices;
            either the whole list or individual entries may be ``None``.

    Returns:
        Tuple ``(actions, values)`` holding the ``.data`` of the two outputs
        of ``JaggedMaxModule``.
    """
    banned_acts = []
    if banned_list is not None:
        offset = 0
        prefix_sum = v_p.data.cpu().numpy()
        for i in range(len(prefix_sum)):
            if banned_list[i] is not None:
                # Local node index -> row index in the batched q_values.
                banned_acts.extend(int(offset + j) for j in banned_list[i])
            offset = prefix_sum[i]

    # Work on a copy so the caller's predictions are not modified.
    q_values = q_values.data.clone()
    if banned_acts:
        # Use the float32 minimum: the float64 minimum is not representable
        # in a float32 tensor and overflows when assigned.
        q_values[banned_acts, :] = float(np.finfo(np.float32).min)
    jmax = JaggedMaxModule()
    values, actions = jmax(Variable(q_values), v_p)

    return actions.data, values.data
43 |
class QNet(nn.Module):
    """Q-function over (state, node action) pairs on a batch of graphs.

    Node embeddings and a pooled graph embedding come from a structure2vec
    module; they are concatenated and passed through a linear head, with an
    optional hidden layer when ``local_args.mlp_hidden`` is set.
    """

    def __init__(self, s2v_module = None):
        """Build the prediction head and create or adopt the embedding module.

        Args:
            s2v_module: existing embedding module to reuse; when ``None`` a
                new one is built according to ``cmd_args.gm``.
        """
        super(QNet, self).__init__()
        if cmd_args.gm == 'mean_field':
            model = EmbedMeanField
        elif cmd_args.gm == 'loopy_bp':
            model = EmbedLoopyBP
        else:
            print('unknown gm %s' % cmd_args.gm)
            sys.exit()

        # out_dim == 0 means the embedding keeps the latent dimension.
        embed_dim = cmd_args.latent_dim if cmd_args.out_dim == 0 else cmd_args.out_dim

        head_in = embed_dim * 2
        if local_args.mlp_hidden:
            self.linear_1 = nn.Linear(head_in, local_args.mlp_hidden)
            self.linear_out = nn.Linear(local_args.mlp_hidden, 1)
        else:
            self.linear_out = nn.Linear(head_in, 1)
        # Called before s2v is attached, exactly as in the original order.
        weights_init(self)

        if s2v_module is not None:
            self.s2v = s2v_module
        else:
            self.s2v = model(latent_dim=cmd_args.latent_dim,
                             output_dim=cmd_args.out_dim,
                             num_node_feats=2,
                             num_edge_feats=0,
                             max_lv=cmd_args.max_lv)

    def PrepareFeatures(self, batch_graph, picked_nodes):
        """Build two-column node features and cumulative node counts.

        Column 0 is 1 for ordinary nodes and column 1 is 1 for the node
        already picked in its graph (if any).

        Returns:
            Tuple ``(node_feat, prefix_sum)``: a ``(total_nodes, 2)`` float
            tensor and a ``LongTensor`` of cumulative node counts.
        """
        total_nodes = 0
        cumulative = []
        picked_rows = []
        for idx, graph in enumerate(batch_graph):
            if picked_nodes is not None and picked_nodes[idx] is not None:
                assert picked_nodes[idx] >= 0 and picked_nodes[idx] < graph.num_nodes
                picked_rows.append(total_nodes + picked_nodes[idx])
            total_nodes += graph.num_nodes
            cumulative.append(total_nodes)

        node_feat = torch.zeros(total_nodes, 2)
        node_feat[:, 0] = 1.0

        if len(picked_rows):
            # The numpy view shares memory with the tensor, so these writes
            # update node_feat in place.
            feat_view = node_feat.numpy()
            feat_view[picked_rows, 1] = 1.0
            feat_view[picked_rows, 0] = 0.0

        return node_feat, torch.LongTensor(cumulative)

    def add_offset(self, actions, v_p):
        """Convert per-graph local node indices into batch-wide row indices."""
        bounds = v_p.data.cpu().numpy()

        shifted = []
        for i in range(len(bounds)):
            start = bounds[i - 1] if i > 0 else 0
            shifted.append(actions[i] + start)

        return shifted

    def rep_global_embed(self, graph_embed, v_p):
        """Repeat each graph's embedding once per node of that graph."""
        bounds = v_p.data.cpu().numpy()

        rep_idx = []
        for i in range(len(bounds)):
            n_nodes = bounds[i] if i == 0 else bounds[i] - bounds[i - 1]
            rep_idx += [i] * n_nodes

        rep_idx = Variable(torch.LongTensor(rep_idx))
        if cmd_args.ctx == 'gpu':
            rep_idx = rep_idx.cuda()
        return torch.index_select(graph_embed, 0, rep_idx)

    def forward(self, time_t, states, actions, greedy_acts = False):
        """Score actions for a batch of states.

        Args:
            time_t: step index (not used inside this module).
            states: iterable of ``(graph, picked_node, banned_nodes)``.
            actions: per-graph chosen nodes, or ``None`` to score every node.
            greedy_acts: when true, replace ``actions`` with the greedy ones.

        Returns:
            Tuple ``(actions, raw_pred, prefix_sum)``.
        """
        batch_graph, picked_nodes, banned_list = zip(*states)

        node_feat, prefix_sum = self.PrepareFeatures(batch_graph, picked_nodes)

        if cmd_args.ctx == 'gpu':
            node_feat = node_feat.cuda()
            prefix_sum = prefix_sum.cuda()
        prefix_sum = Variable(prefix_sum)

        embed, graph_embed = self.s2v(batch_graph, node_feat, None, pool_global=True)

        if actions is not None:
            # Score only the chosen node of every graph.
            embed = embed[self.add_offset(actions, prefix_sum), :]
        else:
            # Score every node: broadcast the graph embedding to node level.
            graph_embed = self.rep_global_embed(graph_embed, prefix_sum)

        embed_s_a = torch.cat((embed, graph_embed), dim=1)

        if local_args.mlp_hidden:
            embed_s_a = F.relu( self.linear_1(embed_s_a) )

        raw_pred = self.linear_out(embed_s_a)

        if greedy_acts:
            actions, _ = greedy_actions(raw_pred, prefix_sum, banned_list)

        return actions, raw_pred, prefix_sum
154 |
class NStepQNet(nn.Module):
    """One ``QNet`` head per step, all sharing the first head's embedding."""

    def __init__(self, num_steps, s2v_module = None):
        """Create ``num_steps`` Q-networks.

        Args:
            num_steps: number of per-step networks to build.
            s2v_module: optional embedding module handed to the first head;
                later heads reuse the first head's ``s2v``.
        """
        super(NStepQNet, self).__init__()

        first_head = QNet(s2v_module)
        heads = [first_head]
        for _ in range(1, num_steps):
            heads.append(QNet(first_head.s2v))

        self.list_mod = nn.ModuleList(heads)
        self.num_steps = num_steps

    def forward(self, time_t, states, actions, greedy_acts = False):
        """Dispatch to the network that belongs to step ``time_t``."""
        assert time_t >= 0 and time_t < self.num_steps

        step_net = self.list_mod[time_t]
        return step_net(time_t, states, actions, greedy_acts)
172 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/q_net.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/adversarial_robustness/code/graph_attack/q_net.pyc
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_dqn.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run dqn.py for one components configuration.
# Positional arguments: $1=min_n  $2=max_n  $3=p  $4=base_lv
# All arguments (including the four above) are also forwarded to dqn.py,
# as in the original script.

dropbox=../../dropbox

min_n=$1
max_n=$2
p=$3
min_c=1
max_c=3
base_lv=$4
data_folder=$dropbox/data/components
save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best

lr=0.001
max_lv=5
frac_meta=0

output_base=$dropbox/scratch/results/graph_classification/components/$save_fold

output_root=$output_base/lv-${max_lv}-frac-${frac_meta}

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python dqn.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -max_lv "$max_lv" \
    -frac_meta "$frac_meta" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -n_graphs 5000 \
    -er_p "$p" \
    -learning_rate "$lr" \
    -base_model_dump "$base_model_dump" \
    -logfile "$output_root/log.txt" \
    "$@"
41 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_ga.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run genetic_algorithm.py for one components configuration.
# Positional arguments: $1=min_n  $2=max_n  $3=p  $4=base_lv
# All arguments are also forwarded to genetic_algorithm.py, as in the
# original script.

dropbox=../../dropbox

min_n=$1
max_n=$2
p=$3
min_c=1
max_c=3
base_lv=$4
data_folder=$dropbox/data/components
save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${base_lv}
base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best

idx_start=0
num=2000
pop=50
cross=0.1
mutate=0.2
rounds=10

output_base=$HOME/scratch/results/graph_classification/components/$save_fold
output_root=$output_base/ga-p-${pop}-c-${cross}-m-${mutate}-r-${rounds}

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python genetic_algorithm.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -idx_start "$idx_start" \
    -population_size "$pop" \
    -cross_rate "$cross" \
    -mutate_rate "$mutate" \
    -rounds "$rounds" \
    -num_instances "$num" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -n_graphs 5000 \
    -er_p "$p" \
    -base_model_dump "$base_model_dump" \
    "$@"
47 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_grad.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run grad_attack.py for one components configuration.
# Positional arguments: $1=min_n  $2=max_n  $3=p  $4=max_lv
# All arguments are also forwarded to grad_attack.py, as in the original
# script.

min_n=$1
max_n=$2
p=$3
dropbox=../../dropbox/
data_folder=$dropbox/data/components
min_c=1
max_c=3
max_lv=$4
rand=random

save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best

output_root=./saved

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python grad_attack.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -rand_att_type "$rand" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -base_model_dump "$base_model_dump" \
    -n_graphs 5000 \
    -er_p "$p" \
    "$@"
33 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_attack/run_trivial.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run er_trivial_attack.py for one components configuration.
# Positional arguments: $1=min_n  $2=max_n  $3=p  $4=max_lv
# All arguments are also forwarded to er_trivial_attack.py, as in the
# original script.

min_n=$1
max_n=$2
p=$3
dropbox=../../dropbox/
data_folder=$dropbox/data/components
min_c=1
max_c=3
max_lv=$4
# rand=exhaust
rand=random

save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
base_model_dump=$dropbox/scratch/results/graph_classification/components/$save_fold/epoch-best

output_root=./saved

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python er_trivial_attack.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -max_lv "$max_lv" \
    -rand_att_type "$rand" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -base_model_dump "$base_model_dump" \
    -n_graphs 5000 \
    -er_p "$p" \
    "$@"
37 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/er_components.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import numpy as np
6 | import torch
7 | import random
8 | from torch.autograd import Variable
9 | from torch.nn.parameter import Parameter
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | import torch.optim as optim
13 | from tqdm import tqdm
14 | # import cPickle as cp
15 | import pickle as cp
16 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
17 | from cmd_args import cmd_args, save_args
18 | from dnn import GraphClassifier
19 | from graph_embedding import S2VGraph
20 |
21 | sys.path.append('%s/../data_generator' % os.path.dirname(os.path.realpath(__file__)))
22 | from data_util import load_pkl
23 |
24 | from graph_common import loop_dataset, load_er_data
25 |
if __name__ == '__main__':
    # Seed all three random number generators from the same command-line seed.
    seed = cmd_args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    label_map, train_glist, test_glist = load_er_data()

    saved_model = cmd_args.saved_model
    if saved_model is not None and saved_model != '':
        # Rebuild the classifier with the arguments it was trained with,
        # then restore its weights.
        print('loading model from %s' % saved_model)
        with open('%s-args.pkl' % saved_model, 'rb') as f:
            base_args = cp.load(f)
        classifier = GraphClassifier(label_map, **vars(base_args))
        classifier.load_state_dict(torch.load(saved_model + '.model'))
    else:
        classifier = GraphClassifier(label_map, **vars(cmd_args))

    if cmd_args.ctx == 'gpu':
        classifier = classifier.cuda()

    test_idxes = list(range(len(test_glist)))
    if cmd_args.phase == 'test':
        test_loss = loop_dataset(test_glist, classifier, test_idxes, epoch=101)
        print('\033[93maverage test: loss %.5f acc %.5f\033[0m' % (test_loss[0], test_loss[1]))

    if cmd_args.phase == 'train':
        optimizer = optim.Adam(classifier.parameters(), lr=cmd_args.learning_rate)

        train_idxes = list(range(len(train_glist)))
        best_loss = None
        for epoch in range(cmd_args.num_epochs):
            random.shuffle(train_idxes)
            avg_loss = loop_dataset(train_glist, classifier, train_idxes, optimizer=optimizer, epoch=epoch)
            print('\033[92maverage training of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, avg_loss[0], avg_loss[1]))

            test_loss = loop_dataset(test_glist, classifier, list(range(len(test_glist))), epoch=epoch)
            print('\033[93maverage test of epoch %d: loss %.5f acc %.5f\033[0m' % (epoch, test_loss[0], test_loss[1]))

            # Keep the checkpoint with the lowest test loss seen so far.
            improved = best_loss is None or test_loss[0] < best_loss
            if improved:
                best_loss = test_loss[0]
                print('----saving to best model since this is the best valid loss so far.----')
                torch.save(classifier.state_dict(), cmd_args.save_dir + '/epoch-best.model')
                save_args(cmd_args.save_dir + '/epoch-best-args.pkl', cmd_args)
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/graph_common.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import numpy as np
6 | import torch
7 | import networkx as nx
8 | import random
9 | from torch.autograd import Variable
10 | from torch.nn.parameter import Parameter
11 | import torch.nn as nn
12 | import torch.nn.functional as F
13 | import torch.optim as optim
14 | from tqdm import tqdm
15 | sys.path.append('%s/../common' % os.path.dirname(os.path.realpath(__file__)))
16 | from cmd_args import cmd_args
17 | from graph_embedding import S2VGraph
18 | sys.path.append('%s/../data_generator' % os.path.dirname(os.path.realpath(__file__)))
19 | from data_util import load_pkl
20 | from copy import deepcopy
21 |
22 | @torch.no_grad()
23 | def gen_adv_output(data, model, z):
24 | z = Variable(z.detach().data, requires_grad=False)
25 | model_adv = deepcopy(model)
26 | adv_optim = optim.Adam(model_adv.parameters(), lr=cmd_args.lr_inner)
27 | def closure(z):
28 | adv_optim.zero_grad()
29 | z_tmp = model_adv.forward_cl(data)
30 | loss_tmp = model_adv.loss_cl(z, z_tmp)
31 | loss_tmp.backward()
32 | torch.nn.utils.clip_grad_norm_(model_adv.parameters(), cmd_args.clip_norm)
33 | closure = torch.enable_grad()(closure)
34 | closure(z)
35 | state = dict()
36 | for i in range(2):
37 | for name, param in model_adv.named_parameters():
38 | if name.split('.')[0] != 'mlp' and name.split('.')[0] != 'projection_head':
39 | if i == 0:
40 | state[name] = torch.zeros_like(param.grad)
41 | dev = state[name] + cmd_args.lr_inner * param.grad
42 | clip_coef = cmd_args.epison / (dev.norm() + 1e-12)
43 | dev = clip_coef * dev if clip_coef < 1 else dev
44 | param.sub_(state[name]).add_(dev)
45 | state[name] = dev
46 | closure(z)
47 | z2 = model_adv.forward_cl(data)
48 | return z2
49 |
def loop_dataset(g_list, classifier, sample_idxes, optimizer=None, bsize=cmd_args.batch_size, epoch=0, cl_epochs=150):
    """Run one pass over the selected graphs and return average metrics.

    While ``epoch <= cl_epochs`` the contrastive loss between the
    classifier's embedding and the output of ``gen_adv_output`` is used and
    accuracy is reported as 0; afterwards the classifier's own loss and
    accuracy are used.

    Args:
        g_list: list of graphs.
        classifier: model providing ``forward_cl``, ``loss_cl`` and
            ``__call__``.
        sample_idxes: indices into ``g_list`` to visit, in order.
        optimizer: when given, a gradient step is taken per batch and a
            trailing partial batch is dropped; when ``None`` every sample is
            visited.
        bsize: batch size.
        epoch: current epoch, selects the loss (see above).
        cl_epochs: last epoch (inclusive) that uses the contrastive loss.

    Returns:
        numpy array ``[average loss, average accuracy]`` weighted by batch
        size.
    """
    total_loss = []
    # Round up when evaluating so the last partial batch is included.
    total_iters = (len(sample_idxes) + (bsize - 1) * (optimizer is None)) // bsize
    pbar = tqdm(range(total_iters), unit='batch')

    n_samples = 0
    for pos in pbar:
        selected_idx = sample_idxes[pos * bsize : (pos + 1) * bsize]
        batch_graph = [g_list[idx] for idx in selected_idx]
        if epoch <= cl_epochs:
            x1 = classifier.forward_cl(batch_graph)
            x2 = gen_adv_output(batch_graph, classifier, x1)
            # The perturbed view is treated as a constant target.
            x2 = Variable(x2.detach().data, requires_grad=False)
            loss = classifier.loss_cl(x1, x2)
            # No classification happens here; keep acc a plain float so it
            # combines cleanly with the numpy loss below.
            acc = 0.0
        else:
            _, loss, acc = classifier(batch_graph)
            acc = acc.sum().item() / float(acc.size()[0])
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss = loss.data.cpu().numpy()
        pbar.set_description('loss: %0.5f acc: %0.5f' % (loss, acc))
        total_loss.append( np.array([loss, acc]) * len(selected_idx))
        n_samples += len(selected_idx)
    if optimizer is None:
        assert n_samples == len(sample_idxes)
    total_loss = np.array(total_loss)
    avg_loss = np.sum(total_loss, 0) / n_samples
    return avg_loss
81 |
def load_er_data():
    """Load the pickled graphs for every component count and split them.

    For each component count in ``[cmd_args.min_c, cmd_args.max_c]`` the
    first 90% of the graphs go to the training list and the rest to the test
    list.  Also sets ``cmd_args.num_class`` and ``cmd_args.feat_dim``.

    Returns:
        Tuple ``(label_map, train_glist, test_glist)``.
    """
    frac_train = 0.9
    pattern = 'nrange-%d-%d-n_graph-%d-p-%.2f' % (cmd_args.min_n, cmd_args.max_n, cmd_args.n_graphs, cmd_args.er_p)
    num_train = int(frac_train * cmd_args.n_graphs)

    label_map = {}
    train_glist, test_glist = [], []
    for n_comp in range(cmd_args.min_c, cmd_args.max_c + 1):
        pkl_path = '%s/ncomp-%d-%s.pkl' % (cmd_args.data_folder, n_comp, pattern)
        graphs = load_pkl(pkl_path, cmd_args.n_graphs)
        assert len(graphs) == cmd_args.n_graphs
        train_glist.extend([S2VGraph(graphs[j], n_comp) for j in range(num_train)])
        test_glist.extend([S2VGraph(graphs[j], n_comp) for j in range(num_train, len(graphs))])
        # Labels are component counts shifted to start at zero.
        label_map[n_comp] = n_comp - cmd_args.min_c

    cmd_args.num_class = len(label_map)
    cmd_args.feat_dim = 1
    print('# train:', len(train_glist), ' # test:', len(test_glist))

    return label_map, train_glist, test_glist
100 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/run_er_components.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run er_components.py for one components configuration.
# Positional arguments: $1=min_n  $2=max_n  $3=p  $4=max_lv
# All arguments are also forwarded to er_components.py, as in the original
# script.

min_n=$1
max_n=$2
p=$3
dropbox=../../dropbox
data_folder=$dropbox/data/components
min_c=1
max_c=3
max_lv=$4


save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
output_root=../../dropbox/scratch/results/graph_classification/components/$save_fold

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python er_components.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -max_lv "$max_lv" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -n_graphs 5000 \
    -er_p "$p" \
    "$@"
32 |
--------------------------------------------------------------------------------
/adversarial_robustness/code/graph_classification/test_er_comp.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run er_components.py with a saved model for the fixed 40-50 node / p=0.05
# setup.  Extra command-line arguments are forwarded to er_components.py.

min_n=40
max_n=50
p=0.05
dropbox=../../dropbox/
data_folder=$dropbox/data/components
min_c=1
max_c=3
max_lv=4

save_fold=nodes-${min_n}-${max_n}-p-${p}-c-${min_c}-${max_c}-lv-${max_lv}
output_root=$HOME/scratch/results/graph_classification/components/$save_fold
saved_model=$output_root/epoch-best

if [ ! -e "$output_root" ];
then
    mkdir -p "$output_root"
fi

# Quote every expansion so paths or arguments with spaces stay intact.
python er_components.py \
    -data_folder "$data_folder" \
    -save_dir "$output_root" \
    -max_n "$max_n" \
    -min_n "$min_n" \
    -max_lv "$max_lv" \
    -min_c "$min_c" \
    -max_c "$max_c" \
    -saved_model "$saved_model" \
    -n_graphs 5000 \
    -er_p "$p" \
    "$@"
33 |
--------------------------------------------------------------------------------
/semisupervised_TU/README.md:
--------------------------------------------------------------------------------
1 | ## Dependencies
2 |
3 | You can create a conda environment named simgrace with the command:
4 | ```
5 | conda env create -f environment.yml
6 | conda activate simgrace
7 | ```
8 |
9 | Then, you need to create two directories for pre-trained models and finetuned results to avoid errors:
10 |
11 | ```
12 | cd ./pre-training
13 | mkdir models
14 | mkdir logs
15 | cd ..
16 | cd ./finetuning
17 | mkdir logs
18 | cd ..
19 | ```
20 |
21 | ## SimGRACE with Perturbations of Various Magnitudes
22 |
23 | Take NCI1 as an example:
24 |
25 | ### Pre-training: ###
26 |
27 | ```
28 | cd ./pre-training
29 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 0
30 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 1
31 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 2
32 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 3
33 | CUDA_VISIBLE_DEVICES=$GPU_ID python main.py --dataset NCI1 --eta 1.0 --lr 0.001 --suffix 4
34 | ```
35 |
36 | ### Finetuning: ###
37 |
38 | ```
39 | cd ./finetuning
40 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 0
41 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 1
42 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 2
43 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 3
44 | CUDA_VISIBLE_DEVICES=$GPU_ID python main_cl.py --dataset NCI1 --eta 1.0 --semi_split 100 --model_epoch 100 --suffix 4
45 | ```
46 |
47 | Five suffixes stand for five runs (with mean & std reported), and eta could be tuned among ```0.1, 1.0, 10.0, 100.0```.
48 | ```lr``` in pre-training should be tuned from {0.01, 0.001, 0.0001} and ```model_epoch``` in finetuning (this means the epoch checkpoint loaded from pre-trained model) from {20, 40, 60, 80, 100}.
49 |
50 | ## Acknowledgements
51 | * https://github.com/Shen-Lab/GraphCL/tree/master/semisupervised_TU
52 | * https://github.com/chentingpc/gfn.
53 |
--------------------------------------------------------------------------------
/semisupervised_TU/environment.yml:
--------------------------------------------------------------------------------
1 | name: simgrace
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - ca-certificates=2020.6.24=0
7 | - certifi=2020.6.20=py36_0
8 | - ld_impl_linux-64=2.33.1=h53a641e_7
9 | - libedit=3.1.20191231=h7b6447c_0
10 | - libffi=3.3=he6710b0_1
11 | - libgcc-ng=9.1.0=hdf63c60_0
12 | - libstdcxx-ng=9.1.0=hdf63c60_0
13 | - ncurses=6.2=he6710b0_1
14 | - openssl=1.1.1g=h7b6447c_0
15 | - pip=20.1.1=py36_1
16 | - python=3.6.10=h7579374_2
17 | - readline=8.0=h7b6447c_0
18 | - setuptools=47.3.1=py36_0
19 | - sqlite=3.32.3=h62c20be_0
20 | - tk=8.6.10=hbc83047_0
21 | - wheel=0.34.2=py36_0
22 | - xz=5.2.5=h7b6447c_0
23 | - zlib=1.2.11=h7b6447c_3
24 | - pip:
25 | - decorator==4.4.2
26 | - future==0.18.2
27 | - isodate==0.6.0
28 | - joblib==0.16.0
29 | - networkx==2.4
30 | - numpy==1.19.0
31 | - pandas==1.0.5
32 | - pillow==7.2.0
33 | - plyfile==0.7.2
34 | - pyparsing==2.4.7
35 | - python-dateutil==2.8.1
36 | - pytz==2020.1
37 | - rdflib==5.0.0
38 | - scikit-learn==0.23.1
39 | - scipy==1.5.0
40 | - six==1.15.0
41 | - threadpoolctl==2.1.0
42 | - torch==1.4.0
43 | - torch-cluster==1.4.5
44 | - torch-geometric==1.1.0
45 | - torch-scatter==1.1.0
46 | - torch-sparse==0.4.4
47 | - torchvision==0.5.0
48 |
--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/datasets.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import re
3 |
4 | import torch
5 | from torch_geometric.datasets import MNISTSuperpixels
6 | from torch_geometric.utils import degree
7 | import torch_geometric.transforms as T
8 | from feature_expansion import FeatureExpander
9 | from image_dataset import ImageDataset
10 | from tu_dataset import TUDatasetExt
11 |
12 |
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None):
    """Build the dataset ``name`` with features described by ``feat_str``.

    ``feat_str`` is scanned for tokens that configure ``FeatureExpander``:
    ``deg``, ``odeg<N>``, ``ak<N>``/``ank<N>``, ``groupd<N>``, ``re<word>``,
    ``randa<float>``, ``randd<float>``, ``cent`` and ``coord``.

    Args:
        name: dataset name; names containing ``MNIST`` or ``CIFAR`` yield a
            ``(train, test)`` pair, anything else a ``TUDatasetExt``.
        sparse: accepted for interface compatibility; not used here.
        feat_str: feature specification string (see above).
        root: directory holding the data; defaults to ``~/pyG_data``.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for image datasets,
        otherwise a single ``TUDatasetExt``.
    """
    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    # Named use_degree so the imported torch_geometric `degree` helper is
    # not shadowed inside this function.
    use_degree = "deg" in feat_str
    # Raw strings: "\d", "\w" and "\." are invalid escapes in normal strings.
    onehot_maxdeg = re.findall(r"odeg(\d+)", feat_str)
    onehot_maxdeg = int(onehot_maxdeg[0]) if onehot_maxdeg else None
    k = re.findall(r"an{0,1}k(\d+)", feat_str)
    k = int(k[0]) if k else 0
    groupd = re.findall(r"groupd(\d+)", feat_str)
    groupd = int(groupd[0]) if groupd else 0
    remove_edges = re.findall(r"re(\w+)", feat_str)
    remove_edges = remove_edges[0] if remove_edges else 'none'
    edge_noises_add = re.findall(r"randa([\d\.]+)", feat_str)
    edge_noises_add = float(edge_noises_add[0]) if edge_noises_add else 0
    edge_noises_delete = re.findall(r"randd([\d\.]+)", feat_str)
    edge_noises_delete = float(
        edge_noises_delete[0]) if edge_noises_delete else 0
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str)

        # Only the single-dataset case has a `.data` attribute; the image
        # branch returns a plain tuple.
        dataset.data.edge_attr = None

    return dataset
63 |
--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/gcn_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | from torch_scatter import scatter_add
4 | from torch_geometric.nn.conv import MessagePassing
5 | from torch_geometric.utils import remove_self_loops, add_self_loops
6 | from torch_geometric.nn.inits import glorot, zeros
7 |
8 |
class GCNConv(MessagePassing):
    r"""The graph convolutional operator from the `"Semi-supervised
    Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    .. math::
        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        improved (bool, optional): If set to :obj:`True`, the layer computes
            :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2}
            \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}`.
            (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        edge_norm (bool, optional): whether or not to normalize adj matrix.
            (default: :obj:`True`)
        gfn (bool, optional): If `True`, only linear transform (1x1 conv) is
            applied to every nodes. (default: :obj:`False`)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True,
                 edge_norm=True,
                 gfn=False):
        super(GCNConv, self).__init__('add')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        # Holds the (edge_index, norm) pair computed by the last forward pass.
        self.cached_result = None
        self.edge_norm = edge_norm
        self.gfn = gfn

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight and bias and drop any cached result."""
        glorot(self.weight)
        # NOTE(review): self.bias may be None here; presumably
        # torch_geometric.nn.inits.zeros ignores None -- confirm.
        zeros(self.bias)
        self.cached_result = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        """Add self-loops and compute symmetric-normalisation edge weights.

        Existing self-loops are removed and one loop per node is appended
        with weight 1 (or 2 when ``improved`` is set). When ``edge_weight``
        is ``None`` every edge gets weight 1 of the given ``dtype``.

        Returns:
            A tuple ``(edge_index, norm)`` where ``edge_index`` includes the
            appended self-loops and ``norm`` holds one coefficient per edge,
            ``deg[row]^-1/2 * w * deg[col]^-1/2``.
        """
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)

        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        with_loops = add_self_loops(edge_index, num_nodes=num_nodes)
        # Older torch_geometric releases return the index tensor alone while
        # newer ones return (edge_index, edge_weight); accept both so that
        # the row/col unpacking below always sees the index tensor.
        edge_index = with_loops[0] if isinstance(with_loops, tuple) \
            else with_loops
        # Add edge_weight for loop edges.
        loop_weight = torch.full((num_nodes, ),
                                 1 if not improved else 2,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # A zero degree yields inf after pow(-0.5); zero those entries out.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Apply the linear transform and, unless ``gfn``, propagate it.

        With ``gfn`` set, only ``x @ weight`` is returned (no propagation and
        no bias). With ``cached`` set, the ``(edge_index, norm)`` pair from
        the first call is reused and later ``edge_index`` arguments are
        ignored.
        """
        x = torch.matmul(x, self.weight)
        if self.gfn:
            return x

        if not self.cached or self.cached_result is None:
            if self.edge_norm:
                edge_index, norm = GCNConv.norm(
                    edge_index, x.size(0), edge_weight, self.improved, x.dtype)
            else:
                # Without edge_norm no self-loops are added either.
                norm = None
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        """Scale each neighbour feature by its edge coefficient if enabled."""
        if self.edge_norm:
            return norm.view(-1, 1) * x_j
        else:
            return x_j

    def update(self, aggr_out):
        """Add the bias, when one is configured, to the aggregated output."""
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
128 |
--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/image_dataset.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 |
4 | import torch
5 | import torchvision
6 | from torchvision import datasets, transforms
7 | from torch_geometric.data import InMemoryDataset, Data
8 |
9 |
class ImageDataset(InMemoryDataset):
    """MNIST / CIFAR10 images converted to grid graphs.

    Every pixel becomes a node whose features are its channel values
    (optionally followed by centred x/y coordinates). Each pixel is linked
    to every pixel in its clipped 3x3 neighbourhood, itself included, and
    each edge carries the weight ``exp(-squared_distance)``.

    Args:
        root: Root directory handed to ``InMemoryDataset``.
        name: ``'MNIST'`` or ``'CIFAR10'``.
        train: Load the training split if true, otherwise the test split.
        transform, pre_transform, pre_filter: Forwarded to
            ``InMemoryDataset``. ``pre_filter`` is treated as a predicate;
            samples for which it returns a false value are dropped.
        coord: Append centred pixel coordinates to the node features.
        processed_file_prefix: Prefix of the two processed file names.
    """

    def __init__(self,
                 root,
                 name,
                 train=True,
                 transform=None,
                 pre_transform=None,
                 pre_filter=None,
                 coord=False,
                 processed_file_prefix='data'):
        assert name in ['MNIST', 'CIFAR10'], "Unsupported data name %s" % name
        # These attributes are read by the properties and hooks below, so
        # they are set before the base-class constructor runs.
        self.name = name
        self.coord = coord
        self.processed_file_prefix = processed_file_prefix
        self.traindata = None
        self.testdata = None
        super(ImageDataset, self).__init__(
            root, transform, pre_transform, pre_filter)
        path = self.processed_paths[0] if train else self.processed_paths[1]
        self.data, self.slices = torch.load(path)

    @property
    def raw_file_names(self):
        if self.name == 'MNIST':
            return ['t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
                    'train-images-idx3-ubyte', 'train-labels-idx1-ubyte']
        elif self.name == 'CIFAR10':
            return ['data_batch_1', 'data_batch_2', 'data_batch_3',
                    'data_batch_4', 'data_batch_5', 'test_batch']

    @property
    def processed_file_names(self):
        return ['%s_training.pt' % self.processed_file_prefix,
                '%s_test.pt' % self.processed_file_prefix]

    def download(self):
        """Fetch both splits through torchvision and keep them on ``self``."""
        transform = transforms.ToTensor()
        if self.name == 'CIFAR10':
            data_train = datasets.CIFAR10(root=self.raw_dir,
                                          transform=transform,
                                          train=True,
                                          download=True)
            data_test = datasets.CIFAR10(root=self.raw_dir,
                                         transform=transform,
                                         train=False,
                                         download=True)
        elif self.name == 'MNIST':
            data_train = datasets.MNIST(root=self.raw_dir,
                                        transform=transform,
                                        train=True,
                                        download=True)
            data_test = datasets.MNIST(root=self.raw_dir,
                                       transform=transform,
                                       train=False,
                                       download=True)
        else:
            raise ValueError("Unknown data name {}".format(self.name))
        self.traindata = data_train
        self.testdata = data_test

    def process(self):
        """Build the shared grid graph and save both splits to disk."""
        # download() is what populates traindata/testdata; make sure it has
        # run so the loaders below never receive None.
        if self.traindata is None or self.testdata is None:
            self.download()
        train_loader = torch.utils.data.DataLoader(self.traindata)
        test_loader = torch.utils.data.DataLoader(self.testdata)
        if self.name == 'MNIST':
            num_row, num_col = 28, 28
        elif self.name == 'CIFAR10':
            num_row, num_col = 32, 32
        else:
            raise ValueError('dataset error')
        # Each row index has 3 neighbours in the clipped window except the
        # two border rows, which have 2; the same holds for columns.
        num_edges = (3 * num_row - 2) * (3 * num_col - 2)
        edge_index_array = np.zeros(shape=[2, num_edges])
        # NOTE(review): edge_attr is stored with shape [1, num_edges];
        # confirm that downstream consumers expect this layout.
        edge_attr_array = np.zeros(shape=[1, num_edges])
        curt = 0
        for j in range(num_row):
            for k in range(num_col):
                for m in range(max(j - 1, 0), min(j + 1, num_row - 1) + 1):
                    for n in range(max(k - 1, 0), min(k + 1, num_col - 1) + 1):
                        # Row-major node ids: the stride is the number of
                        # columns, matching how node features are flattened.
                        edge_index_array[0][curt] = j * num_col + k
                        edge_index_array[1][curt] = m * num_col + n
                        edge_attr_array[0][curt] = self.weight(j, k, m, n)
                        curt += 1
        edge_index = torch.from_numpy(edge_index_array).to(torch.int64)
        edge_attr = torch.from_numpy(edge_attr_array).to(torch.float)

        def transform_data(data_loader, edge_index, edge_attr):
            # Turn every (image, label) pair into a Data object that shares
            # the precomputed edge_index / edge_attr tensors.
            data_list = []
            _, num_row, num_col = data_loader.dataset[0][0].size()
            if self.coord:
                x = torch.arange(num_col, dtype=torch.float)
                x = x.view((1, -1)).repeat(num_row, 1).view((-1, 1)) - x.mean()
                y = torch.arange(num_row, dtype=torch.float)
                y = y.view((-1, 1)).repeat(1, num_col).view((-1, 1)) - y.mean()
                coord = torch.cat([x, y], -1)

            for image, label in data_loader:
                # (C, H, W) -> (H * W, C): one feature row per pixel.
                x = image[0].permute([1, 2, 0]).view(
                    num_row * num_col, image[0].size()[0])
                if self.coord:
                    x = torch.cat([x, coord], -1)
                data = Data(
                    edge_index=edge_index, edge_attr=edge_attr, x=x, y=label)
                # pre_filter is a predicate: skip rejected samples instead
                # of overwriting the sample with the returned boolean.
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)
            return data_list

        train_data_list = transform_data(train_loader, edge_index, edge_attr)
        torch.save(self.collate(train_data_list), self.processed_paths[0])

        test_data_list = transform_data(test_loader, edge_index, edge_attr)
        torch.save(self.collate(test_data_list), self.processed_paths[1])

    @staticmethod
    def weight(pos_x, pos_y, pos_x_new, pos_y_new):
        """Return ``exp(-d)`` with ``d`` the squared Euclidean distance."""
        dist = (pos_x - pos_x_new) ** 2 + (pos_y - pos_y_new) ** 2
        return math.exp(-dist)

    def __repr__(self):
        return '{}({})'.format(self.name, len(self))
131 |
132 |
--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/tu_dataset.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.datasets import TUDataset
2 | import torch
3 | from itertools import repeat, product
4 | from copy import deepcopy
5 |
6 |
7 |
class TUDatasetExt(TUDataset):
    r"""A variety of graph kernel benchmark datasets, *e.g.* "IMDB-BINARY",
    "REDDIT-BINARY" or "PROTEINS", collected from the `TU Dortmund University
    <http://graphkernels.cs.tu-dortmund.de>`_.

    Differs from the parent class only in that the name of the processed
    file is configurable.

    Args:
        root (string): Root directory where the dataset should be saved.
        name (string): The `name <http://graphkernels.cs.tu-dortmund.de>`_ of
            the dataset.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
        pre_filter (callable, optional): A function that takes in an
            :obj:`torch_geometric.data.Data` object and returns a boolean
            value, indicating whether the data object should be included in the
            final dataset. (default: :obj:`None`)
        use_node_attr (bool, optional): If :obj:`True`, the dataset will
            contain additional continuous node features (if present).
            (default: :obj:`False`)
        processed_filename (string, optional): Value returned by
            :obj:`processed_file_names`. (default: :obj:`'data.pt'`)
    """

    url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/' \
          'graphkerneldatasets'

    def __init__(self,
                 root,
                 name,
                 transform=None,
                 pre_transform=None,
                 pre_filter=None,
                 use_node_attr=False,
                 processed_filename='data.pt'):
        # Stored before the parent constructor runs, because the
        # processed_file_names property below depends on it.
        self.processed_filename = processed_filename
        super().__init__(
            root, name, transform, pre_transform, pre_filter, use_node_attr)

    @property
    def processed_file_names(self):
        """Name of the processed file, as passed to the constructor."""
        return self.processed_filename
52 |
53 |
--------------------------------------------------------------------------------
/semisupervised_TU/finetuning/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
def print_weights(model):
    """Print the name and shape of every trainable parameter of ``model``.

    Parameters whose ``requires_grad`` is false are skipped. Standard output
    is flushed afterwards.
    """
    trainable = (
        (label, tensor)
        for label, tensor in model.named_parameters()
        if tensor.requires_grad
    )
    for label, tensor in trainable:
        print(label, tensor.shape)
    sys.stdout.flush()
8 |
9 |
def logger(info):
    """Print train/test accuracy on the first and every tenth epoch.

    ``info`` is a mapping with the keys ``'fold'`` and ``'epoch'``; the keys
    ``'train_acc'`` and ``'test_acc'`` are only read on epochs that are
    reported. Standard output is flushed on every call.
    """
    fold = info['fold']
    epoch = info['epoch']
    is_report_epoch = epoch == 1 or epoch % 10 == 0
    if is_report_epoch:
        template = '{:02d}/{:03d}: Train Acc: {:.3f}, Test Accuracy: {:.3f}'
        print(template.format(
            fold, epoch, info['train_acc'], info['test_acc']))
    sys.stdout.flush()
17 |
18 |
19 |
--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/datasets.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import re
3 |
4 | import torch
5 | from torch_geometric.datasets import MNISTSuperpixels
6 | from torch_geometric.utils import degree
7 | import torch_geometric.transforms as T
8 | from feature_expansion import FeatureExpander
9 | from image_dataset import ImageDataset
10 | from tu_dataset import TUDatasetExt
11 |
12 |
def get_dataset(name, sparse=True, feat_str="deg+ak3+reall", root=None, aug=None, aug_ratio=None):
    """Build the dataset(s) for ``name`` with features described by ``feat_str``.

    ``feat_str`` is scanned for feature tokens: ``deg``, ``odeg<N>``,
    ``ak<N>`` / ``ank<N>``, ``groupd<N>``, ``re<word>``, ``randa<float>``,
    ``randd<float>``, ``cent`` and ``coord``. The parsed values configure a
    ``FeatureExpander`` whose ``transform`` is used as ``pre_transform``.

    Args:
        name: Dataset name. Names containing ``MNIST`` or ``CIFAR`` select
            the image datasets; anything else is loaded via ``TUDatasetExt``.
        sparse: Accepted but not used by this function.
        feat_str: Feature specification; it is also embedded in the
            processed file names.
        root: Parent directory of the dataset folder. When ``None`` or empty,
            ``~/pyG_data`` is used.
        aug, aug_ratio: Printed, and forwarded to ``TUDatasetExt`` only.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple for the image datasets,
        otherwise a single ``TUDatasetExt`` with ``edge_attr`` cleared.
    """
    if root is None or root == '':
        path = osp.join(osp.expanduser('~'), 'pyG_data', name)
    else:
        path = osp.join(root, name)

    def first_match(pattern, convert, default):
        # First capture of `pattern` in feat_str, converted; else `default`.
        found = re.findall(pattern, feat_str)
        return convert(found[0]) if found else default

    # Named use_degree so the module-level `degree` import is not shadowed.
    # Patterns are raw strings: "\d", "\w" and "\." are invalid escapes in
    # ordinary string literals and trigger warnings on recent Python.
    use_degree = "deg" in feat_str
    onehot_maxdeg = first_match(r"odeg(\d+)", int, None)
    k = first_match(r"an{0,1}k(\d+)", int, 0)
    groupd = first_match(r"groupd(\d+)", int, 0)
    remove_edges = first_match(r"re(\w+)", str, 'none')
    edge_noises_add = first_match(r"randa([\d\.]+)", float, 0)
    edge_noises_delete = first_match(r"randd([\d\.]+)", float, 0)
    centrality = "cent" in feat_str
    coord = "coord" in feat_str

    pre_transform = FeatureExpander(
        degree=use_degree, onehot_maxdeg=onehot_maxdeg, AK=k,
        centrality=centrality, remove_edges=remove_edges,
        edge_noises_add=edge_noises_add, edge_noises_delete=edge_noises_delete,
        group_degree=groupd).transform

    print(aug, aug_ratio)
    if 'MNIST' in name or 'CIFAR' in name:
        if name == 'MNIST_SUPERPIXEL':
            train_dataset = MNISTSuperpixels(path, True,
                pre_transform=pre_transform, transform=T.Cartesian())
            test_dataset = MNISTSuperpixels(path, False,
                pre_transform=pre_transform, transform=T.Cartesian())
        else:
            train_dataset = ImageDataset(path, name, True,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
            test_dataset = ImageDataset(path, name, False,
                pre_transform=pre_transform, coord=coord,
                processed_file_prefix="data_%s" % feat_str)
        dataset = (train_dataset, test_dataset)
    else:
        dataset = TUDatasetExt(
            path, name, pre_transform=pre_transform,
            use_node_attr=True, processed_filename="data_%s.pt" % feat_str, aug=aug, aug_ratio=aug_ratio)

        # Edge attributes are dropped for the TU datasets only; the image
        # branch yields a tuple, which has no `.data` attribute.
        dataset.data.edge_attr = None

    return dataset
64 |
--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/gcn_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import Parameter
3 | from torch_scatter import scatter_add
4 | from torch_geometric.nn.conv import MessagePassing
5 | from torch_geometric.utils import remove_self_loops, add_self_loops
6 | from torch_geometric.nn.inits import glorot, zeros
7 |
8 |
class GCNConv(MessagePassing):
    r"""The graph convolutional operator from the `"Semi-supervised
    Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    .. math::
        \mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
        \mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta},

    where :math:`\mathbf{\hat{A}} = \mathbf{A} + \mathbf{I}` denotes the
    adjacency matrix with inserted self-loops and
    :math:`\hat{D}_{ii} = \sum_{j=0} \hat{A}_{ij}` its diagonal degree matrix.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        improved (bool, optional): If set to :obj:`True`, the layer computes
            :math:`\mathbf{\hat{A}}` as :math:`\mathbf{A} + 2\mathbf{I}`.
            (default: :obj:`False`)
        cached (bool, optional): If set to :obj:`True`, the layer will cache
            the computation of :math:`{\left(\mathbf{\hat{D}}^{-1/2}
            \mathbf{\hat{A}} \mathbf{\hat{D}}^{-1/2} \right)}`.
            (default: :obj:`False`)
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
        edge_norm (bool, optional): whether or not to normalize adj matrix.
            (default: :obj:`True`)
        gfn (bool, optional): If `True`, only linear transform (1x1 conv) is
            applied to every nodes. (default: :obj:`False`)
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 improved=False,
                 cached=False,
                 bias=True,
                 edge_norm=True,
                 gfn=False):
        super(GCNConv, self).__init__('add')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached
        # Holds the (edge_index, norm) pair computed by the last forward pass.
        self.cached_result = None
        self.edge_norm = edge_norm
        self.gfn = gfn

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight and bias and drop any cached result."""
        glorot(self.weight)
        # NOTE(review): self.bias may be None here; presumably
        # torch_geometric.nn.inits.zeros ignores None -- confirm.
        zeros(self.bias)
        self.cached_result = None

    @staticmethod
    def norm(edge_index, num_nodes, edge_weight, improved=False, dtype=None):
        """Add self-loops and compute symmetric-normalisation edge weights.

        Existing self-loops are removed and one loop per node is appended
        with weight 1 (or 2 when ``improved`` is set). When ``edge_weight``
        is ``None`` every edge gets weight 1 of the given ``dtype``.

        Returns:
            A tuple ``(edge_index, norm)`` where ``edge_index`` includes the
            appended self-loops and ``norm`` holds one coefficient per edge,
            ``deg[row]^-1/2 * w * deg[col]^-1/2``.
        """
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ),
                                     dtype=dtype,
                                     device=edge_index.device)
        edge_weight = edge_weight.view(-1)
        assert edge_weight.size(0) == edge_index.size(1)

        edge_index, edge_weight = remove_self_loops(edge_index, edge_weight)
        with_loops = add_self_loops(edge_index, num_nodes=num_nodes)
        # Older torch_geometric releases return the index tensor alone while
        # newer ones return (edge_index, edge_weight); accept both so that
        # the row/col unpacking below always sees the index tensor.
        edge_index = with_loops[0] if isinstance(with_loops, tuple) \
            else with_loops
        # Add edge_weight for loop edges.
        loop_weight = torch.full((num_nodes, ),
                                 1 if not improved else 2,
                                 dtype=edge_weight.dtype,
                                 device=edge_weight.device)
        edge_weight = torch.cat([edge_weight, loop_weight], dim=0)

        row, col = edge_index
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        # A zero degree yields inf after pow(-0.5); zero those entries out.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0

        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]

    def forward(self, x, edge_index, edge_weight=None):
        """Apply the linear transform and, unless ``gfn``, propagate it.

        With ``gfn`` set, only ``x @ weight`` is returned (no propagation and
        no bias). With ``cached`` set, the ``(edge_index, norm)`` pair from
        the first call is reused and later ``edge_index`` arguments are
        ignored.
        """
        x = torch.matmul(x, self.weight)
        if self.gfn:
            return x

        if not self.cached or self.cached_result is None:
            if self.edge_norm:
                edge_index, norm = GCNConv.norm(
                    edge_index, x.size(0), edge_weight, self.improved, x.dtype)
            else:
                # Without edge_norm no self-loops are added either.
                norm = None
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)

    def message(self, x_j, norm):
        """Scale each neighbour feature by its edge coefficient if enabled."""
        if self.edge_norm:
            return norm.view(-1, 1) * x_j
        else:
            return x_j

    def update(self, aggr_out):
        """Add the bias, when one is configured, to the aggregated output."""
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
128 |
--------------------------------------------------------------------------------
/semisupervised_TU/pre-training/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
def print_weights(model):
    """Print the name and shape of every trainable parameter of ``model``.

    Parameters whose ``requires_grad`` is false are skipped. Standard output
    is flushed afterwards.
    """
    trainable = (
        (label, tensor)
        for label, tensor in model.named_parameters()
        if tensor.requires_grad
    )
    for label, tensor in trainable:
        print(label, tensor.shape)
    sys.stdout.flush()
8 |
9 |
def logger(info):
    """Print train/test accuracy on the first and every tenth epoch.

    ``info`` is a mapping with the keys ``'fold'`` and ``'epoch'``; the keys
    ``'train_acc'`` and ``'test_acc'`` are only read on epochs that are
    reported. Standard output is flushed on every call.
    """
    fold = info['fold']
    epoch = info['epoch']
    is_report_epoch = epoch == 1 or epoch % 10 == 0
    if is_report_epoch:
        template = '{:02d}/{:03d}: Train Acc: {:.3f}, Test Accuracy: {:.3f}'
        print(template.format(
            fold, epoch, info['train_acc'], info['test_acc']))
    sys.stdout.flush()
17 |
18 |
19 |
--------------------------------------------------------------------------------
/simgrace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/simgrace.png
--------------------------------------------------------------------------------
/transfer_learning/README.md:
--------------------------------------------------------------------------------
1 | ## Dependencies & Dataset
2 |
3 | Please refer to https://github.com/snap-stanford/pretrain-gnns#installation for environment setup and to https://github.com/snap-stanford/pretrain-gnns#dataset-download to download the datasets.
4 |
5 | ## Training & Evaluation
6 | ### Step 1: Pre-training: ###
7 | ```
8 | cd ./bio
9 | python pretrain_simgrace.py --eta 0.1
10 | cd ../chem
11 | python pretrain_simgrace.py --eta 0.1
12 | ```
13 | ### Step 2: Finetuning: ###
14 | ```
15 | cd ./bio
16 | ./finetune.sh
17 | cd ../chem
18 | ./run.sh
19 | ```
20 | Results will be recorded in ```result.log```.
21 |
22 |
23 | ## Acknowledgements
24 |
25 | * https://github.com/snap-stanford/pretrain-gnns.
26 | * https://github.com/Shen-Lab/GraphCL/tree/master/transferLearning_MoleculeNet_PPI.
27 |
--------------------------------------------------------------------------------
/transfer_learning/bio/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from torch.utils.data.dataloader import default_collate
3 |
4 | from batch import BatchFinetune, BatchMasking, BatchAE, BatchSubstructContext
5 |
class DataLoaderFinetune(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchFinetune.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function by reference rather than wrapping it in a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # started with the "spawn" method.
        super(DataLoaderFinetune, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchFinetune.from_data_list,
            **kwargs)
24 |
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchMasking.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function by reference rather than wrapping it in a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # started with the "spawn" method.
        super(DataLoaderMasking, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
43 |
44 |
class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchAE.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function by reference rather than wrapping it in a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # started with the "spawn" method.
        super(DataLoaderAE, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
63 |
64 |
class DataLoaderSubstructContext(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    :meth:`BatchSubstructContext.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function by reference rather than wrapping it in a
        # lambda: lambdas cannot be pickled, which breaks worker processes
        # started with the "spawn" method.
        super(DataLoaderSubstructContext, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchSubstructContext.from_data_list,
            **kwargs)
83 |
84 |
85 |
86 |
--------------------------------------------------------------------------------
/transfer_learning/bio/finetune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Fine-tunes the pre-trained GIN checkpoint on the "species" split once per
# run seed (0-9), for 10 epochs each, on device 0.
split=species
### for GIN
for runseed in 0 1 2 3 4 5 6 7 8 9
do
    # The checkpoints shipped in models_simgrace/ are named simgrace_<epoch>.pth.
    python finetune.py --model_file models_simgrace/simgrace_80.pth --split "$split" --epochs 10 --device 0 --runseed "$runseed" --gnn_type gin --lr 1e-3
done
8 |
--------------------------------------------------------------------------------
/transfer_learning/bio/finetune_tune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Usage: finetune_tune.sh <runseed> <device>
# Runs finetune.py on the "species" split for 50 epochs over several
# pre-trained checkpoints and GNN types, all with one seed and device.
runseed=$1
device=$2
split=species

### for GIN
# For every unsupervised objective, fine-tune both the plain checkpoint and
# its "supervised_" counterpart from model_gin/.
for unsup in contextpred infomax edgepred masking
do
    model_file=${unsup}
    python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin

    model_file=supervised_${unsup}
    python finetune.py --model_file model_gin/${model_file}.pth --split $split --filename gin_${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type gin
done

# GIN runs without a --model_file, and with the supervised.pth checkpoint.
python finetune.py --split $split --filename gin_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type gin
python finetune.py --split $split --model_file model_gin/supervised.pth --filename gin_supervised --epochs 50 --device $device --runseed $runseed --gnn_type gin


### for other GNNs
# Each architecture is run once without a --model_file and once with its
# <gnn_type>_supervised_masking checkpoint from model_architecture/.
for gnn_type in gcn gat graphsage
do
    python finetune.py --split $split --filename ${gnn_type}_nopretrain --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type

    model_file=${gnn_type}_supervised_masking
    python finetune.py --model_file model_architecture/${model_file}.pth --split $split --filename ${model_file} --epochs 50 --device $device --runseed $runseed --gnn_type $gnn_type

done
--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_100.pth
--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_20.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_20.pth
--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_40.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_40.pth
--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_60.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_60.pth
--------------------------------------------------------------------------------
/transfer_learning/bio/models_simgrace/simgrace_80.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/bio/models_simgrace/simgrace_80.pth
--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_deepgraphinfomax.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import BioDataset
4 | from torch_geometric.data import DataLoader
5 | from torch_geometric.nn.inits import uniform
6 | from torch_geometric.nn import global_mean_pool
7 |
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | import torch.optim as optim
12 |
13 | from tqdm import tqdm
14 | import numpy as np
15 |
16 | from model import GNN
17 | from sklearn.metrics import roc_auc_score
18 |
19 | import pandas as pd
20 |
21 |
def cycle_index(num, shift):
    """Return the indices ``0..num-1`` rotated left by ``shift`` positions.

    Element ``i`` of the result is ``(i + shift) % num``, so
    ``cycle_index(5, 1)`` is ``[1, 2, 3, 4, 0]``. Using modular arithmetic
    keeps every index inside ``[0, num)`` for any integer ``shift``,
    including ``0`` (identity) and values larger than ``num``.

    Args:
        num (int): Number of indices to produce.
        shift (int): Rotation offset.

    Returns:
        A 1-D ``torch.int64`` tensor of length ``num``.
    """
    arr = torch.arange(num)
    if num == 0:
        # Nothing to rotate; also avoids taking a remainder modulo zero.
        return arr
    return (arr + shift) % num
26 |
class Discriminator(nn.Module):
    """Bilinear scorer between node embeddings and summary vectors.

    Holds one ``hidden_dim x hidden_dim`` weight matrix ``W`` and scores each
    row pair as ``sum(x * (summary @ W))``.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight via ``uniform``, passing its row count."""
        uniform(self.weight.size(0), self.weight)

    def forward(self, x, summary):
        """Return one score per row of ``x`` against the matching summary."""
        projected = torch.matmul(summary, self.weight)
        scores = torch.sum(x * projected, dim=1)
        return scores
40 |
class Infomax(nn.Module):
    """Container bundling the pieces used for Deep Graph Infomax training.

    Attributes are set in this order: ``gnn`` (the encoder),
    ``discriminator``, ``loss`` (a ``BCEWithLogitsLoss`` instance) and
    ``pool`` (``global_mean_pool``). The class defines no ``forward``.
    """

    def __init__(self, gnn, discriminator):
        super().__init__()
        # Registration order is kept: gnn, then discriminator, then loss.
        self.gnn = gnn
        self.discriminator = discriminator
        self.loss = nn.BCEWithLogitsLoss()
        self.pool = global_mean_pool
48 |
49 |
def train(args, model, device, loader, optimizer):
    """Run one Deep Graph Infomax training epoch.

    For every batch, node embeddings are scored against the sigmoid of
    their own graph's mean-pooled summary (positives) and against the
    summary of the next graph in the batch, cyclically shifted by one
    (negatives). The loss is the sum of the two BCE-with-logits terms.

    Args:
        args: Parsed command-line arguments (not read in this function).
        model: Object exposing ``gnn``, ``pool``, ``discriminator``, ``loss``.
        device: Device each batch is moved to.
        loader: Iterable of batches.
        optimizer: Optimizer stepped once per batch.

    Returns:
        Tuple ``(mean_accuracy, mean_loss)`` averaged over the batches.
    """
    model.train()

    acc_total = 0
    loss_total = 0

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
        graph_summary = torch.sigmoid(model.pool(node_emb, batch.batch))

        # Broadcast each graph summary to that graph's nodes.
        pos_summary = graph_summary[batch.batch]

        # Negatives pair every node with the summary of a different graph.
        rotated = graph_summary[cycle_index(len(graph_summary), 1)]
        neg_summary = rotated[batch.batch]

        positive_score = model.discriminator(node_emb, pos_summary)
        negative_score = model.discriminator(node_emb, neg_summary)

        optimizer.zero_grad()
        pos_loss = model.loss(positive_score, torch.ones_like(positive_score))
        neg_loss = model.loss(negative_score, torch.zeros_like(negative_score))
        loss = pos_loss + neg_loss
        loss.backward()

        optimizer.step()

        loss_total += float(loss.detach().cpu().item())
        # A positive counts as correct when its logit is > 0, a negative
        # when its logit is < 0.
        n_correct = torch.sum(positive_score > 0) + torch.sum(negative_score < 0)
        acc = n_correct.to(torch.float32) / float(2 * len(positive_score))
        acc_total += float(acc.detach().cpu().item())

    n_batches = step + 1
    return acc_total / n_batches, loss_total / n_batches
80 |
81 |
def main():
    """Parse CLI options, pre-train a GNN with Deep Graph Infomax, save it.

    The encoder's ``state_dict`` is written to ``<model_file>.pth`` after the
    last epoch when ``--model_file`` is non-empty.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features across layers are combined. last, sum, max or concat')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--model_file', type=str, default='',
                        help='filename to output the pre-trained model')
    parser.add_argument('--seed', type=int, default=0,
                        help="Seed for splitting dataset.")
    parser.add_argument('--num_workers', type=int, default=4,
                        help='number of workers for dataset loading')
    args = parser.parse_args()

    # The RNG seeds are fixed to 0 here; --seed is parsed but not used for them.
    torch.manual_seed(0)
    np.random.seed(0)
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    # set up dataset
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised, data_type='unsupervised')

    print(dataset)

    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True,
                        num_workers=args.num_workers)

    # set up model
    gnn = GNN(args.num_layer, args.emb_dim, JK=args.JK,
              drop_ratio=args.dropout_ratio, gnn_type=args.gnn_type)
    discriminator = Discriminator(args.emb_dim)
    model = Infomax(gnn, discriminator)
    model.to(device)

    # set up optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))

        train_acc, train_loss = train(args, model, device, loader, optimizer)

        print(train_acc)
        print(train_loss)

    # Only the encoder weights are persisted, not the discriminator.
    if args.model_file != "":
        torch.save(model.gnn.state_dict(), args.model_file + ".pth")
149 |
150 | if __name__ == "__main__":
151 | main()
152 |
--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_edgepred.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import BioDataset
4 | from dataloader import DataLoaderAE
5 | from util import NegativeEdge
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torch.optim as optim
11 |
12 | from tqdm import tqdm
13 | import numpy as np
14 |
15 | from model import GNN
16 | import pandas as pd
17 |
18 | criterion = nn.BCEWithLogitsLoss()
19 |
def train(args, model, device, loader, optimizer):
    """Run one edge-prediction pre-training epoch.

    For every batch the model produces node embeddings; an edge is scored by
    the dot product of its two endpoint embeddings. Edges taken from
    ``batch.edge_index`` are trained towards label 1 and edges taken from
    ``batch.negative_edge_index`` towards label 0 using the module-level
    ``criterion``.

    Args:
        args: parsed command-line arguments (not read here).
        model: network called as ``model(x, edge_index, edge_attr)``.
        device: device each batch is moved to.
        loader: iterable of batches.
        optimizer: optimizer stepped once per batch.

    Returns:
        Tuple ``(mean accuracy, mean loss)`` averaged over the batches.
    """
    model.train()

    acc_total = 0
    loss_total = 0

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        node_emb = model(batch.x, batch.edge_index, batch.edge_attr)

        # Only every second column of edge_index is used as a positive edge.
        # NOTE(review): presumably each undirected edge is stored in both
        # directions back to back -- confirm against the dataset loader.
        pos_src = batch.edge_index[0, ::2]
        pos_dst = batch.edge_index[1, ::2]
        neg_src = batch.negative_edge_index[0]
        neg_dst = batch.negative_edge_index[1]

        positive_score = torch.sum(node_emb[pos_src] * node_emb[pos_dst], dim=1)
        negative_score = torch.sum(node_emb[neg_src] * node_emb[neg_dst], dim=1)

        optimizer.zero_grad()
        positive_loss = criterion(positive_score, torch.ones_like(positive_score))
        negative_loss = criterion(negative_score, torch.zeros_like(negative_score))
        loss = positive_loss + negative_loss
        loss.backward()
        optimizer.step()

        loss_total += float(loss.detach().cpu().item())

        # A positive logit counts as "edge", a negative logit as "no edge".
        # The denominator is twice the number of positive scores.
        num_correct = torch.sum(positive_score > 0) + torch.sum(negative_score < 0)
        acc = num_correct.to(torch.float32) / float(2 * len(positive_score))
        acc_total += float(acc.detach().cpu().item())

    num_batches = step + 1
    return acc_total / num_batches, loss_total / num_batches
43 |
44 |
def main():
    """Parse the command line and run edge-prediction pre-training.

    Loads the unsupervised BioDataset with the NegativeEdge transform, trains
    a GNN for ``--epochs`` epochs while printing accuracy and loss per epoch,
    and finally writes the model weights to ``<model_file>.pth`` when
    ``--model_file`` is non-empty.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch implementation of pre-training of graph neural networks')
    add = parser.add_argument
    add('--device', type=int, default=0,
        help='which gpu to use if any (default: 0)')
    add('--batch_size', type=int, default=256,
        help='input batch size for training (default: 256)')
    add('--epochs', type=int, default=100,
        help='number of epochs to train (default: 100)')
    add('--lr', type=float, default=0.001,
        help='learning rate (default: 0.001)')
    add('--decay', type=float, default=0,
        help='weight decay (default: 0)')
    add('--num_layer', type=int, default=5,
        help='number of GNN message passing layers (default: 5).')
    add('--emb_dim', type=int, default=300,
        help='embedding dimensions (default: 300)')
    add('--dropout_ratio', type=float, default=0,
        help='dropout ratio (default: 0)')
    add('--JK', type=str, default="last",
        help='how the node features across layers are combined. last, sum, max or concat')
    add('--gnn_type', type=str, default="gin")
    add('--model_file', type=str, default='',
        help='filename to output the pre-trained model')
    add('--num_workers', type=int, default=12,
        help='number of workers for dataset loading')
    args = parser.parse_args()

    # Fixed seeds for torch and numpy.
    torch.manual_seed(0)
    np.random.seed(0)
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")
    if cuda_available:
        torch.cuda.manual_seed_all(0)

    # Set up dataset; NegativeEdge is applied as the dataset transform.
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised, data_type='unsupervised',
                         transform=NegativeEdge())
    dataset.data.to(device)

    print(dataset)

    loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True,
                          num_workers=args.num_workers)

    # Set up model
    model = GNN(args.num_layer, args.emb_dim, JK=args.JK,
                drop_ratio=args.dropout_ratio, gnn_type=args.gnn_type).to(device)

    model.to(device)

    # Set up optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr,
                           weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))

        train_acc, train_loss = train(args, model, device, loader, optimizer)

        print(train_acc)
        print(train_loss)

    # Save once, after the final epoch.
    if args.model_file != "":
        torch.save(model.state_dict(), args.model_file + ".pth")
108 |
109 |
110 |
111 | if __name__ == "__main__":
112 | main()
--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_masking.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import BioDataset
4 | from dataloader import DataLoaderMasking
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import torch.optim as optim
10 |
11 | from tqdm import tqdm
12 | import numpy as np
13 |
14 | from model import GNN, GNN_graphpred
15 |
16 | import pandas as pd
17 |
18 | from util import MaskEdge
19 |
20 | from torch_geometric.nn import global_add_pool, global_mean_pool, global_max_pool
21 |
22 | #criterion = nn.BCEWithLogitsLoss()
23 | criterion = nn.CrossEntropyLoss()
24 |
def compute_accuracy(pred, target):
    """Return the fraction of rows of ``pred`` whose arg-max column equals ``target``.

    ``pred`` is reduced along dim 1 to the index of its largest entry per row
    and compared element-wise with ``target``. The result is a Python float;
    an empty ``pred`` raises ZeroDivisionError.
    """
    _, predicted_class = torch.max(pred.detach(), dim=1)
    num_correct = torch.sum(predicted_class == target).cpu().item()
    return float(num_correct) / len(pred)
28 |
def train(args, model_list, loader, optimizer_list, device):
    """Run one masked-edge pre-training epoch.

    Args:
        args: parsed command-line arguments (not read here).
        model_list: pair ``(model, linear_pred_edges)``; the first produces
            node representations, the second classifies edge representations.
        loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``masked_edge_idx`` and ``mask_edge_label``.
        optimizer_list: pair of optimizers, one per entry of ``model_list``.
        device: device each batch is moved to.

    Returns:
        Tuple ``(mean loss, mean accuracy)`` averaged over the batches.
    """
    model, linear_pred_edges = model_list
    optimizer_model, optimizer_linear_pred_edges = optimizer_list
    optimizers = (optimizer_model, optimizer_linear_pred_edges)

    model.train()
    linear_pred_edges.train()

    total_loss = 0
    total_acc = 0

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)

        node_rep = model(batch.x, batch.edge_index, batch.edge_attr)

        # Predict the edge types: a masked edge is represented by the sum of
        # the representations of its two endpoints.
        masked_edge_index = batch.edge_index[:, batch.masked_edge_idx]
        src_nodes = masked_edge_index[0]
        dst_nodes = masked_edge_index[1]
        pred_edge = linear_pred_edges(node_rep[src_nodes] + node_rep[dst_nodes])

        # Turn the per-class label rows into class indices for the
        # cross-entropy criterion.
        edge_label = torch.argmax(batch.mask_edge_label, dim=1)

        total_acc += compute_accuracy(pred_edge, edge_label)

        for opt in optimizers:
            opt.zero_grad()

        loss = criterion(pred_edge, edge_label)
        loss.backward()

        for opt in optimizers:
            opt.step()

        total_loss += float(loss.cpu().item())

    num_batches = step + 1
    return total_loss / num_batches, total_acc / num_batches
67 |
def main():
    """Parse the command line and run masked-edge pre-training.

    Loads the unsupervised BioDataset with the MaskEdge transform, trains a
    GNN together with a linear edge-type classifier for ``--epochs`` epochs
    while printing loss and accuracy per epoch, and finally writes the GNN
    weights to ``<model_file>.pth`` when ``--model_file`` is non-empty.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    # The help text previously read "dropout ratio", which described the
    # wrong option; this value is handed to MaskEdge(mask_rate=...).
    parser.add_argument('--mask_rate', type=float, default=0.15,
                        help='mask rate (default: 0.15)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features are combined across layers. last, sum, max or concat')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--model_file', type=str, default = '', help='filename to output the model')
    # NOTE(review): --seed is parsed but never read below; the RNG seeds are
    # fixed to 0.
    parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.")
    parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    print("num layer: %d mask rate: %f" %(args.num_layer, args.mask_rate))

    # Set up dataset; MaskEdge is applied as the dataset transform.
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised, data_type='unsupervised', transform = MaskEdge(mask_rate = args.mask_rate))

    print(dataset)

    loader = DataLoaderMasking(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)

    # Set up the GNN being pre-trained.
    model = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type).to(device)
    # Linear layer for classifying different edge types (7 output classes).
    linear_pred_edges = torch.nn.Linear(args.emb_dim, 7).to(device)

    model_list = [model, linear_pred_edges]

    # Set up optimizers, one per module, sharing lr and weight decay.
    optimizer_model = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    optimizer_linear_pred_edges = optim.Adam(linear_pred_edges.parameters(), lr=args.lr, weight_decay=args.decay)

    optimizer_list = [optimizer_model, optimizer_linear_pred_edges]

    for epoch in range(1, args.epochs+1):
        print("====epoch " + str(epoch))

        train_loss, train_acc = train(args, model_list, loader, optimizer_list, device)
        print(train_loss, train_acc)

    # Only the GNN weights are saved; the linear edge classifier is not.
    if not args.model_file == "":
        torch.save(model.state_dict(), args.model_file + ".pth")
135 |
136 |
137 | if __name__ == "__main__":
138 | main()
139 |
--------------------------------------------------------------------------------
/transfer_learning/bio/pretrain_supervised.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from splitters import random_split, species_split
4 | from loader import BioDataset
5 | from torch_geometric.data import DataLoader
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torch.optim as optim
11 |
12 | from tqdm import tqdm
13 | import numpy as np
14 |
15 | from model import GNN, GNN_graphpred
16 | from sklearn.metrics import roc_auc_score
17 |
18 | import pandas as pd
19 |
20 | from util import combine_dataset
21 |
22 | criterion = nn.BCEWithLogitsLoss()
23 |
def train(args, model, device, loader, optimizer):
    """Run one supervised pre-training epoch and return the mean batch loss.

    Each batch's ``go_target_pretrain`` is reshaped to the shape of the model
    output and both are compared in float64 with the module-level
    ``criterion``.

    Args:
        args: parsed command-line arguments (not read here).
        model: network called as ``model(batch)``.
        device: device each batch is moved to.
        loader: iterable of batches.
        optimizer: optimizer stepped once per batch.

    Returns:
        The accumulated (detached, CPU) loss divided by the number of batches.
    """
    model.train()

    running_loss = 0
    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        pred = model(batch)
        target = batch.go_target_pretrain.view(pred.shape).to(torch.float64)

        optimizer.zero_grad()
        loss = criterion(pred.double(), target)
        loss.backward()

        optimizer.step()

        running_loss += loss.detach().cpu()

    num_batches = step + 1
    return running_loss / num_batches
42 |
43 |
def main():
    """Parse the command line and run supervised pre-training.

    Builds the pre-training set from the supervised BioDataset according to
    ``--split``, optionally initialises the model from ``--input_model_file``,
    trains for ``--epochs`` epochs and finally writes the weights of
    ``model.gnn`` to ``<output_model_file>.pth`` when that option is non-empty.

    Raises:
        ValueError: if ``--split`` is neither "random" nor "species".
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0.2,
                        help='dropout ratio (default: 0.2)')
    parser.add_argument('--graph_pooling', type=str, default="mean",
                        help='graph level pooling (sum, mean, max, set2set, attention)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features across layers are combined. last, sum, max or concat')
    parser.add_argument('--input_model_file', type=str, default = '', help='filename to read the model (if there is any)')
    parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--num_workers', type=int, default = 0, help='number of workers for dataset loading')
    parser.add_argument('--seed', type=int, default=42, help = "Seed for splitting dataset.")
    parser.add_argument('--split', type=str, default = "species", help='Random or species split')
    args = parser.parse_args()

    # torch / numpy seeds are fixed; args.seed only drives the dataset split.
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    root_supervised = 'dataset/supervised'

    dataset = BioDataset(root_supervised, data_type='supervised')

    if args.split == "random":
        print("random splitting")
        train_dataset, valid_dataset, test_dataset = random_split(dataset, seed = args.seed)
        print(train_dataset)
        print(valid_dataset)
        # Pre-train on train + valid; the test part is left out.
        pretrain_dataset = combine_dataset(train_dataset, valid_dataset)
        print(pretrain_dataset)
    elif args.split == "species":
        print("species splitting")
        trainval_dataset, test_dataset = species_split(dataset)
        # Halve the held-out species set; only the first half joins the
        # pre-training data.
        test_dataset_broad, test_dataset_none, _ = random_split(test_dataset, seed = args.seed, frac_train=0.5, frac_valid=0.5, frac_test=0)
        print(trainval_dataset)
        print(test_dataset_broad)
        pretrain_dataset = combine_dataset(trainval_dataset, test_dataset_broad)
        print(pretrain_dataset)
    else:
        raise ValueError("Unknown split name.")

    train_loader = DataLoader(pretrain_dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)

    # One output per entry of the pre-training target vector.
    num_tasks = len(pretrain_dataset[0].go_target_pretrain)

    # Set up model
    model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK = args.JK, drop_ratio = args.dropout_ratio, graph_pooling = args.graph_pooling, gnn_type = args.gnn_type)
    if not args.input_model_file == "":
        model.from_pretrained(args.input_model_file + ".pth")

    model.to(device)

    # Set up optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs+1):
        print("====epoch " + str(epoch))

        train_loss = train(args, model, device, train_loader, optimizer)
        # Report the epoch loss; it used to be computed and then dropped,
        # leaving no way to monitor training progress.
        print(train_loss)

    # Only the GNN encoder is saved, not the prediction head.
    if not args.output_model_file == "":
        torch.save(model.gnn.state_dict(), args.output_model_file + ".pth")
128 |
129 |
130 |
131 | if __name__ == "__main__":
132 | main()
133 |
--------------------------------------------------------------------------------
/transfer_learning/bio/splitters.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import random
3 | import numpy as np
4 |
def random_split(dataset, frac_train=0.8, frac_valid=0.1, frac_test=0.1,
                 seed=0):
    """
    Randomly partition a dataset into train / valid / test subsets.

    Adapted from graph-pretrain
    :param dataset: object supporting ``len()`` and indexing with a long
        index tensor.
    :param frac_train: fraction of samples for the train subset.
    :param frac_valid: fraction of samples for the valid subset.
    :param frac_test: fraction of samples for the test subset; the three
        fractions must sum to 1.
    :param seed: value passed to ``random.seed`` before shuffling (this
        reseeds the global ``random`` module).
    :return: train, valid, test slices of the input dataset obj. The test
        slice is ``None`` when ``frac_test == 0``.
    """
    np.testing.assert_almost_equal(frac_train + frac_valid + frac_test, 1.0)

    num_mols = len(dataset)
    random.seed(seed)
    all_idx = list(range(num_mols))
    random.shuffle(all_idx)

    # Sizes are truncated towards zero; whatever is left goes to test.
    train_end = int(frac_train * num_mols)
    valid_end = int(frac_valid * num_mols) + train_end

    train_idx = all_idx[:train_end]
    valid_idx = all_idx[train_end:valid_end]
    test_idx = all_idx[valid_end:]

    assert len(set(train_idx).intersection(set(valid_idx))) == 0
    assert len(set(valid_idx).intersection(set(test_idx))) == 0
    assert len(train_idx) + len(valid_idx) + len(test_idx) == num_mols

    # Force an integer dtype: torch.tensor([]) would otherwise be float32 and
    # cannot be used as an index, so an empty split used to raise.
    train_dataset = dataset[torch.tensor(train_idx, dtype=torch.long)]
    valid_dataset = dataset[torch.tensor(valid_idx, dtype=torch.long)]
    if frac_test == 0:
        test_dataset = None
    else:
        test_dataset = dataset[torch.tensor(test_idx, dtype=torch.long)]

    return train_dataset, valid_dataset, test_dataset
42 |
def species_split(dataset, train_valid_species_id_list=[3702, 6239, 511145,
                                                        7227, 10090, 4932, 7955],
                  test_species_id_list=[9606]):
    """
    Split dataset based on species_id attribute
    :param dataset: object exposing ``data.species_id`` and supporting
        ``len()`` and indexing with a uint8 mask tensor.
    :param train_valid_species_id_list: species ids that go to train/valid.
    :param test_species_id_list: species ids that go to test.
    :return: train_valid dataset, test dataset
    """
    def membership_mask(species_ids):
        # uint8 mask with a 1 wherever the sample's species_id is listed.
        mask = torch.zeros(len(dataset), dtype=torch.uint8)
        for species in species_ids:
            mask += (dataset.data.species_id == species)
        return mask

    train_valid_mask = membership_mask(train_valid_species_id_list)
    test_mask = membership_mask(test_species_id_list)

    # Every sample must belong to exactly one of the two groups.
    assert ((train_valid_mask + test_mask) == 1).all()

    return dataset[train_valid_mask], dataset[test_mask]
70 |
71 | if __name__ == "__main__":
72 | from collections import Counter
73 |
--------------------------------------------------------------------------------
/transfer_learning/chem/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data
2 | from torch.utils.data.dataloader import default_collate
3 |
4 | from batch import BatchSubstructContext, BatchMasking, BatchAE
5 |
class DataLoaderSubstructContext(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    ``BatchSubstructContext.from_data_list``.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Hand over the collate callable itself rather than a lambda wrapper:
        # a lambda cannot be pickled, which breaks worker start-up when
        # num_workers > 0 and worker processes are spawned.
        super(DataLoaderSubstructContext, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchSubstructContext.from_data_list,
            **kwargs)
24 |
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    ``BatchMasking.from_data_list``.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Hand over the collate callable itself rather than a lambda wrapper:
        # a lambda cannot be pickled, which breaks worker start-up when
        # num_workers > 0 and worker processes are spawned.
        super(DataLoaderMasking, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
43 |
44 |
class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch using
    ``BatchAE.from_data_list``.
    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Hand over the collate callable itself rather than a lambda wrapper:
        # a lambda cannot be pickled, which breaks worker start-up when
        # num_workers > 0 and worker processes are spawned.
        super(DataLoaderAE, self).__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/transfer_learning/chem/finetune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Usage: finetune.sh <dataset>
# Fine-tunes the simgrace_80 checkpoint on <dataset> for run seeds 0..9.
split=scaffold
# Fail fast with a usage message instead of launching ten runs that each
# abort on an empty --dataset value.
dataset=${1:?usage: $0 <dataset>}

# Exported so the python child processes actually inherit it; a plain
# assignment only sets a shell-local variable.
export CUDA_VISIBLE_DEVICES=0
for runseed in 0 1 2 3 4 5 6 7 8 9
do
    python finetune.py --input_model_file models_simgrace/simgrace_80.pth --split "$split" --runseed "$runseed" --gnn_type gin --dataset "$dataset" --lr 1e-3 --epochs 100
done
11 |
--------------------------------------------------------------------------------
/transfer_learning/chem/finetune_tune.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Positional arguments: $1 = run seed, $2 = device index.
runseed=$1
device=$2
split=scaffold

### for GIN
for dataset in bbbp sider toxcast; do
    # Each unsupervised method is evaluated on its own and combined with
    # supervised pre-training.
    for unsup in contextpred infomax edgepred masking; do
        for model_file in ${unsup} supervised_${unsup}; do
            python finetune.py --input_model_file model_gin/${model_file}.pth --split $split --filename ${dataset}/gin_${model_file} --device $device --runseed $runseed --gnn_type gin --dataset $dataset
        done
    done

    # Baselines: no pre-training, and supervised pre-training only.
    python finetune.py --split $split --filename ${dataset}/gin_nopretrain --device $device --runseed $runseed --gnn_type gin --dataset $dataset
    python finetune.py --split $split --input_model_file model_gin/supervised.pth --filename ${dataset}/gin_supervised --device $device --runseed $runseed --gnn_type gin --dataset $dataset

    ### for other GNNs
    for gnn_type in gcn gat graphsage; do
        python finetune.py --split $split --filename ${dataset}/${gnn_type}_nopretrain --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset

        model_file=${gnn_type}_supervised_contextpred
        python finetune.py --input_model_file model_architecture/${model_file}.pth --split $split --filename ${dataset}/${model_file} --device $device --runseed $runseed --gnn_type $gnn_type --dataset $dataset
    done
done


# NOTE(review): fold_idx reads the same positional argument ($1) as runseed
# above -- confirm this is intended.
fold_idx=$1

for batch_size in 8 64; do
    for drop_ratio in 0 0.2 0.5; do
        for dataset in ptc_mr mutag; do
            for unsup in contextpred edgepred masking infomax; do
                for model_file in ${unsup} supervised_${unsup}; do
                    python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size
                done
            done

            model_file=supervised
            python finetune_mutag_ptc.py --input_model_file model_gin/${model_file}.pth --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/${model_file} --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size

            python finetune_mutag_ptc.py --dataset $dataset --filename ${dataset}_drop${drop_ratio}_bsize${batch_size}/nopretrain --fold_idx $fold_idx --dropout_ratio $drop_ratio --batch_size $batch_size
        done
    done
done
--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_100.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_100.pth
--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_20.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_20.pth
--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_40.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_40.pth
--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_60.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_60.pth
--------------------------------------------------------------------------------
/transfer_learning/chem/models_simgrace/simgrace_80.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/transfer_learning/chem/models_simgrace/simgrace_80.pth
--------------------------------------------------------------------------------
/transfer_learning/chem/parse_result.py:
--------------------------------------------------------------------------------
1 | ### Parsing the result!
2 | import tensorflow as tf
3 | import os
4 | import numpy as np
5 | import pickle
6 |
def get_test_acc(event_file, num_epochs=100):
    """Return the test AUC logged at the epoch with the best validation AUC.

    Reads every event of ``event_file`` and stores the first summary value of
    events tagged "data/val_auc" / "data/test_auc" at position ``step - 1``.

    Args:
        event_file: path of the event file to read.
        num_epochs: size of the per-epoch buffers; an event whose step exceeds
            this raises IndexError. Defaults to the previously hard-coded 100.

    Returns:
        The test AUC stored at the index where the validation AUC is largest.
        Epochs that were never logged count as 0.
    """
    val_auc_list = np.zeros(num_epochs)
    test_auc_list = np.zeros(num_epochs)
    # Iterate lazily; there is no need to materialise all events first.
    for e in tf.train.summary_iterator(event_file):
        if len(e.summary.value) == 0:
            continue
        first_value = e.summary.value[0]
        if first_value.tag == "data/val_auc":
            val_auc_list[e.step-1] = first_value.simple_value
        if first_value.tag == "data/test_auc":
            test_auc_list[e.step-1] = first_value.simple_value

    best_epoch = np.argmax(val_auc_list)

    return test_auc_list[best_epoch]
21 |
if __name__ == "__main__":
    # Collect, for every (seed, config, dataset) run directory, the test AUC
    # at the best validation epoch and pickle the resulting 3-D matrix.
    dataset_list = ["bbbp", "sider", "toxcast"]
    #10 random seed
    seed_list = list(range(10))
    config_list = [
        "gin_nopretrain",
        "gin_infomax",
        "gin_edgepred",
        "gin_masking",
        "gin_contextpred",
        "gin_supervised",
        "gin_supervised_infomax",
        "gin_supervised_edgepred",
        "gin_supervised_masking",
        "gin_supervised_contextpred",
        "gcn_nopretrain",
        "gcn_supervised_contextpred",
        "graphsage_nopretrain",
        "graphsage_supervised_contextpred",
        "gat_nopretrain",
        "gat_supervised_contextpred",
    ]

    # Axes: seed x config x dataset.
    result_mat = np.zeros((len(seed_list), len(config_list), len(dataset_list)))

    for i, seed in enumerate(seed_list):
        for j, config in enumerate(config_list):
            for k, dataset in enumerate(dataset_list):
                dir_name = f"runs/finetune_cls_runseed{seed}/{dataset}/{config}"
                print(dir_name)

                # Use the first directory entry whose name contains "events".
                event_file_list = [name for name in os.listdir(dir_name) if "events" in name]
                event_file = event_file_list[0]

                result_mat[i, j, k] = get_test_acc(dir_name + "/" + event_file)

    summary = {
        "result_mat": result_mat,
        "seed_list": seed_list,
        "config_list": config_list,
        "dataset_list": dataset_list,
    }
    with open("result_summary", "wb") as f:
        pickle.dump(summary, f)
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_deepgraphinfomax.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import MoleculeDataset
4 | from torch_geometric.data import DataLoader
5 | from torch_geometric.nn.inits import uniform
6 | from torch_geometric.nn import global_mean_pool
7 |
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.functional as F
11 | import torch.optim as optim
12 |
13 | from tqdm import tqdm
14 | import numpy as np
15 |
16 | from model import GNN
17 | from sklearn.metrics import roc_auc_score
18 |
19 | from splitters import scaffold_split, random_split, random_scaffold_split
20 | import pandas as pd
21 |
22 | from tensorboardX import SummaryWriter
23 |
24 |
def cycle_index(num, shift):
    """Return the indices ``0..num-1`` cyclically shifted by ``shift``.

    Entry ``i`` of the result is ``(i + shift) % num``, e.g.
    ``cycle_index(5, 1)`` gives ``[1, 2, 3, 4, 0]``.

    Args:
        num: number of indices.
        shift: rotation amount; any integer is accepted, including 0 and
            values larger than ``num`` (the slice-assignment form used before
            raised for those).

    Returns:
        A 1-D integer tensor of length ``num``.
    """
    if num == 0:
        # Nothing to rotate; also avoids a modulo by zero.
        return torch.arange(0)
    return (torch.arange(num) + shift) % num
29 |
class Discriminator(nn.Module):
    """Bilinear scorer between per-row features and a summary vector.

    Holds a single square weight matrix ``W`` of size ``hidden_dim`` and
    scores each row as ``sum(x * (summary @ W))`` along dim 1.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(hidden_dim, hidden_dim))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise ``weight`` with ``uniform``, passing its first dimension as the size."""
        uniform(self.weight.size(0), self.weight)

    def forward(self, x, summary):
        """Return one score per row: the row-wise dot product of ``x`` and ``summary @ weight``."""
        projected_summary = summary @ self.weight
        return (x * projected_summary).sum(dim=1)
43 |
class Infomax(nn.Module):
    """Container bundling the components used for Infomax pre-training.

    Stores the encoder (``gnn``), the ``discriminator``, a
    ``BCEWithLogitsLoss`` instance (``loss``) and the mean-pooling function
    (``pool``). The class defines no ``forward``; the parts are accessed
    individually as attributes.
    """

    def __init__(self, gnn, discriminator):
        super().__init__()
        # Plain function attribute, not a registered sub-module.
        self.pool = global_mean_pool
        # Sub-modules are registered in the order gnn, discriminator, loss.
        self.gnn = gnn
        self.discriminator = discriminator
        self.loss = nn.BCEWithLogitsLoss()
51 |
52 |
def train(args, model, device, loader, optimizer):
    """Run one Infomax pre-training epoch.

    For every batch, node embeddings are scored by ``model.discriminator``
    against the sigmoid of their own graph's pooled embedding (target 1) and
    against the pooled embedding of another graph in the batch, chosen by a
    cyclic shift of one (target 0).

    Args:
        args: parsed command-line arguments (not read here).
        model: object exposing ``gnn``, ``pool``, ``discriminator`` and
            ``loss``.
        device: device each batch is moved to.
        loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr`` and ``batch``.
        optimizer: optimizer stepped once per batch.

    Returns:
        Tuple ``(mean accuracy, mean loss)`` averaged over the batches.
    """
    model.train()

    train_acc_accum = 0
    train_loss_accum = 0

    for step, batch in enumerate(tqdm(loader, desc="Iteration")):
        batch = batch.to(device)
        node_emb = model.gnn(batch.x, batch.edge_index, batch.edge_attr)
        summary_emb = torch.sigmoid(model.pool(node_emb, batch.batch))

        # Expand each graph summary to one row per node of that graph.
        positive_expanded_summary_emb = summary_emb[batch.batch]

        # Negatives: pair every node with the summary of the next graph in
        # the batch (cyclically).
        shifted_summary_emb = summary_emb[cycle_index(len(summary_emb), 1)]
        negative_expanded_summary_emb = shifted_summary_emb[batch.batch]

        positive_score = model.discriminator(node_emb, positive_expanded_summary_emb)
        negative_score = model.discriminator(node_emb, negative_expanded_summary_emb)

        optimizer.zero_grad()
        loss = model.loss(positive_score, torch.ones_like(positive_score)) + model.loss(negative_score, torch.zeros_like(negative_score))
        loss.backward()

        optimizer.step()

        train_loss_accum += float(loss.detach().cpu().item())
        acc = (torch.sum(positive_score > 0) + torch.sum(negative_score < 0)).to(torch.float32)/float(2*len(positive_score))
        train_acc_accum += float(acc.detach().cpu().item())

    # `step` is the zero-based index of the last batch, so the batch count is
    # step + 1. Dividing by `step` overstated both means and raised
    # ZeroDivisionError when the loader yielded a single batch.
    num_batches = step + 1
    return train_acc_accum/num_batches, train_loss_accum/num_batches
83 |
84 |
def main():
    """Command-line entry point for infomax pre-training.

    Parses the options, seeds the RNGs from ``--seed``, builds the dataset,
    loader, model and optimizer, trains for ``--epochs`` epochs while
    printing the per-epoch accuracy and loss, and finally saves the GNN
    weights to ``<output_model_file>.pth`` when a file name was given.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument('--num_layer', type=int, default=5,
                        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument('--JK', type=str, default="last",
                        help='how the node features across layers are combined. last, sum, max or concat')
    parser.add_argument('--dataset', type=str, default = 'zinc_standard_agent', help='root directory of dataset. For now, only classification.')
    parser.add_argument('--output_model_file', type = str, default = '', help='filename to output the pre-trained model')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--seed', type=int, default=0, help = "Seed for splitting dataset.")
    parser.add_argument('--num_workers', type=int, default = 8, help='number of workers for dataset loading')
    args = parser.parse_args()

    # Honour --seed (default 0 reproduces the previous hard-coded seeding).
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    #set up dataset
    dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset)

    print(dataset)

    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)

    #set up model
    gnn = GNN(args.num_layer, args.emb_dim, JK = args.JK, drop_ratio = args.dropout_ratio, gnn_type = args.gnn_type)

    discriminator = Discriminator(args.emb_dim)

    model = Infomax(gnn, discriminator)

    model.to(device)

    #set up optimizer
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs+1):
        print("====epoch " + str(epoch))

        train_acc, train_loss = train(args, model, device, loader, optimizer)

        print(train_acc)
        print(train_loss)

    # Only the encoder is saved; the discriminator is not part of the checkpoint.
    if not args.output_model_file == "":
        torch.save(gnn.state_dict(), args.output_model_file + ".pth")
152 |
153 | if __name__ == "__main__":
154 | main()
155 |
--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_edgepred.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import MoleculeDataset
4 | from dataloader import DataLoaderAE
5 | from util import NegativeEdge
6 |
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torch.optim as optim
11 |
12 | from tqdm import tqdm
13 | import numpy as np
14 |
15 | from model import GNN, GNN_graphpred
16 | from sklearn.metrics import roc_auc_score
17 |
18 | from splitters import scaffold_split, random_split, random_scaffold_split
19 | import pandas as pd
20 |
21 | from tensorboardX import SummaryWriter
22 |
# Binary cross-entropy on raw dot-product scores (logits).
criterion = nn.BCEWithLogitsLoss()


def train(args, model, device, loader, optimizer):
    """Run one epoch of edge-prediction pre-training and report mean metrics.

    Edge scores are dot products of the two endpoint embeddings. Positives
    come from every second column of ``batch.edge_index``; negatives come
    from ``batch.negative_edge_index``.

    Args:
        args: Parsed command-line arguments (not read here).
        model: Callable mapping ``(x, edge_index, edge_attr)`` to node embeddings.
        device: Device each batch is moved to.
        loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr`` and ``negative_edge_index``.
        optimizer: Optimizer stepping the model parameters.

    Returns:
        ``(mean_accuracy, mean_loss)`` averaged over the batches seen, or
        ``(0.0, 0.0)`` if the loader yields no batch.
    """
    model.train()

    train_acc_accum = 0.0
    train_loss_accum = 0.0
    num_steps = 0

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)
        node_emb = model(batch.x, batch.edge_index, batch.edge_attr)

        # NOTE(review): the ::2 stride presumably skips the reverse copy of each
        # undirected edge -- confirm against how edge_index is built.
        pos_src = batch.edge_index[0, ::2]
        pos_dst = batch.edge_index[1, ::2]
        positive_score = torch.sum(node_emb[pos_src] * node_emb[pos_dst], dim = 1)
        negative_score = torch.sum(node_emb[batch.negative_edge_index[0]] * node_emb[batch.negative_edge_index[1]], dim = 1)

        optimizer.zero_grad()
        loss = (criterion(positive_score, torch.ones_like(positive_score))
                + criterion(negative_score, torch.zeros_like(negative_score)))
        loss.backward()
        optimizer.step()

        train_loss_accum += float(loss.detach().cpu().item())
        correct = torch.sum(positive_score > 0) + torch.sum(negative_score < 0)
        # Normalise by the real number of scored edges rather than assuming
        # as many negatives as positives.
        total = max(len(positive_score) + len(negative_score), 1)
        acc = correct.to(torch.float32) / float(total)
        train_acc_accum += float(acc.detach().cpu().item())
        num_steps += 1

    # Average over the number of batches, not the last zero-based index.
    denom = max(num_steps, 1)
    return train_acc_accum / denom, train_loss_accum / denom
48 |
49 |
def main():
    """Command-line entry point for edge-prediction pre-training.

    Parses the options, seeds the RNGs with 0, builds the dataset (with the
    ``NegativeEdge`` transform), loader, model and optimizer, trains for
    ``--epochs`` epochs while printing accuracy and loss, and finally saves
    the model weights to ``<output_model_file>.pth`` when a name was given.
    """
    # Training settings, declared as (flag, add_argument keyword arguments).
    option_table = [
        ('--device', dict(type=int, default=0,
                          help='which gpu to use if any (default: 0)')),
        ('--batch_size', dict(type=int, default=256,
                              help='input batch size for training (default: 256)')),
        ('--epochs', dict(type=int, default=100,
                          help='number of epochs to train (default: 100)')),
        ('--lr', dict(type=float, default=0.001,
                      help='learning rate (default: 0.001)')),
        ('--decay', dict(type=float, default=0,
                         help='weight decay (default: 0)')),
        ('--num_layer', dict(type=int, default=5,
                             help='number of GNN message passing layers (default: 5).')),
        ('--emb_dim', dict(type=int, default=300,
                           help='embedding dimensions (default: 300)')),
        ('--dropout_ratio', dict(type=float, default=0,
                                 help='dropout ratio (default: 0)')),
        ('--JK', dict(type=str, default="last",
                      help='how the node features across layers are combined. last, sum, max or concat')),
        ('--dataset', dict(type=str, default='zinc_standard_agent',
                           help='root directory of dataset. For now, only classification.')),
        ('--output_model_file', dict(type=str, default='',
                                     help='filename to output the pre-trained model')),
        ('--gnn_type', dict(type=str, default="gin")),
        ('--num_workers', dict(type=int, default=8,
                               help='number of workers for dataset loading')),
    ]
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Fixed seeds for reproducibility.
    torch.manual_seed(0)
    np.random.seed(0)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")
    if use_cuda:
        torch.cuda.manual_seed_all(0)

    # Dataset and loader.
    dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset, transform=NegativeEdge())
    print(dataset[0])
    loader = DataLoaderAE(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

    # Model.
    model = GNN(args.num_layer, args.emb_dim, JK=args.JK, drop_ratio=args.dropout_ratio, gnn_type=args.gnn_type)
    model.to(device)

    # Optimizer.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs + 1):
        print(f"====epoch {epoch}")
        train_acc, train_loss = train(args, model, device, loader, optimizer)
        print(train_acc)
        print(train_loss)

    if args.output_model_file != "":
        torch.save(model.state_dict(), args.output_model_file + ".pth")
110 |
111 | if __name__ == "__main__":
112 | main()
113 |
--------------------------------------------------------------------------------
/transfer_learning/chem/pretrain_supervised.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | from loader import MoleculeDataset
4 | from torch_geometric.data import DataLoader
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 | import torch.optim as optim
10 |
11 | from tqdm import tqdm
12 | import numpy as np
13 |
14 | from model import GNN, GNN_graphpred
15 | # from model_extra import GNN, GNN_graphpred
16 | from sklearn.metrics import roc_auc_score
17 |
18 | from splitters import scaffold_split, random_split, random_scaffold_split
19 | import pandas as pd
20 |
21 | from tensorboardX import SummaryWriter
22 |
# Element-wise loss so that entries with a missing label can be masked out.
criterion = nn.BCEWithLogitsLoss(reduction = "none")


def train(args, model, device, loader, optimizer):
    """Run one epoch of supervised multi-task pre-training.

    Labels are read as -1 / +1 with 0 meaning "no label": entries where
    ``y**2 > 0`` contribute to the loss, and targets are mapped to 0 / 1 via
    ``(y + 1) / 2``. The batch loss is the mean over the labelled entries.

    Args:
        args: Parsed command-line arguments (not read here).
        model: Callable mapping ``(x, edge_index, edge_attr, batch)`` to logits.
        device: Device each batch is moved to.
        loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.
        optimizer: Optimizer stepping the model parameters.
    """
    model.train()

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)
        pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
        y = batch.y.view(pred.shape).to(torch.float64)

        #Whether y is non-null or not.
        is_valid = y**2 > 0
        #Loss matrix
        loss_mat = criterion(pred.double(), (y+1)/2)
        #loss matrix after removing null target
        loss_mat = torch.where(is_valid, loss_mat, torch.zeros_like(loss_mat))

        optimizer.zero_grad()
        # Clamp the count so a batch without any labelled entry produces a
        # zero loss (and zero gradients) instead of 0/0 = NaN.
        num_valid = torch.sum(is_valid).clamp(min=1)
        loss = torch.sum(loss_mat) / num_valid
        loss.backward()

        optimizer.step()
45 |
46 |
def eval(args, model, device, loader, normalized_weight):
    """Return the weighted sum of per-task ROC-AUC scores over ``loader``.

    Predictions are collected without gradients. A task is scored only if
    its label column contains at least one ``1`` and at least one ``-1``;
    entries equal to 0 are excluded from that task's AUC. Each scored task's
    AUC is multiplied by ``normalized_weight[task]`` and the products summed.
    A message is printed when some tasks could not be scored.

    Args:
        args: Parsed command-line arguments (not read here).
        model: Callable mapping ``(x, edge_index, edge_attr, batch)`` to scores.
        device: Device each batch is moved to.
        loader: Iterable of batches.
        normalized_weight: Per-task weights, indexable by task number.
    """
    model.eval()
    label_chunks = []
    score_chunks = []

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)

        with torch.no_grad():
            pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)

        label_chunks.append(batch.y.view(pred.shape).cpu())
        score_chunks.append(pred.cpu())

    y_true = torch.cat(label_chunks, dim=0).numpy()
    y_scores = torch.cat(score_chunks, dim=0).numpy()

    num_tasks = y_true.shape[1]
    roc_list = []
    weight = []
    for task in range(num_tasks):
        labels = y_true[:, task]
        has_positive = np.sum(labels == 1) > 0
        has_negative = np.sum(labels == -1) > 0
        # AUC needs both classes to be present.
        if not (has_positive and has_negative):
            continue
        is_valid = labels**2 > 0
        targets = (labels[is_valid] + 1)/2
        roc_list.append(roc_auc_score(targets, y_scores[is_valid, task]))
        weight.append(normalized_weight[task])

    if len(roc_list) < num_tasks:
        print("Some target is missing!")
        print("Missing ratio: %f" %(1 - float(len(roc_list))/y_true.shape[1]))

    return np.array(weight).dot(np.array(roc_list))
81 |
82 |
def main():
    """Command-line entry point for supervised graph-level pre-training.

    Parses the options, seeds the RNGs with 0, builds the dataset, loader,
    model (optionally initialised from ``<input_model_file>.pth``) and
    optimizer, trains for ``--epochs`` epochs, and finally saves the GNN
    weights to ``<output_model_file>.pth`` when a name was given.

    Raises:
        ValueError: If ``--dataset`` is not ``chembl_filtered``.
    """
    # Training settings, declared as (flag, add_argument keyword arguments).
    option_table = [
        ('--device', dict(type=int, default=0,
                          help='which gpu to use if any (default: 0)')),
        ('--batch_size', dict(type=int, default=32,
                              help='input batch size for training (default: 32)')),
        ('--epochs', dict(type=int, default=100,
                          help='number of epochs to train (default: 100)')),
        ('--lr', dict(type=float, default=0.001,
                      help='learning rate (default: 0.001)')),
        ('--decay', dict(type=float, default=0,
                         help='weight decay (default: 0)')),
        ('--num_layer', dict(type=int, default=5,
                             help='number of GNN message passing layers (default: 5).')),
        ('--emb_dim', dict(type=int, default=300,
                           help='embedding dimensions (default: 300)')),
        ('--dropout_ratio', dict(type=float, default=0.2,
                                 help='dropout ratio (default: 0.2)')),
        ('--graph_pooling', dict(type=str, default="mean",
                                 help='graph level pooling (sum, mean, max, set2set, attention)')),
        ('--JK', dict(type=str, default="last",
                      help='how the node features across layers are combined. last, sum, max or concat')),
        ('--dataset', dict(type=str, default='chembl_filtered',
                           help='root directory of dataset. For now, only classification.')),
        ('--gnn_type', dict(type=str, default="gin")),
        ('--input_model_file', dict(type=str, default='',
                                    help='filename to read the model (if there is any)')),
        ('--output_model_file', dict(type=str, default='',
                                     help='filename to output the pre-trained model')),
        ('--num_workers', dict(type=int, default=8,
                               help='number of workers for dataset loading')),
    ]
    parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Fixed seeds for reproducibility.
    torch.manual_seed(0)
    np.random.seed(0)
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")
    if use_cuda:
        torch.cuda.manual_seed_all(0)

    # Bunch of classification tasks; only one dataset is recognised.
    if args.dataset != "chembl_filtered":
        raise ValueError("Invalid dataset name.")
    num_tasks = 1310

    # Dataset and loader.
    dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset)
    loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)

    # Model, optionally warm-started from a checkpoint.
    model = GNN_graphpred(args.num_layer, args.emb_dim, num_tasks, JK=args.JK, drop_ratio=args.dropout_ratio, graph_pooling=args.graph_pooling, gnn_type=args.gnn_type)
    if args.input_model_file != "":
        model.from_pretrained(args.input_model_file + ".pth")
    model.to(device)

    # Optimizer.
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs + 1):
        print(f"====epoch {epoch}")
        train(args, model, device, loader, optimizer)

    # Only the encoder part (model.gnn) goes into the checkpoint.
    if args.output_model_file != "":
        torch.save(model.gnn.state_dict(), args.output_model_file + ".pth")
150 |
151 |
152 | if __name__ == "__main__":
153 | main()
154 |
--------------------------------------------------------------------------------
/transfer_learning/chem/run.sh:
--------------------------------------------------------------------------------
#### GIN fine-tuning
# Launch one background fine-tuning job per dataset; each job's stdout
# goes to log_<dataset>.

for dataset in bbbp sider toxcast; do
    nohup ./finetune.sh "$dataset" > "log_$dataset" &
done
--------------------------------------------------------------------------------
/unsupervised_TU/Accuracy.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/Accuracy.txt
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/arguments.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/arguments.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/aug.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/aug.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/evaluate_embedding.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/gin.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/gin.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/losses.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/losses.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/__pycache__/model.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/arguments.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
def arg_parse():
    """Parse the command-line options of the unsupervised experiments.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='GcnInformax Arguments.')
    add = parser.add_argument

    add('--DS', default='NCI1', help='NCI1,PTC_MR,IMDB-BINARY,IMDB-MULTI,REDDIT-BINARY')

    # Boolean switches: absent -> False, present -> True.
    for switch in ('--local', '--glob', '--prior'):
        add(switch, action='store_true')

    add('--device', type=str, default='cuda:6', help='gpu device ids')
    add('--lr', type=float, default=0.01, help='Learning rate.')
    add('--alpha', type=float, default=1.2, help='stregnth for regularization')
    add('--num-gc-layers', type=int, default=5,
        help='Number of graph convolution layers before each pooling')
    add('--hidden-dim', type=int, default=32, help='')
    add('--seed', type=int, default=0)
    add('--epochs', type=int, default=20)
    # Random
    add('--eta', type=float, default=1.0, help='0.1, 1.0, 10, 100, 1000')
    add('--batch_size', type=int, default=128, help='128, 256, 512, 1024')

    return parser.parse_args()
26 |
27 |
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/configs/convnets.py:
--------------------------------------------------------------------------------
1 | '''Basic convnet hyperparameters.
2 |
3 | conv_args are in format (dim_h, f_size, stride, pad, batch_norm, dropout, nonlinearity, pool)
4 | fc_args are in format (dim_h, batch_norm, dropout, nonlinearity)
5 |
6 | '''
7 |
8 | from cortex_DIM.nn_modules.encoder import ConvnetEncoder, FoldedConvnetEncoder
9 |
10 |
11 | # Basic DCGAN-like encoders
12 |
# 28x28 encoder: two stride-2 5x5 convolutions followed by one
# fully-connected layer.
_basic28x28 = dict(
    Encoder=ConvnetEncoder,
    # (dim_h, f_size, stride, pad, batch_norm, dropout, nonlinearity, pool)
    conv_args=[(64, 5, 2, 2, True, False, 'ReLU', None),
               (128, 5, 2, 2, True, False, 'ReLU', None)],
    # (dim_h, batch_norm, dropout, nonlinearity) -- fully-connected entries
    # have no pooling field; the stray trailing ``None`` was removed so this
    # config matches the documented format and every other config here.
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=1,
    fc_idx=0
)
21 |
# Pooling spec shared by the Alexnet-like encoders.
_MAXPOOL_3_2 = ('MaxPool2d', 3, 2)


def _conv(dim_h, f_size, stride, pad, pool=None):
    """Build one conv_args entry with batch norm on, dropout off and ReLU."""
    return (dim_h, f_size, stride, pad, True, False, 'ReLU', pool)


def _fc(dim_h):
    """Build one fc_args entry with batch norm on, dropout off and ReLU."""
    return (dim_h, True, False, 'ReLU')


def _dcgan_convs(*dims):
    """Return a fresh list of 4x4, stride-2, pad-1 conv entries, one per width."""
    return [_conv(dim_h, 4, 2, 1) for dim_h in dims]


def _alex_convs():
    """Return a fresh list holding the five Alexnet-like conv entries."""
    return [
        _conv(96, 3, 1, 1, _MAXPOOL_3_2),
        _conv(192, 3, 1, 1, _MAXPOOL_3_2),
        _conv(384, 3, 1, 1),
        _conv(384, 3, 1, 1),
        _conv(192, 3, 1, 1, _MAXPOOL_3_2),
    ]


# Remaining basic DCGAN-like encoders

_basic32x32 = {
    'Encoder': ConvnetEncoder,
    'conv_args': _dcgan_convs(64, 128, 256),
    'fc_args': [_fc(1024)],
    'local_idx': 1,
    'conv_idx': 2,
    'fc_idx': 0,
}

_basic64x64 = {
    'Encoder': ConvnetEncoder,
    'conv_args': _dcgan_convs(64, 128, 256, 512),
    'fc_args': [_fc(1024)],
    'local_idx': 2,
    'conv_idx': 3,
    'fc_idx': 0,
}

# Alexnet-like encoders

_alex64x64 = {
    'Encoder': ConvnetEncoder,
    'conv_args': _alex_convs(),
    'fc_args': [_fc(4096), _fc(4096)],
    'local_idx': 2,
    'conv_idx': 4,
    'fc_idx': 1,
}

_foldalex64x64 = {
    'Encoder': FoldedConvnetEncoder,
    'crop_size': 16,
    'conv_args': _alex_convs(),
    'fc_args': [_fc(4096), _fc(4096)],
    'local_idx': 4,
    'fc_idx': 1,
}

_foldmultialex64x64 = {
    'Encoder': FoldedConvnetEncoder,
    'crop_size': 16,
    # The Alexnet stack plus two extra un-pooled, un-padded layers.
    'conv_args': _alex_convs() + [_conv(192, 3, 1, 0), _conv(192, 1, 1, 0)],
    'fc_args': [_fc(4096)],
    'local_idx': 4,
    'multi_idx': 6,
    'fc_idx': 1,
}

# Lookup table from configuration name to its settings.
configs = {
    'basic28x28': _basic28x28,
    'basic32x32': _basic32x32,
    'basic64x64': _basic64x64,
    'alex64x64': _alex64x64,
    'foldalex64x64': _foldalex64x64,
    'foldmultialex64x64': _foldmultialex64x64,
}
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/configs/resnets.py:
--------------------------------------------------------------------------------
1 | """Configurations for ResNets
2 |
3 | """
4 |
5 | from cortex_DIM.nn_modules.encoder import ResnetEncoder, FoldedResnetEncoder
6 |
7 |
def _conv(dim_h, f_size, stride, pad):
    """Build one conv entry with batch norm on, dropout off, ReLU and no pooling."""
    return (dim_h, f_size, stride, pad, True, False, 'ReLU', None)


def _bottleneck(dim_h, stride, repeats):
    """Build one res_args entry: a 1x1 / 3x3 / 1x1 triple plus its repeat count.

    Only the middle 3x3 convolution carries ``stride``; the last 1x1
    convolution widens to ``dim_h * 4``.
    """
    layers = [
        _conv(dim_h, 1, 1, 0),
        _conv(dim_h, 3, stride, 1),
        _conv(dim_h * 4, 1, 1, 0),
    ]
    return (layers, repeats)


# (width, stride of the 3x3 conv) for the six residual stages, shared by
# every configuration below.
_STAGE_LAYOUT = ((64, 1), (64, 1), (128, 2), (128, 1), (256, 2), (256, 1))


def _resnet_config(encoder, repeats, local_idx, crop_size=None):
    """Assemble one configuration dict.

    Args:
        encoder: Encoder class stored under ``Encoder``.
        repeats: Six repeat counts, one per stage of ``_STAGE_LAYOUT``.
        local_idx: Value stored under ``local_idx``.
        crop_size: Stored under ``crop_size`` when given (folded encoders).
    """
    config = {'Encoder': encoder}
    if crop_size is not None:
        config['crop_size'] = crop_size
    config['conv_before_args'] = [_conv(64, 3, 2, 1)]
    config['res_args'] = [
        _bottleneck(dim_h, stride, count)
        for (dim_h, stride), count in zip(_STAGE_LAYOUT, repeats)
    ]
    config['fc_args'] = [(1024, True, False, 'ReLU')]
    config['local_idx'] = local_idx
    config['fc_idx'] = 0
    return config


_REPEATS_19 = (1, 1, 1, 1, 1, 1)
_REPEATS_34 = (1, 2, 1, 5, 1, 2)

_resnet19_32x32 = _resnet_config(ResnetEncoder, _REPEATS_19, local_idx=4)

_foldresnet19_32x32 = _resnet_config(FoldedResnetEncoder, _REPEATS_19,
                                     local_idx=6, crop_size=8)

_resnet34_32x32 = _resnet_config(ResnetEncoder, _REPEATS_34, local_idx=2)

_foldresnet34_32x32 = _resnet_config(FoldedResnetEncoder, _REPEATS_34,
                                     local_idx=12, crop_size=8)

# Lookup table from configuration name to its settings.
configs = {
    'resnet19_32x32': _resnet19_32x32,
    'resnet34_32x32': _resnet34_32x32,
    'foldresnet19_32x32': _foldresnet19_32x32,
    'foldresnet34_32x32': _foldresnet34_32x32,
}
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/__pycache__/gan_losses.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/functions/__pycache__/gan_losses.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/__pycache__/misc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/functions/__pycache__/misc.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/gan_losses.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | """
4 |
5 | import math
6 |
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from cortex_DIM.functions.misc import log_sum_exp
11 |
12 |
def raise_measure_error(measure):
    """Raise ``NotImplementedError`` naming ``measure`` and the supported measures.

    Args:
        measure: The rejected measure, interpolated into the message.

    Raises:
        NotImplementedError: Always.
    """
    supported_measures = ['GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1']
    message = 'Measure `{}` not supported. Supported: {}'.format(
        measure, supported_measures)
    raise NotImplementedError(message)
18 |
19 |
def get_positive_expectation(p_samples, measure, average=True):
    """Compute the positive-sample term of the chosen divergence / difference.

    Args:
        p_samples: Tensor of scores for positive samples.
        measure: One of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1'.
        average: If true, return the mean over all entries; otherwise the
            element-wise values.

    Returns:
        torch.Tensor

    Raises:
        NotImplementedError: If ``measure`` is not supported.
    """
    log_2 = math.log(2.)

    # One element-wise formula per measure; only the selected one is evaluated.
    formulas = {
        'GAN': lambda s: - F.softplus(-s),
        'JSD': lambda s: log_2 - F.softplus(- s),
        'X2': lambda s: s ** 2,
        'KL': lambda s: s + 1.,
        'RKL': lambda s: -torch.exp(-s),
        'DV': lambda s: s,
        'H2': lambda s: 1. - torch.exp(-s),
        'W1': lambda s: s,
    }

    if measure not in tuple(formulas):
        raise_measure_error(measure)

    Ep = formulas[measure](p_samples)
    return Ep.mean() if average else Ep
57 |
58 |
def get_negative_expectation(q_samples, measure, average=True):
    """Compute the negative-sample term of the chosen divergence / difference.

    Args:
        q_samples: Tensor of scores for negative samples.
        measure: One of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1'.
        average: If true, return the mean over all entries; otherwise the
            element-wise values.

    Returns:
        torch.Tensor

    Raises:
        NotImplementedError: If ``measure`` is not supported.
    """
    log_2 = math.log(2.)

    # One formula per measure; only the selected one is evaluated. 'DV'
    # reduces over dimension 0 via log_sum_exp, the others are element-wise.
    formulas = {
        'GAN': lambda s: F.softplus(-s) + s,
        'JSD': lambda s: F.softplus(-s) + s - log_2,
        'X2': lambda s: -0.5 * ((torch.sqrt(s ** 2) + 1.) ** 2),
        'KL': lambda s: torch.exp(s),
        'RKL': lambda s: s - 1.,
        'DV': lambda s: log_sum_exp(s, 0) - math.log(s.size(0)),
        'H2': lambda s: torch.exp(s) - 1.,
        'W1': lambda s: s,
    }

    if measure not in tuple(formulas):
        raise_measure_error(measure)

    Eq = formulas[measure](q_samples)
    return Eq.mean() if average else Eq
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/functions/misc.py:
--------------------------------------------------------------------------------
1 | """Miscellaneous functions.
2 |
3 | """
4 |
5 | import torch
6 |
7 |
def log_sum_exp(x, axis=None):
    """Numerically stable ``log(sum(exp(x)))``.

    Args:
        x: Input tensor.
        axis: Dimension to reduce. ``None`` (the default) reduces over all
            elements and returns a scalar tensor.

    Returns:
        torch.Tensor: log sum exp, with the reduced dimension removed.
    """
    if axis is None:
        # Flatten first so "no axis" means a reduction over every element.
        return torch.logsumexp(x.reshape(-1), dim=0)
    # The built-in subtracts the max with the reduced dimension kept, so it
    # broadcasts correctly for any axis.
    return torch.logsumexp(x, dim=axis)
22 |
23 |
def random_permute(X):
    """Randomly permutes a tensor along its first axis.

    For every index of the last axis an independent random permutation of
    the first axis is drawn; that permutation is shared across axis 1.

    Args:
        X: Input tensor with three dimensions (the code transposes axes 1
            and 2 and indexes the first two axes of the result).

    Returns:
        torch.Tensor: Tensor with the same shape as ``X``.

    """
    X = X.transpose(1, 2)
    # Sorting uniform noise along axis 0 yields one random permutation of
    # the first axis per column. Everything is created on X's device so the
    # function works on CPU as well as GPU.
    noise = torch.rand((X.size(0), X.size(1)), device=X.device)
    idx = noise.sort(0)[1]
    adx = torch.arange(X.size(1), device=X.device)
    X = X[idx, adx[None, :]].transpose(1, 2)
    return X
40 |
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/mi_networks.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/mi_networks.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/misc.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/cortex_DIM/nn_modules/__pycache__/misc.cpython-37.pyc
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/encoder.py:
--------------------------------------------------------------------------------
1 | '''Basic cortex_DIM encoder.
2 |
3 | '''
4 |
5 | import torch
6 |
7 | from cortex_DIM.nn_modules.convnet import Convnet, FoldedConvnet
8 | from cortex_DIM.nn_modules.resnet import ResNet, FoldedResNet
9 |
10 |
def create_encoder(Module):
    '''Create an ``Encoder`` class deriving from ``Module``.

    Args:
        Module: Base network class. Its ``forward`` must accept
            ``return_full_list=True`` and return either 2 or 4 sequences of
            intermediate outputs (see ``Encoder.forward``).

    Returns:
        The new ``Encoder`` class.

    '''
    class Encoder(Module):
        '''Encoder used for cortex_DIM.

        '''

        def __init__(self, *args, local_idx=None, multi_idx=None, conv_idx=None, fc_idx=None, **kwargs):
            '''

            Args:
                args: Arguments for parent class.
                local_idx: Index in list of convolutional layers for local features.
                multi_idx: Index in list of convolutional layers for multiple globals.
                conv_idx: Index in list of convolutional layers for intermediate features.
                    Defaults to `local_idx` when not given.
                fc_idx: Index in list of fully-connected layers for intermediate features.
                kwargs: Keyword arguments for the parent class.

            Raises:
                ValueError: If `local_idx` is not set.
            '''

            super().__init__(*args, **kwargs)

            if local_idx is None:
                raise ValueError('`local_idx` must be set')

            # Compare against None explicitly: 0 is a valid layer index and
            # must not be replaced by `local_idx`.
            if conv_idx is None:
                conv_idx = local_idx

            self.local_idx = local_idx
            self.multi_idx = multi_idx
            self.conv_idx = conv_idx
            self.fc_idx = fc_idx

        def forward(self, x: torch.Tensor):
            '''

            Args:
                x: Input tensor.

            Returns:
                local_out, conv_out, multi_out, hidden_out, global_out

                `multi_out` is None when `multi_idx` is unset, `hidden_out` is
                None when `fc_idx` is unset, and both `hidden_out` and
                `global_out` are None when the parent returns no
                fully-connected outputs.

            '''

            outs = super().forward(x, return_full_list=True)
            if len(outs) == 2:
                conv_out, fc_out = outs
            else:
                conv_before_out, res_out, conv_after_out, fc_out = outs
                # Combined with `+`; presumably list concatenation, since the
                # result is indexed by layer below.
                conv_out = conv_before_out + res_out + conv_after_out

            local_out = conv_out[self.local_idx]

            if self.multi_idx is not None:
                multi_out = conv_out[self.multi_idx]
            else:
                multi_out = None

            if len(fc_out) > 0:
                if self.fc_idx is not None:
                    hidden_out = fc_out[self.fc_idx]
                else:
                    hidden_out = None
                global_out = fc_out[-1]
            else:
                hidden_out = None
                global_out = None

            conv_out = conv_out[self.conv_idx]

            return local_out, conv_out, multi_out, hidden_out, global_out

    return Encoder
81 |
82 |
class ConvnetEncoder(create_encoder(Convnet)):
    '''Encoder produced by ``create_encoder`` with ``Convnet`` as base.'''


class FoldedConvnetEncoder(create_encoder(FoldedConvnet)):
    '''Encoder produced by ``create_encoder`` with ``FoldedConvnet`` as base.'''


class ResnetEncoder(create_encoder(ResNet)):
    '''Encoder produced by ``create_encoder`` with ``ResNet`` as base.'''


class FoldedResnetEncoder(create_encoder(FoldedResNet)):
    '''Encoder produced by ``create_encoder`` with ``FoldedResNet`` as base.'''
97 |
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/mi_networks.py:
--------------------------------------------------------------------------------
1 | """Module for networks used for computing MI.
2 |
3 | """
4 |
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 |
9 | from cortex_DIM.nn_modules.misc import Permute
10 |
11 |
class MIFCNet(nn.Module):
    """Simple custom network for computing MI.

    The output is the sum of a two-layer nonlinear block and a linear
    shortcut whose weights start close to an identity copy of the input.

    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units. Must be >= `n_input`.
        """
        super().__init__()

        assert(n_units >= n_input)

        self.linear_shortcut = nn.Linear(n_input, n_units)
        self.block_nonlinear = nn.Sequential(
            nn.Linear(n_input, n_units),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Linear(n_units, n_units)
        )

        # Initialize the shortcut projection to a sort of noisy copy: small
        # uniform noise everywhere, exactly 1 on the leading diagonal.
        # The mask is boolean because masked_fill_ requires a bool mask on
        # current PyTorch (uint8 masks are deprecated / rejected).
        eye_mask = torch.eye(n_units, n_input).bool()

        self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
        self.linear_shortcut.weight.data.masked_fill_(eye_mask, 1.)

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_nonlinear(x) + self.linear_shortcut(x)
        return h
55 |
56 |
class MI1x1ConvNet(nn.Module):
    """Simple custom 1x1 convnet.

    Sums a two-layer 1x1-convolution block with a 1x1-convolution shortcut
    and applies layer normalization over the channel axis.

    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units.
        """

        super().__init__()

        self.block_nonlinear = nn.Sequential(
            nn.Conv1d(n_input, n_units, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Conv1d(n_units, n_units, kernel_size=1, stride=1, padding=0, bias=True),
        )

        # LayerNorm normalizes the last axis, so channels are moved last
        # and then moved back.
        self.block_ln = nn.Sequential(
            Permute(0, 2, 1),
            nn.LayerNorm(n_units),
            Permute(0, 2, 1)
        )

        self.linear_shortcut = nn.Conv1d(n_input, n_units, kernel_size=1,
                                         stride=1, padding=0, bias=False)

        # Initialize shortcut to be like identity (if possible). When
        # n_units < n_input the default Conv1d initialization is kept.
        if n_units >= n_input:
            # Boolean mask of shape (n_units, n_input, 1): masked_fill_
            # requires a bool mask on current PyTorch.
            eye_mask = torch.eye(n_units, n_input).bool().unsqueeze(-1)
            self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
            self.linear_shortcut.weight.data.masked_fill_(eye_mask, 1.)

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_ln(self.block_nonlinear(x) + self.linear_shortcut(x))
        return h
107 |
--------------------------------------------------------------------------------
/unsupervised_TU/cortex_DIM/nn_modules/misc.py:
--------------------------------------------------------------------------------
1 | '''Various miscellaneous modules
2 |
3 | '''
4 |
5 | import torch
6 |
7 |
class View(torch.nn.Module):
    """Module wrapper around ``Tensor.view``.

    """
    def __init__(self, *shape):
        """

        Args:
            *shape: Target shape forwarded to ``view`` on every call.
        """
        super().__init__()
        self.shape = shape

    def forward(self, input):
        """Returns a view of the tensor with the stored shape.

        Args:
            input: Input tensor.

        Returns:
            torch.Tensor: ``input`` viewed with ``self.shape``.

        """
        reshaped = input.view(*self.shape)
        return reshaped
32 |
33 |
class Unfold(torch.nn.Module):
    """Module that cuts 2d (image) tensors into strided square crops.

    The stride is half the crop size, and the crops are stacked along the
    batch axis.

    """
    def __init__(self, img_size, fold_size):
        """

        Args:
            img_size: Input size.
            fold_size: Crop size.
        """
        super().__init__()

        self.fold_size = fold_size
        self.fold_stride = fold_size // 2
        # Number of crop positions per spatial axis.
        self.n_locs = 2 * (img_size // fold_size) - 1

        kernel = (self.fold_size, self.fold_size)
        stride = (self.fold_stride, self.fold_stride)
        self.unfold = torch.nn.Unfold(kernel, stride=stride)

    def forward(self, x):
        """Unfolds tensor.

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: Crops with shape
            (N * n_locs * n_locs, -1, fold_size, fold_size).

        """
        batch = x.size(0)
        n_patches = self.n_locs * self.n_locs

        patches = self.unfold(x)
        patches = patches.reshape(batch, -1, self.fold_size, self.fold_size, n_patches)
        # Move the crop-location axis next to the batch axis before merging.
        patches = patches.permute(0, 4, 1, 2, 3)
        return patches.reshape(batch * n_patches, -1, self.fold_size, self.fold_size)
71 |
72 |
class Fold(torch.nn.Module):
    """Module that regroups stacked crops into a grid of locations.

    Undoes the batch stacking done by the strided crops above. Works only
    on 1x1.

    """
    def __init__(self, img_size, fold_size):
        """

        Args:
            img_size: Images size.
            fold_size: Crop size.
        """
        super().__init__()
        # Number of crop positions per spatial axis.
        self.n_locs = 2 * (img_size // fold_size) - 1

    def forward(self, x):
        """(Re)folds tensor.

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: Refolded tensor with shape
            (-1, C * X * Y, n_locs, n_locs).

        """
        dim_c, dim_x, dim_y = x.size()[1:]
        n_patches = self.n_locs * self.n_locs

        grouped = x.reshape(-1, n_patches, dim_c, dim_x * dim_y)
        # Put the crop-location axis last so it can be split into a grid.
        channels_first = grouped.permute(0, 2, 3, 1)
        folded = channels_first.reshape(-1, dim_c * dim_x * dim_y, self.n_locs, self.n_locs)
        return folded.contiguous()
105 |
106 |
class Permute(torch.nn.Module):
    """Module wrapper around ``Tensor.permute``.

    """
    def __init__(self, *perm):
        """

        Args:
            *perm: Axis order forwarded to ``permute`` on every call.
        """
        super().__init__()
        self.perm = perm

    def forward(self, input):
        """Permutes axes of tensor.

        Args:
            input: Input tensor.

        Returns:
            torch.Tensor: permuted tensor.

        """
        permuted = input.permute(*self.perm)
        return permuted
131 |
--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/data.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/data.pt
--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_filter.pt
--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junxia97/SimGRACE/badf37130438416b094f7f58dbd8311123a3950b/unsupervised_TU/data/NCI1/NCI1/processed/pre_transform.pt
--------------------------------------------------------------------------------
/unsupervised_TU/data/NCI1/NCI1/raw/README.txt:
--------------------------------------------------------------------------------
1 | README for dataset NCI1
2 |
3 |
4 | === Usage ===
5 |
6 | This folder contains the following comma separated text files
7 | (replace DS by the name of the dataset):
8 |
9 | n = total number of nodes
10 | m = total number of edges
11 | N = number of graphs
12 |
13 | (1) DS_A.txt (m lines)
14 | sparse (block diagonal) adjacency matrix for all graphs,
15 | each line corresponds to (row, col) resp. (node_id, node_id)
16 |
17 | (2) DS_graph_indicator.txt (n lines)
18 | column vector of graph identifiers for all nodes of all graphs,
19 | the value in the i-th line is the graph_id of the node with node_id i
20 |
21 | (3) DS_graph_labels.txt (N lines)
22 | class labels for all graphs in the dataset,
23 | the value in the i-th line is the class label of the graph with graph_id i
24 |
25 | (4) DS_node_labels.txt (n lines)
26 | column vector of node labels,
27 | the value in the i-th line corresponds to the node with node_id i
28 |
29 | There are OPTIONAL files if the respective information is available:
30 |
31 | (5) DS_edge_labels.txt (m lines; same size as DS_A.txt)
32 | labels for the edges in DS_A.txt
33 |
34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt)
35 | attributes for the edges in DS_A.txt
36 |
37 | (7) DS_node_attributes.txt (n lines)
38 | matrix of node attributes,
39 | the comma separated values in the i-th line form the attribute vector of the node with node_id i
40 |
41 | (8) DS_graph_attributes.txt (N lines)
42 | regression values for all graphs in the dataset,
43 | the value in the i-th line is the attribute of the graph with graph_id i
44 |
45 |
46 | === Description ===
47 |
48 | NCI1 and NCI109 represent two balanced subsets of datasets of chemical compounds screened
49 | for activity against non-small cell lung cancer and ovarian cancer cell lines respectively
50 | (Wale and Karypis (2006) and http://pubchem.ncbi.nlm.nih.gov).
51 |
52 |
53 | === Previous Use of the Dataset ===
54 |
55 | Neumann, M., Garnett R., Bauckhage Ch., Kersting K.: Propagation Kernels: Efficient Graph
56 | Kernels from Propagated Information. Under review at MLJ.
57 |
58 | Neumann, M., Patricia, N., Garnett, R., Kersting, K.: Efficient Graph Kernels by
59 | Randomization. In: P.A. Flach, T.D. Bie, N. Cristianini (eds.) ECML/PKDD, Notes in
60 | Computer Science, vol. 7523, pp. 378-393. Springer (2012).
61 |
62 | Shervashidze, N., Schweitzer, P., van Leeuwen, E., Mehlhorn, K., Borgwardt, K.:
63 | Weisfeiler-Lehman Graph Kernels. Journal of Machine Learning Research 12, 2539-2561 (2011)
64 |
65 |
66 | === References ===
67 |
68 | N. Wale and G. Karypis. Comparison of descriptor spaces for chemical compound retrieval and
69 | classification. In Proc. of ICDM, pages 678–689, Hong Kong, 2006.
70 |
71 |
--------------------------------------------------------------------------------
/unsupervised_TU/deepinfomax.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch
3 | from torch.autograd import Variable
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import numpy as np
7 | import json
8 | # from core.encoders import *
9 |
10 | from torch_geometric.datasets import TUDataset
11 | from torch_geometric.data import DataLoader
12 | import sys
13 | import json
14 | from torch import optim
15 |
16 | from cortex_DIM.nn_modules.mi_networks import MIFCNet, MI1x1ConvNet
17 | from losses import *
18 | from gin import Encoder
19 | from evaluate_embedding import evaluate_embedding
20 | from model import *
21 |
22 | from arguments import arg_parse
23 |
class GcnInfomax(nn.Module):
    """Encoder plus discriminators trained with a local-global MI loss.

    NOTE: the constructor reads the module-level globals ``args`` (for
    ``args.prior``) and ``dataset_num_features``; both are assigned in this
    file's ``__main__`` section, so the class must be instantiated after
    they exist.
    """

    def __init__(self, hidden_dim, num_gc_layers, alpha=0.5, beta=1., gamma=.1):
        """

        Args:
            hidden_dim: Hidden size passed to the encoder.
            num_gc_layers: Number of encoder layers passed to the encoder.
            alpha: Stored on the instance; not used by this class.
            beta: Stored on the instance; not used by this class.
            gamma: Weight of the prior-matching term in ``forward``.
        """
        super(GcnInfomax, self).__init__()

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.prior = args.prior

        self.embedding_dim = hidden_dim * num_gc_layers
        self.encoder = Encoder(dataset_num_features, hidden_dim, num_gc_layers)

        self.local_d = FF(self.embedding_dim)
        self.global_d = FF(self.embedding_dim)

        if self.prior:
            self.prior_d = PriorDiscriminator(self.embedding_dim)

        self.init_emb()

    def init_emb(self):
        """Xavier-initialize every ``nn.Linear`` weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                torch.nn.init.xavier_uniform_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.fill_(0.0)

    def forward(self, x, edge_index, batch, num_graphs):
        """Compute the training loss for one batch.

        Args:
            x: Node features, or None to use a vector of ones (one entry
                per element of ``batch``).
            edge_index: Edge index tensor forwarded to the encoder and loss.
            batch: Per-node graph assignment forwarded to the encoder and loss.
            num_graphs: Unused; kept for call compatibility.

        Returns:
            Local-global JSD loss, plus the prior term scaled by ``gamma``
            when ``self.prior`` is set.
        """
        if x is None:
            # Build the fallback features on the same device as the batch
            # vector instead of depending on a module-level `device`.
            x = torch.ones(batch.shape[0], device=batch.device)

        # `y` is fed to the global discriminator and `M` to the local one;
        # presumably graph-level and node-level embeddings (confirm in gin.Encoder).
        y, M = self.encoder(x, edge_index, batch)

        g_enc = self.global_d(y)
        l_enc = self.local_d(M)

        measure = 'JSD'
        local_global_loss = local_global_loss_(l_enc, g_enc, edge_index, batch, measure)

        if self.prior:
            prior = torch.rand_like(y)
            term_a = torch.log(self.prior_d(prior)).mean()
            term_b = torch.log(1.0 - self.prior_d(y)).mean()
            PRIOR = - (term_a + term_b) * self.gamma
        else:
            PRIOR = 0

        return local_global_loss + PRIOR
79 |
if __name__ == '__main__':

    # NOTE: `args`, `dataset_num_features` and `device` are read as module
    # globals elsewhere in this file, so they must stay at this level.
    args = arg_parse()
    accuracies = {'val': [], 'test': []}
    epochs = 20
    log_interval = 1
    batch_size = 128
    lr = args.lr
    DS = args.DS
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', DS)

    dataset = TUDataset(path, name=DS).shuffle()
    try:
        dataset_num_features = dataset.num_features
    except Exception:
        # Fall back to a single feature; unlike a bare `except`, this does
        # not swallow KeyboardInterrupt / SystemExit.
        dataset_num_features = 1

    dataloader = DataLoader(dataset, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GcnInfomax(args.hidden_dim, args.num_gc_layers).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    print('================')
    print('lr: {}'.format(lr))
    print('num_features: {}'.format(dataset_num_features))
    print('hidden_dim: {}'.format(args.hidden_dim))
    print('num_gc_layers: {}'.format(args.num_gc_layers))
    print('================')

    # Evaluate the untrained encoder once as a baseline.
    model.eval()
    emb, y = model.encoder.get_embeddings(dataloader)
    res = evaluate_embedding(emb, y)
    accuracies['val'].append(res[0])
    accuracies['test'].append(res[1])

    for epoch in range(1, epochs+1):
        loss_all = 0
        model.train()
        for data in dataloader:
            data = data.to(device)
            optimizer.zero_grad()
            loss = model(data.x, data.edge_index, data.batch, data.num_graphs)
            loss_all += loss.item() * data.num_graphs
            loss.backward()
            optimizer.step()
        # NOTE(review): the sum is weighted by graphs per batch but divided
        # by the number of batches; kept as-is to preserve the logged values.
        print('Epoch {}, Loss {}'.format(epoch, loss_all / len(dataloader)))

        if epoch % log_interval == 0:
            model.eval()
            emb, y = model.encoder.get_embeddings(dataloader)
            res = evaluate_embedding(emb, y)
            accuracies['val'].append(res[0])
            accuracies['test'].append(res[1])

            print(accuracies)

    # One CSV-style line per run: the first six fields are plain values and
    # the last is the JSON-encoded accuracy history.
    tpe = ('local' if args.local else '') + ('prior' if args.prior else '')
    with open('new_log', 'a+') as f:
        s = json.dumps(accuracies)
        f.write('{},{},{},{},{},{},{}\n'.format(args.DS, tpe, args.num_gc_layers, epochs, log_interval, lr, s))
153 |
--------------------------------------------------------------------------------
/unsupervised_TU/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex
# Usage: ./go.sh GPU_ID DATASET_NAME ETA
# Runs simgrace.py once for each seed 0..4 on the given GPU.

for seed in 0 1 2 3 4
do
# `--eta` and its value are separate arguments; the value must not be
# glued onto the long option name.
CUDA_VISIBLE_DEVICES="$1" python simgrace.py --DS "$2" --lr 0.01 --local --num-gc-layers 5 --eta "$3" --seed "$seed"
done
7 |
8 |
--------------------------------------------------------------------------------
/unsupervised_TU/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from cortex_DIM.functions.gan_losses import get_positive_expectation, get_negative_expectation
5 |
def local_global_loss_(l_enc, g_enc, edge_index, batch, measure):
    '''Local-global loss between node and graph encodings.

    Args:
        l_enc: Local (per-node) encodings; one row per node.
        g_enc: Global (per-graph) encodings; one row per graph.
        edge_index: Unused; kept for call compatibility.
        batch: For each node, the index of the graph it belongs to.
        measure: Name of the measure passed to the expectation helpers.
    Returns:
        torch.Tensor: Loss (negative expectation minus positive expectation).

    NOTE: pairs are selected by multiplying the score matrix with 0/1
    masks, so masked-out entries are scored as 0 rather than removed. They
    contribute nothing for 'JSD', but e.g. 'GAN' evaluates to log(2) at 0.
    '''
    num_graphs = g_enc.shape[0]
    num_nodes = l_enc.shape[0]
    device = l_enc.device

    # pos_mask[i, j] == 1 iff node i belongs to graph j; built with one
    # vectorized assignment on the encodings' device (works on CPU and GPU).
    pos_mask = torch.zeros((num_nodes, num_graphs), device=device)
    pos_mask[torch.arange(num_nodes, device=device), batch.to(device)] = 1.
    neg_mask = 1. - pos_mask

    res = torch.mm(l_enc, g_enc.t())

    E_pos = get_positive_expectation(res * pos_mask, measure, average=False).sum()
    E_pos = E_pos / num_nodes
    E_neg = get_negative_expectation(res * neg_mask, measure, average=False).sum()
    # Each node has (num_graphs - 1) negatives; guard the single-graph
    # batch against division by zero.
    E_neg = E_neg / (num_nodes * max(num_graphs - 1, 1))

    return E_neg - E_pos
33 |
def adj_loss_(l_enc, g_enc, edge_index, batch):
    '''Adjacency reconstruction loss from node encodings.

    Args:
        l_enc: Local (per-node) encodings; one row per node.
        g_enc: Unused; kept for call compatibility.
        edge_index: Two rows of node indices; column k describes an edge
            between edge_index[0][k] and edge_index[1][k].
        batch: Unused; kept for call compatibility.
    Returns:
        torch.Tensor: Binary cross-entropy between the sigmoid of the
        pairwise inner products (diagonal zeroed) and the symmetric
        adjacency matrix.
    '''
    num_nodes = l_enc.shape[0]
    device = l_enc.device

    # Symmetric dense adjacency, filled with two vectorized assignments on
    # the encodings' device (works on CPU and GPU).
    src = edge_index[0].to(device)
    dst = edge_index[1].to(device)
    adj = torch.zeros((num_nodes, num_nodes), device=device)
    adj[src, dst] = 1.
    adj[dst, src] = 1.

    # Zero the diagonal so self-pairs are predicted as "no edge".
    mask = torch.eye(num_nodes, device=device)
    res = torch.sigmoid((torch.mm(l_enc, l_enc.t())))
    res = (1-mask) * res

    loss = nn.BCELoss()(res, adj)
    return loss
51 |
--------------------------------------------------------------------------------
/unsupervised_TU/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import numpy as np
6 | # from core.encoders import *
7 | import json
8 | from torch import optim
9 |
10 | from cortex_DIM.nn_modules.mi_networks import MIFCNet, MI1x1ConvNet
11 | from losses import *
12 |
13 |
class GlobalDiscriminator(nn.Module):
    """Three-layer discriminator over concatenated global/local features.

    NOTE(review): ``forward`` calls ``self.encoder``, which ``__init__``
    never assigns, and ``l1`` outputs 32 features while ``l2`` expects 512.
    As written, ``forward`` cannot run unless an encoder is attached
    externally and the layer sizes are reconciled - confirm whether this
    class is still used.
    """

    def __init__(self, args, input_dim):
        """

        Args:
            args: Unused.
            input_dim: Unused; layer sizes are hard-coded.
        """
        super().__init__()

        self.l0 = nn.Linear(32, 32)
        self.l1 = nn.Linear(32, 32)

        self.l2 = nn.Linear(512, 1)

    def forward(self, y, M, data):
        """

        Args:
            y: Tensor concatenated with the encoded ``M`` along dim 1.
            M: Input handed to ``self.encoder``.
            data: Mapping providing 'adj' and 'num_nodes' tensors.

        Returns:
            Output of the final linear layer.
        """
        adjacency = Variable(data['adj'].float(), requires_grad=False).cuda()
        node_counts = data['num_nodes'].int().numpy()
        M, _ = self.encoder(M, adjacency, node_counts)

        hidden = torch.cat((y, M), dim=1)
        for layer in (self.l0, self.l1):
            hidden = F.relu(layer(hidden))
        return self.l2(hidden)
35 |
class PriorDiscriminator(nn.Module):
    """Three-layer MLP mapping an embedding to a value in (0, 1)."""

    def __init__(self, input_dim):
        """

        Args:
            input_dim: Size of the input and of both hidden layers.
        """
        super().__init__()
        self.l0 = nn.Linear(input_dim, input_dim)
        self.l1 = nn.Linear(input_dim, input_dim)
        self.l2 = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Apply two ReLU hidden layers followed by a sigmoid output."""
        hidden = x
        for layer in (self.l0, self.l1):
            hidden = F.relu(layer(hidden))
        logits = self.l2(hidden)
        return torch.sigmoid(logits)
47 |
class FF(nn.Module):
    """Feed-forward block with a linear shortcut.

    The output is ``block(x) + linear_shortcut(x)``, where ``block`` is
    three Linear+ReLU pairs; every layer keeps the width ``input_dim``.
    """

    def __init__(self, input_dim):
        """

        Args:
            input_dim: Width of the input, hidden and output features.
        """
        super().__init__()
        stages = []
        for _ in range(3):
            stages.append(nn.Linear(input_dim, input_dim))
            stages.append(nn.ReLU())
        self.block = nn.Sequential(*stages)
        self.linear_shortcut = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        """Return the nonlinear path plus the linear shortcut."""
        nonlinear = self.block(x)
        shortcut = self.linear_shortcut(x)
        return nonlinear + shortcut
69 |
70 |
--------------------------------------------------------------------------------
/unsupervised_TU/readme.md:
--------------------------------------------------------------------------------
1 | ## Dependencies
2 | * [PyTorch Geometric](https://github.com/rusty1s/pytorch_geometric#installation)==1.7.0
3 |
4 | ## Training & Evaluation
5 |
6 | ```
7 | ./go.sh $GPU_ID $DATASET_NAME $ETA
8 | ```
9 |
10 | ```$DATASET_NAME``` is the dataset name (please refer to https://chrsmrrs.github.io/datasets/docs/datasets/), ```$GPU_ID``` is the ID of the GPU to launch on, and ```$ETA``` can be tuned among {0.1, 1.0, 10.0, 100.0}.
11 |
12 | ## Acknowledgements
13 | - https://github.com/Shen-Lab/GraphCL/tree/master/unsupervised_TU
14 |
15 | - https://github.com/fanyun-sun/InfoGraph/tree/master/unsupervised.
16 |
--------------------------------------------------------------------------------
/unsupervised_TU/test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | import json
4 | import pandas as pd
5 | import collections
6 |
if __name__ == '__main__':

    # Usage: python test.py LOG_FILE DATASET
    # Scans the log once per hyper-parameter combination and, per epoch
    # setting, prints the best mean score found for each classifier.
    for epoch in [20, 100]:
        print(epoch)
        real_res = {'logreg':[-1], 'svc':[-1], 'linearsvc':[-1], 'randomforest':[-1]}
        for gc in [3, 5, 8, 16]:
            for lr in [0.01, 0.1, 0.001]:
                for tpe in ['local', 'localprior']:
                    res = collections.defaultdict(lambda :collections.defaultdict(list))
                    with open(sys.argv[1], 'r') as f:
                        for line in f:
                            # Fields: dataset, type, layers, epochs, log interval, lr, JSON.
                            x = line.strip().split(',', 6)
                            if x[1] != tpe or x[2] != str(gc) or x[3] != str(epoch) or x[5] != str(lr):
                                continue
                            tmp = json.loads(x[-1])

                            DS = x[0]
                            for clf_name in ('logreg', 'svc', 'linearsvc', 'randomforest'):
                                res[DS][clf_name].append(tmp[clf_name])

                    for DS, lst in res.items():
                        if DS != sys.argv[2]:
                            continue
                        for clf, v in lst.items():
                            # Statistics over at most the first five runs.
                            runs = np.array(v[:5])
                            mn = np.mean(runs, axis=0)
                            std = np.std(np.array(v[:5]), axis=0)

                            idx = np.argmax(mn)
                            if mn[idx] > real_res[clf][0] and len(v) > 1:
                                real_res[clf] = [mn[idx], std[idx], epoch, lr, gc, idx, len(v)]
        print(real_res)
49 |
50 |
--------------------------------------------------------------------------------
|