├── README.md
├── data
│   └── subgraphs
│       └── yelp_sub.npz
└── sw
    ├── README.md
    └── redundancy_reduction
        └── rr.py

/README.md:
--------------------------------------------------------------------------------
# GraphACT: Accelerating GCN Training on CPU-FPGA Heterogeneous Platforms

Hanqing Zeng, Viktor Prasanna

Contact:

Hanqing Zeng (zengh@usc.edu)

**Updates**

03/05/2021: We have released the IP cores for GraphACT at [this repository](https://github.com/GraphSAINT/GNN-ARCH).
* The IP cores improve upon the GraphACT design by supporting two computation orders of feature aggregation and weight transformation. See [our ASAP paper](https://ieeexplore.ieee.org/abstract/document/9153263) for a description of the two orders.
* The IP cores now support both the training and inference algorithms on FPGA. We will soon add to this repository the complete training architecture built with those IP cores as building blocks.

We will also soon release the parallel C++ implementation of the redundancy reduction algorithm in this repository.

**NOTE**

* The GCN training algorithm, together with its implementation, is based on the paper "Accurate, Efficient and Scalable Graph Embedding" (IEEE IPDPS '19).
* Alternatively, you can refer to our more recent [ICLR '20 paper](https://arxiv.org/abs/1907.04931) (and its [implementation](https://github.com/GraphSAINT/GraphSAINT)) for a better graph-sampling-based minibatch training algorithm.
* The implementations of the redundancy reduction algorithm, the FPGA architecture, and the performance model will be uploaded soon.


**Citation**

```
@inproceedings{graphact,
  author = {Zeng, Hanqing and Prasanna, Viktor},
  title = {GraphACT: Accelerating GCN Training on CPU-FPGA Heterogeneous Platforms},
  year = {2020},
  isbn = {9781450370998},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3373087.3375312},
  doi = {10.1145/3373087.3375312},
  booktitle = {Proceedings of the 2020 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {255--265},
  numpages = {11},
  location = {Seaside, CA, USA},
  series = {FPGA '20}
}
```

--------------------------------------------------------------------------------
/data/subgraphs/yelp_sub.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GraphSAINT/GraphACT/ede8b95c703a3310e0f2a1ec4c77e324c6a15840/data/subgraphs/yelp_sub.npz
--------------------------------------------------------------------------------
/sw/README.md:
--------------------------------------------------------------------------------
### Redundancy Reduction

Currently, only the (slow) Python implementation of redundancy reduction is provided. The parallel C++ implementation will come soon.

To run the Python redundancy reduction, go into the `redundancy_reduction` directory and execute:

```
python rr.py --adj <path to adjacency .npz> --round <number of rounds>
```

An example subgraph adjacency matrix is provided at `../data/subgraphs/yelp_sub.npz` (relative to this `sw` directory; from inside `redundancy_reduction` the path is `../../data/subgraphs/yelp_sub.npz`).
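For intuition, below is a minimal sketch (illustration only; the toy graph and numbers are not part of the repo) of the redundancy that `rr.py` exploits: when two nodes appear together in many neighbor lists, their partial sum can be computed once and reused for every common neighbor.

```
import numpy as np

# Toy feature vector and neighbor lists. Nodes 0 and 1 are common
# neighbors of rows 2, 3 and 4, so naive aggregation would recompute
# x[0] + x[1] three times.
x = np.random.rand(5)
neighbors = {2: [0, 1], 3: [0, 1, 4], 4: [0, 1]}

naive = {v: sum(x[u] for u in nbrs) for v, nbrs in neighbors.items()}

# Redundancy reduction: precompute the shared pair once ...
x01 = x[0] + x[1]
# ... and reuse it, so fewer additions are performed overall.
reduced = {2: x01, 3: x01 + x[4], 4: x01}

assert all(np.isclose(naive[v], reduced[v]) for v in neighbors)
```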
--------------------------------------------------------------------------------
/sw/redundancy_reduction/rr.py:
--------------------------------------------------------------------------------
import argparse
from operator import itemgetter

import numpy as np
import scipy.sparse as sp


def parse_args():
    parser = argparse.ArgumentParser(description='arguments for redundancy reduction')
    parser.add_argument('--adj', type=str, required=True,
                        help='path to the adjacency file (scipy.sparse csr_matrix stored as npz)')
    parser.add_argument('--round', type=int, required=True,
                        help='total number of rounds of reduction to perform')
    args = parser.parse_args()
    return args


def construct_ga(adj_gs):
    """
    Construct the weighted "aggregation graph": for every pair of nodes (u, w)
    that appear together in some neighbor list, count the number of neighbor
    lists in which they co-occur. That count is the number of times naive
    aggregation would recompute the partial sum x_u + x_w.
    """
    assert adj_gs.shape[0] == adj_gs.shape[1], "adjacency matrix must be square"
    num_v = adj_gs.shape[0]
    weight_edges = dict()
    for v in range(num_v):
        # sorting guarantees each pair is always recorded as (u, w) with u < w;
        # assumes no self-loops, i.e. the diagonal of the subgraph adj is 0
        neigh = np.sort(adj_gs.indices[adj_gs.indptr[v]:adj_gs.indptr[v+1]])
        for iu, u in enumerate(neigh):
            for w in neigh[iu+1:]:
                if (u, w) not in weight_edges:
                    weight_edges[(u, w)] = 1
                else:
                    weight_edges[(u, w)] += 1
    return weight_edges, num_v


def obtain_precompute_edges(weight_edges, num_v):
    """
    Greedily select a matching M on the aggregation graph: repeatedly take the
    heaviest remaining pair whose endpoints are both still unmatched. Each
    selected pair of weight w is precomputed once and reused, saving w - 1
    additions; the total saving is accumulated in _W.
    """
    M = []
    # only pairs co-occurring in more than two neighbor lists are candidates
    H = {k: v for k, v in weight_edges.items() if v > 2}
    H_sorted = sorted(H.items(), key=itemgetter(1, 0), reverse=True)
    S = np.ones(num_v)    # S[u] == 1 iff node u is still unmatched
    _W = 0
    for (u, v), weight in H_sorted:
        if not (S[u] and S[v]):
            continue
        _W += weight - 1
        S[u] = 0; S[v] = 0
        M.append((u, v))
        if len(M) == int(num_v/2):
            # a matching contains at most |V|/2 edges
            break
    return M, _W

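# A worked toy example of the two steps above (illustration only; these
# numbers are not taken from the repo). For the neighbor lists
#     {2: [0, 1], 3: [0, 1], 4: [0, 1, 2]},
# construct_ga produces the pair weights
#     {(0, 1): 3, (0, 2): 1, (1, 2): 1}.
# Only (0, 1) passes the weight threshold, so obtain_precompute_edges returns
# M = [(0, 1)] with _W = 2: precomputing x0 + x1 once turns four additions
# (one each in rows 2 and 3, two in row 4) into two (the precompute itself
# plus one addition in row 4).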

def obtain_compact_mat(adj_gs, M, feat):
    """
    Rewrite the graph according to the matching M: for each matched pair,
    append a new "merged" node whose feature is the precomputed partial sum,
    and substitute that node for the pair in every neighbor list containing
    both endpoints.
    """
    ret_feat = np.zeros(feat.size + len(M))
    ret_feat[:feat.size] = feat
    idx = 0
    deg = np.ediff1d(adj_gs.indptr)
    num_v = deg.size
    # transpose adj first: e_list[n] collects the rows whose neighbor lists contain n
    e_list = [[] for v in range(adj_gs.shape[0])]
    for v in range(adj_gs.shape[0]):
        n_list = adj_gs.indices[adj_gs.indptr[v]:adj_gs.indptr[v+1]]
        for n in n_list:
            e_list[n].append(v)
    e_list_full = []
    gs_t_indptr = np.zeros(adj_gs.shape[0]+1).astype(np.int32)    # indptr for adj_gs.T
    for i, el in enumerate(e_list):
        e_list_full.extend(sorted(el))
        gs_t_indptr[i+1] = gs_t_indptr[i] + len(el)
    gs_t_indices = np.array(e_list_full).astype(np.int32)         # indices for adj_gs.T
    # prepare I_edges here, after identifying the large-weight edges:
    # I_edges[(u, w)] lists the rows whose neighbor lists contain both u and w
    I_edges = dict()
    for (aggr1, aggr2) in M:
        # intersection of aggr1's and aggr2's in-neighbor lists (rows of the transpose)
        _neigh1 = gs_t_indices[gs_t_indptr[aggr1]:gs_t_indptr[aggr1+1]]
        _neigh2 = gs_t_indices[gs_t_indptr[aggr2]:gs_t_indptr[aggr2+1]]
        I_edges[(aggr1, aggr2)] = np.intersect1d(_neigh1, _neigh2, assume_unique=True)
    for (aggr1, aggr2) in M:
        v_root = I_edges[(aggr1, aggr2)]
        ret_feat[num_v+idx] = ret_feat[aggr1] + ret_feat[aggr2]
        for v in v_root:
            neigh = adj_gs.indices[adj_gs.indptr[v]:adj_gs.indptr[v+1]]
            i1 = np.where(neigh == aggr1)[0][0]
            i2 = np.where(neigh == aggr2)[0][0]   # searchsorted not applicable here since we insert -1 below
            adj_gs.indices[adj_gs.indptr[v]+i1] = num_v + idx   # point at the merged node
            adj_gs.indices[adj_gs.indptr[v]+i2] = -1            # mark for removal
            deg[v] -= 1
        idx += 1
    # rebuild a clean CSR matrix without the -1 placeholders; the len(M) newly
    # appended merged nodes have empty neighbor lists of their own
    _indptr_new = np.cumsum(deg)
    indptr_new = np.zeros(num_v+idx+1).astype(np.int64)
    indptr_new[1:num_v+1] = _indptr_new
    indptr_new[num_v+1:] = _indptr_new[-1]
    indices_new = adj_gs.indices[np.where(adj_gs.indices > -1)]
    assert indices_new.size == indptr_new[-1]
    data_new = np.ones(indices_new.size)
    ret_adj = sp.csr_matrix((data_new, indices_new, indptr_new),
                            shape=(num_v+len(M), num_v+len(M)))
    return ret_adj, ret_feat


# operation / traffic statistics on a CSR adjacency matrix
f_tot_ops = lambda adj: adj.size - np.where(np.ediff1d(adj.indptr) > 0)[0].size   # additions: a row of degree d costs d - 1
f_tot_read = lambda adj: adj.size   # feature reads: one per nonzero
max_deg = lambda adj: np.ediff1d(adj.indptr).max()
mean_deg = lambda adj: np.ediff1d(adj.indptr).mean()
sigma_deg2 = lambda adj: (np.ediff1d(adj.indptr)**2).sum()/adj.shape[0]


def main(adj, num_round):
    adj_gs = sp.load_npz(adj)
    num_v_orig = adj_gs.shape[0]
    tot_ops_orig = f_tot_ops(adj_gs)
    tot_read_orig = f_tot_read(adj_gs)
    # random scalar features, so that correctness of the reduction can be
    # verified by comparing aggregation results before and after
    feat = np.random.rand(adj_gs.shape[0])
    ground_truth = adj_gs @ feat.reshape(-1, 1)
    cnt_precompute = 0
    cnt_preread = 0
    for r in range(num_round):
        print("max deg: {}, avg deg: {:.2f}, (\\Sigma deg^2)/|V|: {}".format(
            max_deg(adj_gs), mean_deg(adj_gs), sigma_deg2(adj_gs)))
        ops_prev = f_tot_ops(adj_gs)
        weight_edges, num_v = construct_ga(adj_gs)
        M, _W = obtain_precompute_edges(weight_edges, num_v)
        cnt_precompute += len(M)    # one addition per precomputed pair
        cnt_preread += 2*len(M)     # two feature reads per precomputed pair
        adj_gs, feat = obtain_compact_mat(adj_gs, M, feat)
        ops_new = f_tot_ops(adj_gs) + cnt_precompute
        read_new = f_tot_read(adj_gs) + cnt_preread
        print("previous ops: ", ops_prev)
        print("new ops: ", ops_new)
        print("match size: ", len(M))
        print("reduction comp compared to original: {:.2f} (precompute {:.3f} of original total ops, temp buffer {:.3f}% of |V|)"
              .format(tot_ops_orig/ops_new, cnt_precompute/tot_ops_orig, cnt_precompute/num_v_orig*100))
        print("reduction comm compared to original: {:.2f}".format(tot_read_orig/read_new))
    # the first num_v_orig entries of the aggregation on the reduced graph
    # must match the ground truth computed on the original graph
    optimized_result = adj_gs @ feat.reshape(-1, 1)
    np.testing.assert_allclose(ground_truth, optimized_result[:ground_truth.size], rtol=1e-8, atol=0)
    print("RESULT CORRECT!")


if __name__ == '__main__':
    args = parse_args()
    main(args.adj, args.round)
--------------------------------------------------------------------------------