├── README.md
├── louvain_baseline.py
├── data-analysis-cnm.py
├── data_processing.ipynb
└── GNN.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # CS224W_Hollywood_Graph_Networks
2 | Community detection and node classification on Hollywood actors using various models: Louvain, Clauset-Newman-Moore, GCN, GraphSage, and GAT.
3 | 
--------------------------------------------------------------------------------
/louvain_baseline.py:
--------------------------------------------------------------------------------
1 | import snap
2 | import numpy as np
3 | import networkx as nx
4 | import matplotlib.pyplot as plt
5 | import community
6 | 
7 | def main():
8 |     num_nodes = [] # list of number of nodes in each community
9 |     edges_file = "edges-100k_copy.txt"
10 |     G = nx.read_edgelist(edges_file) # nodetype=int
11 |     print(G.number_of_edges())
12 |     partition = community.best_partition(G)
13 | 
14 |     size = float(len(set(partition.values())))
15 |     pos = nx.spring_layout(G)
16 |     count = 0.
17 |     for com in set(partition.values()):
18 |         count += 1.
19 |         list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
20 |         print(list_nodes)
21 |         print("\n")
22 |         num_nodes.append(len(list_nodes))
23 |         nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, node_color = str(count / size))
24 | 
25 |     # show the partitioned graph first
26 |     nx.draw_networkx_edges(G, pos, alpha=0.5)
27 |     plt.show()
28 | 
29 |     # histogram of community sizes in its own figure
30 |     plt.figure()
31 |     plt.hist(num_nodes, log=True)
32 |     plt.xlabel("Size of Community")
33 |     plt.ylabel("Number of Communities")
34 |     plt.show()
35 | 
36 | 
37 | main()
--------------------------------------------------------------------------------
/data-analysis-cnm.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 | 
3 | import snap
4 | import matplotlib.pyplot as plt
5 | 
6 | G1 = snap.LoadEdgeList(snap.PUNGraph, "edges-100k.txt", 0, 1)
7 | 
8 | print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))
9 | print("Number of Nodes: %d" % G1.GetNodes())
10 | 
11 | # Number of nodes of zero degree
12 | print("Number of nodes of zero degree: %d" % snap.CntDegNodes(G1, 0))
13 | 
14 | # Get degree distribution
15 | DegToCntV = snap.TIntPrV()
16 | snap.GetDegCnt(G1, DegToCntV)
17 | degree = []
18 | numNodes = []
19 | sumDegrees = 0
20 | for item in DegToCntV:
21 |     degree.append(item.GetVal1())
22 |     numNodes.append(item.GetVal2())
23 |     sumDegrees += item.GetVal1()*item.GetVal2()
24 |     #print("%d nodes with degree %d" % (item.GetVal2(), item.GetVal1()))
25 | 
26 | plt.plot(degree, numNodes)
27 | plt.yscale('log')
28 | plt.xscale('log')
29 | plt.ylabel('frequency')
30 | plt.xlabel('degree')
31 | plt.title('Degree distribution')
32 | plt.savefig('degreeDist.png')
33 | plt.clf()
34 | 
35 | # Get average degree
36 | print("Average degree:", sumDegrees/float(sum(numNodes)))
37 | 
38 | # Get largest connected component (the graph is undirected)
39 | MxScc = snap.GetMxScc(G1)
40 | print("Size of largest connected component:", MxScc.GetNodes())
41 | 
42 | # Get connected components
43 | Components = snap.TCnComV()
44 | snap.GetWccs(G1, Components)
45 | wcc_sizes = []
46 | for CnCom in Components:
47 |     wcc_sizes.append(CnCom.Len())
48 | 
49 | print("Number of connected components:", len(wcc_sizes))
50 | 
51 | # Clauset-Newman-Moore community detection
52 | CmtyV = snap.TCnComV()
53 | modularity = snap.CommunityCNM(G1, CmtyV)
54 | count = 0
55 | sizes = []
56 | communities = []
57 | for Cmty in CmtyV:
58 |     listcmty = []
59 |     for NI in Cmty:
60 |         listcmty.append(NI)
61 
| 62 | communities.append(listcmty) 63 | count += 1 64 | sizes.append(len(listcmty)) 65 | print("Number of communities:", count) 66 | print("Largest community:", max(sizes)) 67 | print("Smallest community:", min(sizes)) 68 | print("Community 21:", communities[21]) 69 | print("Community 101:", communities[101]) 70 | print("Community 10,000:", communities[10000]) 71 | 72 | # plot histogram of community sizes 73 | sizes.sort() 74 | plt.hist(sizes, log=True) 75 | plt.xlabel("Size of community") 76 | plt.ylabel("Number of communities") 77 | plt.title("Sizes of CNM communities") 78 | plt.show() 79 | plt.savefig("cnm-sizes.png") 80 | #print("Size of communities:", sizes) 81 | print("The modularity of the network is %f" % modularity) 82 | -------------------------------------------------------------------------------- /data_processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "224.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "BGjJro5-IN3D", 20 | "colab_type": "code", 21 | "outputId": "4ae092e4-ec9d-4817-d4dc-60bda20187ce", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 122 25 | } 26 | }, 27 | "source": [ 28 | "pip install snap-stanford" 29 | ], 30 | "execution_count": 0, 31 | "outputs": [ 32 | { 33 | "output_type": "stream", 34 | "text": [ 35 | "Collecting snap-stanford\n", 36 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0d/18/4694293d1d58ee92a1f85fa09b4b1348b849d1f35470cf296b238fa20a8d/snap_stanford-5.0.0-cp36-cp36m-manylinux1_x86_64.whl (11.2MB)\n", 37 | "\u001b[K |████████████████████████████████| 11.2MB 2.6MB/s \n", 38 | "\u001b[?25hInstalling collected packages: snap-stanford\n", 39 | "Successfully installed snap-stanford-5.0.0\n" 40 | ], 41 | "name": "stdout" 42 | } 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "id": "6iOxcCFuIeco", 49 | "colab_type": "code", 50 | "outputId": "4ce0d394-d392-40aa-8c26-53421c5e5fdb", 51 | "colab": { 52 | "base_uri": "https://localhost:8080/", 53 | "height": 122 54 | } 55 | }, 56 | "source": [ 57 | "from google.colab import drive\n", 58 | "drive.mount('/content/drive')" 59 | ], 60 | "execution_count": 0, 61 | "outputs": [ 62 | { 63 | "output_type": "stream", 64 | "text": [ 65 | "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", 66 | "\n", 67 | "Enter your authorization code:\n", 68 | "··········\n", 69 | "Mounted at /content/drive\n" 70 | ], 71 | "name": "stdout" 72 | } 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "22xju41GH9ZC", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "import snap\n", 84 | "import pandas as pd\n", 85 | "\n", 86 | " # prefix for Sophia: My Drive; prefix for Nidhi & Flora: Shared with me\n", 87 | "\n", 88 | "def read_data():\n", 89 | "\tfilename = \"drive/My 
Drive/CS224W/Colab Notebooks/credits.csv\"\n",
90 | "\tdf = pd.read_csv(filename)\n",
91 | "\n",
92 | "\tid_counter = 0\n",
93 | "\tname_2_id = {}\n",
94 | "\tf_edges = open('drive/My Drive/CS224W/Colab Notebooks/edges.txt', 'w')\n",
95 | "\n",
96 | "\tG = snap.TUNGraph.New()\n",
97 | "\n",
98 | "\tfor idx, row in df.iterrows():\n",
99 | "\t\tmovie_cast = [] # id's of actors that worked on movie together\n",
100 | "\t\tcast_blobs = row['cast'].split(\"}\")\n",
101 | "\t\tfor actor in cast_blobs:\n",
102 | "\t\t\tstart_ind = actor.find(\"\\'name\\':\")\n",
103 | "\t\t\tname = actor[start_ind:].split(\",\")[0][9:-1]\n",
104 | "\t\t\tname = name.strip()\n",
105 | "\n",
106 | "\t\t\tif name != \"\" and name not in name_2_id:\n",
107 | "\t\t\t\tname_2_id[name] = id_counter\n",
108 | "\t\t\t\tid_counter += 1\n",
109 | "\t\t\t\n",
110 | "\t\t\tif name != \"\":\n",
111 | "\t\t\t\tmovie_cast.append(name_2_id[name])\n",
112 | "\t\t\n",
113 | "\t\tfor id_ in movie_cast:\n",
114 | "\t\t\tif not G.IsNode(id_):\n",
115 | "\t\t\t\tG.AddNode(id_)\n",
116 | "\n",
117 | "\t\tfor id_1 in movie_cast:\n",
118 | "\t\t\tfor id_2 in movie_cast:\n",
119 | "\t\t\t\tif id_1 != id_2 and not G.IsEdge(id_1, id_2):\n",
120 | "\t\t\t\t\tG.AddEdge(id_1, id_2)\n",
121 | "\t\t\t\t\tf_edges.write(str(id_1) + ' ' + str(id_2) + '\\n')\n",
122 | "\t\n",
123 | "\tprint(G.GetEdges()) # 6183763\n",
124 | "\tprint(G.GetNodes()) # 202747; same as len(name_2_id)\n",
125 | "\n",
126 | "\tf_actors = open('drive/My Drive/CS224W/Colab Notebooks/actor_name_to_id.txt', 'w')\n",
127 | "\tfor key,val in name_2_id.items():\n",
128 | "\t\tf_actors.write(key + ', ' + str(val) + '\\n')\n",
129 | "\tf_actors.close()\n",
130 | "\tf_edges.close()\n",
131 | "\treturn name_2_id\n",
132 | "actor_name_2_id = read_data()"
133 | ],
134 | "execution_count": 0,
135 | "outputs": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "metadata": {
140 | "id": "ArSvzXsPg0IV",
141 | "colab_type": "code",
142 | "outputId": "a1052926-08b5-4d53-8be7-af9f0a7e9979",
143 | "colab": {
144 | "base_uri": "https://localhost:8080/",
145 | "height": 238
146 | }
147 | },
148 | "source": [
149 | "ids = [179, 183, 2496, 2525, 2530, 2511, 2524, 17341, 66647, 71523, 109179, 184005, 154303]\n",
150 | "for name, node_id in actor_name_2_id.items(): # for name, age in dictionary.iteritems(): (for Python 2.x)\n",
151 | "  if node_id in ids:\n",
152 | "    print(name, node_id)\n"
153 | ],
154 | "execution_count": 0,
155 | "outputs": [
156 | {
157 | "output_type": "stream",
158 | "text": [
159 | "Anthony Mondal 179\n",
160 | "Michael Cline 183\n",
161 | "Dina Meyer 2496\n",
162 | "Warren Sulatycky 2511\n",
163 | "Coyote Shivers 2524\n",
164 | "Lynne Adams 2525\n",
165 | "Glenn Bang 2530\n",
166 | "Javon Barnwell 17341\n",
167 | "Sonny Marinelli 66647\n",
168 | "Beverly Murray 71523\n",
169 | "Bronwen Booth 109179\n",
170 | "Denis Sandler 154303\n",
171 | "Antoni Petorozliev 184005\n"
172 | ],
173 | "name": "stdout"
174 | }
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "metadata": {
180 | "id": "SAt3qEs9ZrpG",
181 | "colab_type": "code",
182 | "outputId": "72eff771-8bb2-49e3-bc09-37d7cb27bbf1",
183 | "colab": {
184 | "base_uri": "https://localhost:8080/",
185 | "height": 334
186 | }
187 | },
188 | "source": [
189 | "def create_movie_list():\n",
190 | "  filename = \"drive/My Drive/CS224W/Colab Notebooks/credits.csv\"\n",
191 | "  #filename = \"drive/My Drive/Colab Notebooks/credits.csv\"\n",
192 | "  df = pd.read_csv(filename)\n",
193 | "  name_2_movies = {}\n",
194 | "  \n",
195 | "  for idx, row in 
df.iterrows():\n", 196 | " cast_blobs = row['cast'].split(\"}\")\n", 197 | " movie_id = row['id']\n", 198 | " \n", 199 | " for actor in cast_blobs:\n", 200 | " start_ind = actor.find(\"\\'name\\':\")\n", 201 | " name = actor[start_ind:].split(\",\")[0][9:-1]\n", 202 | " \n", 203 | " if name == \"\":\n", 204 | " continue\n", 205 | " if name in name_2_movies:\n", 206 | " movies_list_so_far = name_2_movies[name]\n", 207 | " movies_list_so_far.append(movie_id)\n", 208 | " name_2_movies[name] = movies_list_so_far\n", 209 | " else:\n", 210 | " name_2_movies[name] = [movie_id]\n", 211 | "\n", 212 | " return name_2_movies\n", 213 | "name_2_movies = create_movie_list()\n", 214 | "# print(len(name_2_movies)) # 202747 actors (nodes)" 215 | ], 216 | "execution_count": 0, 217 | "outputs": [ 218 | { 219 | "output_type": "error", 220 | "ename": "NameError", 221 | "evalue": "ignored", 222 | "traceback": [ 223 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 224 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 225 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mname_2_movies\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mname_2_movies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_movie_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0;31m# print(len(name_2_movies)) # 202747 actors (nodes)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 226 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcreate_movie_list\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"drive/My Drive/CS224W/Colab Notebooks/credits.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#filename = \"drive/My Drive/Colab Notebooks/credits.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mname_2_movies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 227 | "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" 228 | ] 229 | } 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "metadata": { 235 | "id": "bsnNuO4jZyce", 236 | "colab_type": "code", 237 | "outputId": "96bb07b0-38e7-4f85-f237-f5797b0f402c", 238 | "colab": { 239 | "base_uri": "https://localhost:8080/", 240 | "height": 89 241 | } 242 | }, 243 | "source": [ 244 | "def create_genre_list(name_2_movies):\n", 245 | " #filename = \"drive/My Drive/CS224W/Colab Notebooks/movies_metadata.csv\"\n", 246 | " filename = \"drive/My Drive/Colab Notebooks/movies_metadata.csv\"\n", 247 | " df = pd.read_csv(filename)\n", 248 | "\n", 249 | " genre_2_id = {} # map of each genre to unique, arbitrary id\n", 250 | " id_ctr = 0\n", 251 | " # create map of movie id to genre list\n", 252 | " movie_2_genres = {}\n", 253 | 
" for idx, row in df.iterrows():\n", 254 | " movie_id = row['id']\n", 255 | "\n", 256 | " genres_for_movie = []\n", 257 | " genre_blob = row['genres'].split(\"}\")\n", 258 | "\n", 259 | " for genre in genre_blob:\n", 260 | " start_ind = genre.find(\"\\'name\\':\")\n", 261 | " genre_name = genre[start_ind:].split(\",\")[0][9:-1]\n", 262 | "\n", 263 | " if genre_name != \"\":\n", 264 | " genres_for_movie.append(genre_name)\n", 265 | "\n", 266 | " if genre_name not in genre_2_id:\n", 267 | " genre_2_id[genre_name] = id_ctr\n", 268 | " id_ctr += 1\n", 269 | "\n", 270 | " movie_2_genres[movie_id] = genres_for_movie\n", 271 | " return movie_2_genres\n", 272 | "\n", 273 | "movie_2_genres = create_genre_list(name_2_movies)" 274 | ], 275 | "execution_count": 0, 276 | "outputs": [ 277 | { 278 | "output_type": "stream", 279 | "text": [ 280 | "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n", 281 | " if self.run_code(code, result):\n" 282 | ], 283 | "name": "stderr" 284 | }, 285 | { 286 | "output_type": "stream", 287 | "text": [ 288 | "{'Animation': 0, 'Comedy': 1, 'Family': 2, 'Adventure': 3, 'Fantasy': 4, 'Romance': 5, 'Drama': 6, 'Action': 7, 'Crime': 8, 'Thriller': 9, 'Horror': 10, 'History': 11, 'Science Fiction': 12, 'Mystery': 13, 'War': 14, 'Foreign': 15, 'Music': 16, 'Documentary': 17, 'Western': 18, 'TV Movie': 19, 'Carousel Productions': 20, 'Vision View Entertainment': 21, 'Telescene Film Group Productions': 22, 'Aniplex': 23, 'GoHands': 24, 'BROSTA TV': 25, 'Mardock Scramble Production Committee': 26, 'Sentai Filmworks': 27, 'Odyssey Media': 28, 'Pulser Productions': 29, 'Rogue State': 30, 'The Cartel': 31}\n" 289 | ], 290 | "name": "stdout" 291 | } 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "id": "TMMlcwEdISrm", 298 | "colab_type": "code", 299 | "colab": {} 300 | }, 301 | "source": [ 302 | "def create_actor_genres(movie_2_genres):\n", 303 | " name_2_genres_map = {}\n", 304 | " for actor, movies_list in name_2_movies.items():\n", 305 | " genres_map_for_the_actor = {}\n", 306 | " for movie in movies_list:\n", 307 | " movie = str(movie)\n", 308 | " list_genres = movie_2_genres[movie]\n", 309 | " for genre in list_genres:\n", 310 | " if genre not in genres_map_for_the_actor:\n", 311 | " genres_map_for_the_actor[genre] = 1\n", 312 | " else:\n", 313 | " genres_map_for_the_actor[genre] += 1\n", 314 | " name_2_genres_map[actor] = genres_map_for_the_actor\n", 315 | "\n", 316 | " return name_2_genres_map\n", 317 | "\n", 318 | "name_2_genres = create_actor_genres(movie_2_genres)" 319 | ], 320 | "execution_count": 0, 321 | "outputs": [] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "metadata": { 326 | "id": "5niHYsN1aZcR", 327 | "colab_type": "code", 328 | "outputId": "7889541b-66fc-4f54-c51a-aacd59ae9be2", 329 | "colab": { 330 | "base_uri": "https://localhost:8080/", 331 | "height": 102 332 | } 333 | }, 334 | "source": [ 335 | "def get_top_genre_per_actor(name_2_genres):\n", 336 | " actor_2_top_genre = {}\n", 337 | " other_actors = []\n", 338 | " for actor, genre_map in name_2_genres.items():\n", 339 | " actor = actor.strip()\n", 340 | " top_genre = \"\"\n", 341 | " max_val = 0\n", 342 | "\n", 343 | " for genre, count in genre_map.items():\n", 344 | " if count > max_val:\n", 345 | " max_val = count\n", 346 | " top_genre = genre\n", 347 | "\n", 348 | " if top_genre != \"\":\n", 349 | " actor_2_top_genre[actor] = top_genre\n", 350 
| " else:\n", 351 | " other_actors.append(actor)\n", 352 | " return actor_2_top_genre, other_actors\n", 353 | "\n", 354 | "actor_2_top_genre, other_actors = get_top_genre_per_actor(name_2_genres)\n", 355 | "# print(len(actor_2_top_genre)) # 199710 actors (nodes)\n", 356 | "# print(actor_2_top_genre['Tom Hanks'])\n", 357 | "unique_top_genres = set()\n", 358 | "for actor, top_genre in actor_2_top_genre.items():\n", 359 | " unique_top_genres.add(top_genre)\n", 360 | "print(unique_top_genres)\n", 361 | "print(len(unique_top_genres))\n", 362 | "\n", 363 | "def new_genre_id_map(unique_top_genres):\n", 364 | " id_ctr = 0\n", 365 | " top_genre_2_id = {}\n", 366 | " for genre in unique_top_genres:\n", 367 | " top_genre_2_id[genre] = id_ctr\n", 368 | " id_ctr += 1\n", 369 | " top_genre_2_id[\"MISC\"] = id_ctr\n", 370 | " return top_genre_2_id\n", 371 | "\n", 372 | "top_genre_2_id = new_genre_id_map(unique_top_genres)\n", 373 | "print(top_genre_2_id)" 374 | ], 375 | "execution_count": 0, 376 | "outputs": [ 377 | { 378 | "output_type": "stream", 379 | "text": [ 380 | "Comedy\n", 381 | "{'Foreign', 'Science Fiction', 'Drama', 'TV Movie', 'Animation', 'Crime', 'Comedy', 'Documentary', 'Adventure', 'Western', 'Thriller', 'Action', 'Horror', 'War', 'Romance', 'Mystery', 'Fantasy', 'Music', 'Family', 'History'}\n", 382 | "20\n", 383 | "{'Foreign': 0, 'Science Fiction': 1, 'Drama': 2, 'TV Movie': 3, 'Animation': 4, 'Crime': 5, 'Comedy': 6, 'Documentary': 7, 'Adventure': 8, 'Western': 9, 'Thriller': 10, 'Action': 11, 'Horror': 12, 'War': 13, 'Romance': 14, 'Mystery': 15, 'Fantasy': 16, 'Music': 17, 'Family': 18, 'History': 19, 'MISC': 20}\n" 384 | ], 385 | "name": "stdout" 386 | } 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "metadata": { 392 | "id": "7h2QwD0ObNc5", 393 | "colab_type": "code", 394 | "outputId": "5dca18aa-bc58-4611-c119-2901f0dfc5de", 395 | "colab": { 396 | "base_uri": "https://localhost:8080/", 397 | "height": 33 398 | } 399 | }, 400 | "source": [ 401 | "def get_actor_top_genre_id(actor_2_top_genre, top_genre_2_id):\n", 402 | " actor_2_genre_id = {}\n", 403 | " for actor, top_genre in actor_2_top_genre.items():\n", 404 | " actor_2_genre_id[actor] = top_genre_2_id[top_genre]\n", 405 | " return actor_2_genre_id\n", 406 | "\n", 407 | "actor_2_genre_id = get_actor_top_genre_id(actor_2_top_genre, top_genre_2_id)\n", 408 | "print(len(actor_2_genre_id))" 409 | ], 410 | "execution_count": 0, 411 | "outputs": [ 412 | { 413 | "output_type": "stream", 414 | "text": [ 415 | "199687\n" 416 | ], 417 | "name": "stdout" 418 | } 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "metadata": { 424 | "id": "jZEVkR8Io5Si", 425 | "colab_type": "code", 426 | "outputId": "37f16fd0-cfb7-4ec7-c6eb-4d49b9228741", 427 | "colab": { 428 | "base_uri": "https://localhost:8080/", 429 | "height": 118 430 | } 431 | }, 432 | "source": [ 433 | "def get_node_id_top_genre_id(actor_2_genre_id):\n", 434 | " filename = \"drive/My Drive/Colab Notebooks/actor_name_to_id.txt\"\n", 435 | " #filename = \"drive/My Drive/CS224W/Colab Notebooks/actor_name_to_id.txt\"\n", 436 | " df = pd.read_csv(filename, sep=\", \", header=None)\n", 437 | " df.columns = [\"name\", 'id']\n", 438 | " df.drop_duplicates(subset =\"name\", keep = False, inplace = True)\n", 439 | "\n", 440 | " print(df.shape)\n", 441 | " print(len(actor_2_genre_id))\n", 442 | "\n", 443 | " node_id_2_genre_id = {}\n", 444 | "\n", 445 | " for idx, row in df.iterrows():\n", 446 | " name = row[0]\n", 447 | " node_id = row[1]\n", 448 | " \n", 449 | " if name in 
actor_2_genre_id:\n", 450 | " genre_id = actor_2_genre_id[name]\n", 451 | " node_id_2_genre_id[node_id] = genre_id\n", 452 | " else:\n", 453 | " node_id_2_genre_id[node_id] = len(top_genre_2_id) - 1\n", 454 | "\n", 455 | " return node_id_2_genre_id\n", 456 | "\n", 457 | "node_id_2_genre_id = get_node_id_top_genre_id(actor_2_genre_id)\n", 458 | "print(len(node_id_2_genre_id))" 459 | ], 460 | "execution_count": 0, 461 | "outputs": [ 462 | { 463 | "output_type": "stream", 464 | "text": [ 465 | "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", 466 | " after removing the cwd from sys.path.\n" 467 | ], 468 | "name": "stderr" 469 | }, 470 | { 471 | "output_type": "stream", 472 | "text": [ 473 | "(202721, 2)\n", 474 | "199687\n", 475 | "202721\n" 476 | ], 477 | "name": "stdout" 478 | } 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "metadata": { 484 | "id": "92TIVoLkpzeq", 485 | "colab_type": "code", 486 | "colab": {} 487 | }, 488 | "source": [ 489 | "# def make_new_edge_list(node_id_2_genre_id, actor_name_2_id): # recreate edges.txt file (just including nodes which have a top genre)\n", 490 | "\n", 491 | "# f_edges_old = open('drive/My Drive/Colab Notebooks/edges.txt', 'r')\n", 492 | "# f_edges_new = open('drive/My Drive/Colab Notebooks/new_edges.txt', 'w')\n", 493 | "\n", 494 | "# for line in f_edges_old:\n", 495 | "# ids = line.split(\" \")\n", 496 | "# id_1 = ids[0]\n", 497 | "# id_2 = ids[1]\n", 498 | "# if int(id_1) in node_id_2_genre_id and int(id_2) in node_id_2_genre_id:\n", 499 | "# f_edges_new.write(id_1 + ' ' + id_2 + '\\n')\n", 500 | "\n", 501 | "# make_new_edge_list(node_id_2_genre_id, actor_name_2_id) # 6156707 edges now" 502 | ], 503 | "execution_count": 0, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "X1qp1PARU0oU", 510 | "colab_type": "code", 511 | "colab": {} 512 | }, 513 | "source": [ 514 | "def get_true_labels(node_id_2_genre_id):\n", 515 | " f_labels = open('drive/My Drive/Colab Notebooks/labels_final.txt', 'w')\n", 516 | "\n", 517 | " for i in range(len(node_id_2_genre_id)):\n", 518 | " f_labels.write(str(node_id_2_genre_id[i]) + '\\n')\n", 519 | "\n", 520 | "get_true_labels(node_id_2_genre_id)" 521 | ], 522 | "execution_count": 0, 523 | "outputs": [] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "metadata": { 528 | "id": "ZILaAqkZXXXr", 529 | "colab_type": "code", 530 | "colab": {} 531 | }, 532 | "source": [ 533 | "# f_labels = open('drive/My Drive/Colab Notebooks/labels_final.txt', 'r')\n", 534 | "\n", 535 | "# for line in f_labels:\n", 536 | "# if int(line) > 20:\n", 537 | "# print(\"ISSUE\")" 538 | ], 539 | "execution_count": 0, 540 | "outputs": [] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "metadata": { 545 | "id": "x3A9PG6TcgKV", 546 | "colab_type": "code", 547 | "colab": {} 548 | }, 549 | "source": [ 550 | "" 551 | ], 552 | "execution_count": 0, 553 | "outputs": [] 554 | } 555 | ] 556 | } -------------------------------------------------------------------------------- /GNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "GNN.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "machine_shape": "hm" 
10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "h8e9tfWcjPUs", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "Make a copy of this notebook. When running this notebook on Colab, ensure that you've set your Runtime > Change runtime type to Python 3 and GPU.\n", 26 | "\n", 27 | "---\n", 28 | "\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "7oLWZWPPqo-l", 35 | "colab_type": "code", 36 | "colab": {} 37 | }, 38 | "source": [ 39 | "!pip install --verbose --no-cache-dir torch-scatter\n", 40 | "!pip install --verbose --no-cache-dir torch-sparse\n", 41 | "!pip install --verbose --no-cache-dir torch-cluster\n", 42 | "!pip install torch-geometric\n", 43 | "!pip install tensorboardX\n", 44 | "!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip\n", 45 | "!unzip ngrok-stable-linux-amd64.zip" 46 | ], 47 | "execution_count": 0, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "STDeeipoLBCS", 54 | "colab_type": "code", 55 | "colab": {} 56 | }, 57 | "source": [ 58 | "from torch_geometric.data import InMemoryDataset\n", 59 | "from torch_geometric.data import Data\n", 60 | "import os.path as osp\n", 61 | "import torch.optim as optim" 62 | ], 63 | "execution_count": 0, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "id": "guFJsTigq0bS", 70 | "colab_type": "code", 71 | "colab": {} 72 | }, 73 | "source": [ 74 | "def build_optimizer(args, params):\n", 75 | " weight_decay = args.weight_decay\n", 76 | " filter_fn = filter(lambda p : p.requires_grad, params)\n", 77 | " if args.opt == 'adam':\n", 78 | " optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 79 | " elif args.opt == 'sgd':\n", 80 | " optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay)\n", 81 | " elif args.opt == 'rmsprop':\n", 82 | " optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 83 | " elif args.opt == 'adagrad':\n", 84 | " optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 85 | " if args.opt_scheduler == 'none':\n", 86 | " return None, optimizer\n", 87 | " elif args.opt_scheduler == 'step':\n", 88 | " scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)\n", 89 | " elif args.opt_scheduler == 'cos':\n", 90 | " scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)\n", 91 | " return scheduler, optimizer" 92 | ], 93 | "execution_count": 0, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "qzQL0UJMqva1", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "import torch\n", 105 | "import torch.nn as nn\n", 106 | "import torch.nn.functional as F\n", 107 | "\n", 108 | "import torch_geometric.nn as pyg_nn\n", 109 | "import torch_geometric.utils as pyg_utils\n", 110 | "\n", 111 | "class GNNStack(torch.nn.Module):\n", 112 | " def __init__(self, input_dim, hidden_dim, output_dim, args, task='node'):\n", 113 | " super(GNNStack, self).__init__()\n", 114 | " conv_model = self.build_conv_model(args.model_type)\n", 115 | " self.convs = nn.ModuleList()\n", 116 | " self.convs.append(conv_model(input_dim, hidden_dim))\n", 117 | " assert (args.num_layers >= 1), 'Number of layers is not >=1'\n", 118 | "\n", 
119 | " for l in range(args.num_layers-1):\n", 120 | " self.convs.append(conv_model(hidden_dim, hidden_dim))\n", 121 | "\n", 122 | " # post-message-passing\n", 123 | " self.post_mp = nn.Sequential(\n", 124 | " nn.Linear(hidden_dim, hidden_dim), nn.Dropout(args.dropout), \n", 125 | " nn.Linear(hidden_dim, output_dim))\n", 126 | "\n", 127 | " self.task = task\n", 128 | " if not (self.task == 'node' or self.task == 'graph'):\n", 129 | " raise RuntimeError('Unknown task.')\n", 130 | "\n", 131 | " self.dropout = args.dropout\n", 132 | " self.num_layers = args.num_layers\n", 133 | "\n", 134 | " def build_conv_model(self, model_type):\n", 135 | " if model_type == 'GCN':\n", 136 | " return pyg_nn.GCNConv\n", 137 | " elif model_type == 'GraphSage':\n", 138 | " return GraphSage\n", 139 | " elif model_type == 'GAT':\n", 140 | " # When applying GAT with num heads > 1, one needs to modify the \n", 141 | " # input and output dimension of the conv layers (self.convs),\n", 142 | " # to ensure that the input dim of the next layer is num heads\n", 143 | " # multiplied by the output dim of the previous layer.\n", 144 | " # HINT: In case you want to play with multiheads, you need to change the for-loop when builds up self.convs to be\n", 145 | " # self.convs.append(conv_model(hidden_dim * num_heads, hidden_dim)), \n", 146 | " # and also the first nn.Linear(hidden_dim * num_heads, hidden_dim) in post-message-passing.\n", 147 | " return GAT\n", 148 | "\n", 149 | " def forward(self, data):\n", 150 | " x, edge_index = data.x, data.edge_index\n", 151 | " batch = len(data)\n", 152 | "\n", 153 | " ############################################################################\n", 154 | " # TODO: Your code here! \n", 155 | " # Each layer in GNN should consist of a convolution (specified in model_type),\n", 156 | " # a non-linearity (use RELU), and dropout. \n", 157 | " # HINT: the __init__ function contains parameters you will need. For whole\n", 158 | " # graph classification (as specified in self.task) apply max pooling over\n", 159 | " # all of the nodes with pyg_nn.global_max_pool as the final layer.\n", 160 | " # Our implementation is ~6 lines, but don't worry if you deviate from this.\n", 161 | "\n", 162 | " \n", 163 | " for i in range(self.num_layers):\n", 164 | " x = self.convs[i](x, edge_index)\n", 165 | " x = F.relu(x)\n", 166 | " x = F.dropout(x, p=self.dropout, training = self.training)\n", 167 | "\n", 168 | " # pools\n", 169 | " if self.task == 'graph':\n", 170 | " x = pyg_nn.global_mean_pool(x, batch)\n", 171 | "\n", 172 | " ############################################################################\n", 173 | "\n", 174 | " x = self.post_mp(x)\n", 175 | "\n", 176 | " return F.log_softmax(x, dim=1)\n", 177 | "\n", 178 | " def loss(self, pred, label):\n", 179 | " return F.nll_loss(pred, label)\n", 180 | "\n", 181 | "\n", 182 | "class GraphSage(pyg_nn.MessagePassing):\n", 183 | " \"\"\"Non-minibatch version of GraphSage.\"\"\"\n", 184 | " def __init__(self, in_channels, out_channels, reducer='mean', \n", 185 | " normalize_embedding=True):\n", 186 | " super(GraphSage, self).__init__(aggr='mean')\n", 187 | "\n", 188 | " ############################################################################\n", 189 | " # TODO: Your code here! 
\n", 190 | " # Define the layers needed for the message and update functions below.\n", 191 | " # self.lin is the linear transformation that you apply to each neighbor before aggregating them\n", 192 | " # self.agg_lin is the linear transformation you apply to the concatenated self embedding (skip connection) and mean aggregated neighbors\n", 193 | " # Our implementation is ~2 lines, but don't worry if you deviate from this.\n", 194 | "\n", 195 | " self.agg_lin = nn.Linear(in_channels + out_channels, out_channels, bias = False) # TODO\n", 196 | " self.lin = nn.Linear(in_channels, out_channels) # TODO\n", 197 | "\n", 198 | " ############################################################################\n", 199 | "\n", 200 | " if normalize_embedding:\n", 201 | " self.normalize_emb = True\n", 202 | "\n", 203 | " def forward(self, x, edge_index):\n", 204 | " num_nodes = x.size(0)\n", 205 | " # x has shape [N, in_channels]\n", 206 | " # edge_index has shape [2, E]\n", 207 | "\n", 208 | " return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x)\n", 209 | "\n", 210 | " def message(self, x_j, edge_index, size):\n", 211 | " # x_j has shape [E, in_channels]\n", 212 | " # edge_index has shape [2, E]\n", 213 | " \n", 214 | " ############################################################################\n", 215 | " # TODO: Your code here! \n", 216 | " # Given x_j, perform the aggregation of a dense layer followed by a RELU non-linearity.\n", 217 | " # Notice that the aggregator operation will be done in self.propagate. \n", 218 | " # HINT: It may be useful to read the pyg_nn implementation of GCNConv,\n", 219 | " # https://pytorch-geometric.readthedocs.io/en/latest/notes/create_gnn.html\n", 220 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 221 | "\n", 222 | " x_j = self.lin(x_j) # TODO\n", 223 | " x_j = F.relu(x_j)\n", 224 | "\n", 225 | "\n", 226 | " ############################################################################\n", 227 | "\n", 228 | " return x_j\n", 229 | "\n", 230 | " def update(self, aggr_out, x):\n", 231 | " # aggr_out has shape [N, out_channels]\n", 232 | " # x has shape [N, in_channels]\n", 233 | " \n", 234 | " ############################################################################\n", 235 | " # TODO: Your code here! Perform the update step here. \n", 236 | " # Perform a MLP with skip-connection, that is a concatenation followed by \n", 237 | " # a linear layer and a RELU non-linearity.\n", 238 | " # Finally, remember to normalize as vector as shown in GraphSage algorithm.\n", 239 | " # Our implementation is ~4 lines, but don't worry if you deviate from this.\n", 240 | " \n", 241 | " if self.normalize_emb:\n", 242 | " aggr_out = torch.cat((aggr_out, x), 1)\n", 243 | " aggr_out = self.agg_lin(aggr_out)\n", 244 | " aggr_out = F.relu(aggr_out)\n", 245 | " aggr_out = F.normalize(aggr_out) # TODO\n", 246 | "\n", 247 | " ############################################################################\n", 248 | "\n", 249 | " return aggr_out\n", 250 | "\n", 251 | "\n", 252 | "class GAT(pyg_nn.MessagePassing):\n", 253 | " # Please run code with num_heads=1. 
\n", 254 | " def __init__(self, in_channels, out_channels, num_heads=1, concat=True,\n", 255 | " dropout=0, bias=True, **kwargs):\n", 256 | " super(GAT, self).__init__(aggr='add', **kwargs)\n", 257 | "\n", 258 | " self.in_channels = in_channels\n", 259 | " self.out_channels = out_channels\n", 260 | " self.heads = num_heads\n", 261 | " self.concat = concat \n", 262 | " self.dropout = dropout\n", 263 | "\n", 264 | " ############################################################################\n", 265 | " # TODO: Your code here!\n", 266 | " # Use nn.Linear the layers needed for the forward function. \n", 267 | " # Remember that the shape of the output depends on the number of heads and out_channels.\n", 268 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 269 | "\n", 270 | " self.lin = nn.Linear(in_channels, self.heads*out_channels) # TODO\n", 271 | "\n", 272 | " ############################################################################\n", 273 | "\n", 274 | " ############################################################################\n", 275 | " # TODO: Your code here!\n", 276 | " # The attention mechanism is a single feed-forward neural network parametrized\n", 277 | " # by weight vector self.att. Define self.att using nn.Parameter needed for the attention\n", 278 | " # mechanism here. Remember to consider number of heads and out_channels for dimension!\n", 279 | " # Also remember that that the attention mechanism is applied to the concatenation\n", 280 | " # of node feaures of two nodes for dimension.\n", 281 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 282 | "\n", 283 | " self.att = nn.Parameter(torch.Tensor(2*out_channels, 1))\n", 284 | "\n", 285 | " ############################################################################\n", 286 | "\n", 287 | " if bias and concat:\n", 288 | " self.bias = nn.Parameter(torch.Tensor(self.heads * out_channels))\n", 289 | " elif bias and not concat:\n", 290 | " self.bias = nn.Parameter(torch.Tensor(out_channels))\n", 291 | " else:\n", 292 | " self.register_parameter('bias', None)\n", 293 | "\n", 294 | " nn.init.xavier_uniform_(self.att)\n", 295 | " nn.init.zeros_(self.bias)\n", 296 | "\n", 297 | " ############################################################################\n", 298 | "\n", 299 | " def forward(self, x, edge_index, size=None):\n", 300 | " # x has shape [N, in_channels]\n", 301 | " # edge_index has shape [2, E]\n", 302 | " \n", 303 | " ############################################################################\n", 304 | " # TODO: Your code here!\n", 305 | " # Apply your linear transformation to the node feature matrix x before starting\n", 306 | " # to propagate messages.\n", 307 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 308 | " \n", 309 | " x = self.lin(x) # TODO\n", 310 | " ############################################################################\n", 311 | "\n", 312 | " # Start propagating messages.\n", 313 | " return self.propagate(edge_index, size=size, x=x)\n", 314 | "\n", 315 | " def message(self, edge_index_i, x_i, x_j, size_i):\n", 316 | " # Constructs messages to node i for each edge (j, i).\n", 317 | " # edge_index_i has shape [E]\n", 318 | " \n", 319 | " ############################################################################\n", 320 | " # TODO: Your code here! Compute the attention coefficients alpha as described\n", 321 | " # in equation (7). 
Remember to be careful of the number of heads with dimension!\n", 322 | " # HINT: torch_geometric.utils.softmax may help to calculate softmax for neighbors of i. \n", 323 | " # https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.softmax\n", 324 | " # Our implementation is ~5 lines, but don't worry if you deviate from this.\n", 325 | " \n", 326 | " x_i = x_i.view(-1, self.heads, self.out_channels)\n", 327 | " x_j = x_j.view(-1, self.heads, self.out_channels)\n", 328 | " \n", 329 | " e_ij = torch.cat([x_i, x_j], dim = 2)\n", 330 | " e_ij = torch.einsum(\"abc,cd->abd\", (e_ij, self.att))\n", 331 | " \n", 332 | " m = nn.LeakyReLU(0.2)\n", 333 | " e_ij = m(e_ij)\n", 334 | "\n", 335 | " alpha = pyg_utils.softmax(e_ij, edge_index_i) # TODO\n", 336 | "\n", 337 | " ############################################################################\n", 338 | "\n", 339 | " alpha = F.dropout(alpha, p=self.dropout, training=self.training)\n", 340 | "\n", 341 | " return x_j * alpha.view(-1, self.heads, 1)\n", 342 | " \n", 343 | " def update(self, aggr_out):\n", 344 | " # Updates node embedings.\n", 345 | " if self.concat is True:\n", 346 | " aggr_out = aggr_out.view(-1, self.heads * self.out_channels)\n", 347 | " else:\n", 348 | " aggr_out = aggr_out.mean(dim=1)\n", 349 | "\n", 350 | " if self.bias is not None:\n", 351 | " aggr_out = aggr_out + self.bias\n", 352 | " return aggr_out\n" 353 | ], 354 | "execution_count": 0, 355 | "outputs": [] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "ucAnFus2hqfK", 361 | "colab_type": "code", 362 | "colab": {} 363 | }, 364 | "source": [ 365 | "from google.colab import drive\n", 366 | "drive.mount('/content/drive')" 367 | ], 368 | "execution_count": 0, 369 | "outputs": [] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "metadata": { 374 | "id": "l0vXQNT5K9ks", 375 | "colab_type": "code", 376 | "colab": {} 377 | }, 378 | "source": [ 379 | "def data_obj():\n", 380 | " edges = open(\"edges-100k.txt\", 'r').readlines()\n", 381 | " labels = open(\"labels_final.txt\", 'r').readlines()\n", 382 | " source_nodes = []\n", 383 | " target_nodes = []\n", 384 | "\n", 385 | " for line in edges:\n", 386 | " x = line.split()\n", 387 | " source_nodes.append(int(x[0]))\n", 388 | " target_nodes.append(int(x[1]))\n", 389 | "\n", 390 | " labels = [int(line) for line in labels]\n", 391 | "\n", 392 | " features = [[1]*NUM_FEATURES for i in range(len(labels))]\n", 393 | " x = torch.tensor(features, dtype=torch.float)\n", 394 | "\n", 395 | " y = torch.LongTensor(labels) #dtype=torch.long\n", 396 | "\n", 397 | " edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long)\n", 398 | "\n", 399 | " data = Data(x=x, edge_index=edge_index, y=y, batch=torch.tensor([i for i in range(len(labels))])) # num_classes = NUM_LABELS\n", 400 | " return data" 401 | ], 402 | "execution_count": 0, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "metadata": { 408 | "id": "ogDp4jyLqfd2", 409 | "colab_type": "code", 410 | "colab": {} 411 | }, 412 | "source": [ 413 | "import time\n", 414 | "\n", 415 | "import networkx as nx\n", 416 | "import numpy as np\n", 417 | "import torch\n", 418 | "import torch.optim as optim\n", 419 | "\n", 420 | "from torch_geometric.datasets import Planetoid\n", 421 | "from torch_geometric.data import DataLoader\n", 422 | "\n", 423 | "import torch_geometric.nn as pyg_nn\n", 424 | "\n", 425 | "NUM_FEATURES = 1433\n", 426 | "NUM_LABELS = 21\n", 427 | "\n", 428 | "GCN_acc = []\n", 429 | 
"GraphSage_acc = []\n", 430 | "GAT_acc = []\n", 431 | "\n", 432 | "def train(dataset, task, args):\n", 433 | "\n", 434 | " # build model\n", 435 | " model = GNNStack(NUM_FEATURES, args.hidden_dim, NUM_LABELS, args, task=task)\n", 436 | " scheduler, opt = build_optimizer(args, model.parameters())\n", 437 | "\n", 438 | " # train\n", 439 | " for epoch in range(args.epochs):\n", 440 | " total_loss = 0\n", 441 | " model.train()\n", 442 | " opt.zero_grad()\n", 443 | " pred = model(dataset)\n", 444 | " label = dataset.y\n", 445 | "\n", 446 | " loss = model.loss(pred, label)\n", 447 | " loss.backward()\n", 448 | " opt.step()\n", 449 | " total_loss += loss.item()\n", 450 | " total_loss /= len(dataset)\n", 451 | "\n", 452 | " if epoch % 5 == 0:\n", 453 | " test_acc = test(dataset, model, args)\n", 454 | " # print(test_acc, ' test')\n", 455 | "\n", 456 | "def test(test_dataset, model, args):\n", 457 | " model.eval()\n", 458 | "\n", 459 | " correct = 0\n", 460 | " with torch.no_grad():\n", 461 | " # max(dim=1) returns values, indices tuple; only need indices\n", 462 | " pred = model(test_dataset).max(dim=1)[1]\n", 463 | " label = test_dataset.y\n", 464 | "\n", 465 | " if args.model_type == 'GCN': \n", 466 | " f = open(\"GCN_pred.txt\", 'w')\n", 467 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 468 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 469 | " f.close()\n", 470 | " elif args.model_type == 'GraphSage':\n", 471 | " f = open(\"GraphSage_pred.txt\", 'w')\n", 472 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 473 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 474 | " f.close()\n", 475 | " elif args.model_type == 'GAT':\n", 476 | " f = open(\"GAT_pred.txt\", 'w')\n", 477 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 478 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 479 | " f.close()\n", 480 | "\n", 481 | " correct += pred.eq(label).sum().item()\n", 482 | " total = len(label)\n", 483 | " return correct / total\n", 484 | "\n", 485 | "class objectview(object):\n", 486 | " def __init__(self, d):\n", 487 | " self.__dict__ = d\n", 488 | "\n", 489 | "def main():\n", 490 | " for args in [\n", 491 | " {'model_type': 'GCN', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 492 | " {'model_type': 'GraphSage', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 493 | " {'model_type': 'GAT', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 494 | " ]:\n", 495 | " args = objectview(args)\n", 496 | " task = 'node'\n", 497 | " dataset = data_obj()\n", 498 | " train(dataset, task, args)\n", 499 | "\n", 500 | "if __name__ == '__main__':\n", 501 | " main()\n" 502 | ], 503 | "execution_count": 0, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "kAvQJiLukFMk", 510 | "colab_type": "code", 511 | "outputId": "903aadd6-5d91-4d0f-9d27-e0fa880df361", 512 | "colab": { 513 | "base_uri": "https://localhost:8080/", 514 | "height": 265 515 | } 516 | }, 517 | "source": [ 518 | "from matplotlib import pyplot as plt\n", 519 | "\n", 520 | 
"plt.plot(GCN_acc ,label = \"GCN_acc\")\n", 521 | "plt.plot(GraphSage_acc ,label = \"GraphSage_acc\")\n", 522 | "plt.plot(GAT_acc ,label = \"GAT_acc\")\n", 523 | "plt.legend()\n", 524 | "#plt.show()\n", 525 | "plt.savefig(\"output.png\")" 526 | ], 527 | "execution_count": 0, 528 | "outputs": [ 529 | { 530 | "output_type": "display_data", 531 | "data": { 532 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3de5xVdb3/8ddn7op4QajjjwEHkwd3\nBmTATKNEKUwDTwePiv0O8dD4QUIWx6OoxVGLTlS/OtWDn0GlWCcEtSwohBRRj5bJoMjNSC6jDnFg\nBGUGmRnm8vn9sddsFuNc9gwDe+D7fj4e+zF7fddlf/dS1nuv68fcHRERCU9GujsgIiLpoQAQEQmU\nAkBEJFAKABGRQCkAREQClZXuDrRF9+7dvaCgIN3dEBE5qaxbt+4dd+/RuP2kCoCCggKKi4vT3Q0R\nkZOKmb3ZVLsOAYmIBEoBICISKAWAiEigFAAiIoFSAIiIBEoBICISKAWAiEigTqr7ADrSocO1vLnv\nECXvvA9bV3DOe5ubnK4e59mMt6ng8AnuoYjIEV8c+x9c0GtQhy4ziAB4cds7bNx1gJJ33mfnO+9T\nsu999pRXJ8cX595Ldyun3u0D8z7d5TQe+XB3AEy1E0QkTa7e96YCoD0efGEnq/+6l3O75FDQvQuX\nXdiDPt1Pp6B7Fwq6nc65D1bCx75KxpX3HjVfvdfzwLJ/oqC+lt9O+C2ZGZnp6L6IyHERRADM/cch\n/CA3kzPzsj84sqYK6msgt+sHRj315lNse28b3/74t7XxF5FTThAB8A9n5TU/sro88Tf3zKOa6+rr\neGD9A1xw1gWMKxh3HHsnIpIeugqouiLxt9EewB/f/CPbD2xneuF0/foXkVOSAqCJPYC6+joeeO0B\nLjz7Qj5V8Kk0dUxE5PhSADSxB7CyZCU7D+xkWuE0MkyrSEROTdq6NQqAuvo6fvLaT+h7Tl/Gnj82\njR0TETm+FABVDYeAEgGwYucKSspL+FLhl/TrX0ROadrCJfcAzqS2vpafvPYT+p3TjzG9x6S3XyIi\nx5kCoOEkcN6Z/GHHH3ir4i2mD5uuX/8icsrTVq66AjJzqM3IZMGGBQzoNoAxvfTrX0ROfSkFgJmN\nM7OtZrbNzGY3MX6amW00s/Vm9oKZDYzax5rZumjcOjMbE5vn2WiZ66PXhzrua7VBdTnkdmX59uW8\nXfE2Xxr2Jcw++EwgEZFTTat3AptZJjAfGAuUAmvNbJm7b4lNttjdfxJNPx74PjAOeAf4rLv/3cwG\nA6uAnrH5bnL34o75Ku1UXUFN7hks2LCAQecO4hP5n0hrd0RETpRU9gBGAdvcfYe7HwaWABPiE7h7\neWywC+BR+6vu/veofTNwmpnlHnu3O1B1BctOz2PXwV369S8iQUklAHoCb8eGSzn6VzwAZnarmW0H\nvgN8uYnl/BPwirtXx9oeig7/fN2a2fKa2VQzKzaz4rKyshS620bVFfwyu5bB5w7m4z0/3vHLFxHp\npDrsJLC7z3f3jwB3Al+LjzOzQcA84P/Emm9y9yHAx6PX/25muQvdvcjdi3r06NFR3T2iupw9Vsew\nDw3Tr38RCUoqAbAL6BUbzo/amrMEuLZhwMzygSeAf3H37Q3t7r4r+lsBLCZxqOmE86oDVFLPaVmn\npePjRUTSJpUAWAv0NbM+ZpYD3AAsi09gZn1jg1cDb0TtZwN/AGa7+4ux6bPMrHv0Phu4Bth0LF+k\nvWqqD1IHCgARCU6rVwG5e62ZzSBxBU8m8KC7bzaz+4Fid18GzDCzK4Ea4F1gcjT7DOBCYI6ZzYna\nPgW8D6yKNv6ZwNPATzvwe6XGncqag8AZCgARCU5KBWHcfQWwolHbnNj725qZ75vAN5tZ7IgU+3j8\n1FZT6XWA9gBEJDxh3wlcXc6hjMSJXwWAiIQm8ACooDK68icvq4WykSIip6DAA6CcyozEKtAegIiE\nJvAAOLIHoAAQkdAoABQAIhKosAOg6sghoNOzTk9zZ0RETqywAyC+B5CtPQARCUvgAVBOpS4DFZFA\nBR4AFVRmZgOQl6nLQEUkLIEHQDmVWTnkZuaSmZGZ7t6IiJxQgQdABZVZuTr8IyJBUgBkZikARCRI\nCgAFgIgEKvAAKKcyI1MBICJBCjsAqsqpNFMAiEiQwg6A6goqMxQAIhKmlALAzMaZ2VYz22Zms5sY\nP83MNprZejN7wcwGxsbdFc231cw+neoyjzv3RADgCgARCVKrAWBmmcB84CpgIHBjfAMfWezuQ9x9\nGPAd4PvRvANJ1BAeBIwD/p+ZZaa4zOOrthrqa6h0FYQXkTClsgcwCtjm7jvc/TCwBJgQn8Ddy2OD\nXQCP3k8Alrh7tbvvBLZFy2t1mcdddaLLldQpAEQkSKnUBO4JvB0bLgUubjyRmd0KzAJygDGxeV9q\nNG/P6H2ryzyuqisAqKyv0YPgRCRIHXYS2N3nu/tHgDuBr3XUcs1sqpkVm1lxWVlZRy0WqsupAw67\n9gBEJEypBMAuoFdsOD9qa84S4NpW5k15me6+0N2L3L2oR48eKXQ3RbFHQasWgIiEKJUAWAv0NbM+\nZpZD4qTusvgEZtY3Nng18Eb0fhlwg5nlmlkfoC/wcirLPO6iS0BBj4IWkTC1eg7A3WvNbAawCsgE\nHnT3zWZ2P1Ds7suAGWZ2JVADvAtMjubdbGaPAluAWuBWd68DaGqZHf/1WlBVTqWpILyIhCuVk8C4\n+wpgRaO2ObH3t7Uw71xgbirLPKG0ByAigQv3TuDqchWEF5GgBRwAFRzKygEUACISpoADoJzKnMTV\nP3lZKgcpIuEJOAAqqIxuANMegIiEKPAASPzyVwCISIjCDoCsXEABICJhCjgAyqmMTgLrTmARCVG4\nAVBVTmVWFlmWRXZmdrp7IyJywoUbANUVVGaoILyIhCulO4FPOQ3VwDIyOM0UACISpjADoKEamOkE\nsIiEK8xDQA3VwBQAIhKwQAMgqgamgvAiErBAAyDaA1BBeBEJWKABEO0BeK0CQESCFXYA1NcoAEQk\nWGEGQFV0CEgBICIBSykAzGycmW01s21mNruJ8bPMbIuZbTCz1WZ2ftR+uZm
tj72qzOzaaNwiM9sZ\nGzesY79aCxr2AOqqFQAiEqxW7wMws0xgPjAWKAXWmtkyd98Sm+xVoMjdD5nZdOA7wPXuvgYYFi2n\nG7AN+GNsvn9z98c75qu0QXU5DlQpAEQkYKnsAYwCtrn7Dnc/DCwBJsQncPc17n4oGnwJyG9iOROB\nJ2PTpU91BVVZObguAxWRgKUSAD2Bt2PDpVFbc24Gnmyi/QbgkUZtc6PDRj8ws9ymFmZmU82s2MyK\ny8rKUuhuCqrLqcw9E9CNYCISrg49CWxmnweKgO82aj8PGAKsijXfBfQHRgLdgDubWqa7L3T3Incv\n6tGjR8d0tLqCyrwzAAWAiIQrlQDYBfSKDedHbUcxsyuBe4Dx7l7daPQ/A0+4e01Dg7vv9oRq4CES\nh5pOjOoKKnO6AHBatgJARMKUSgCsBfqaWR8zyyFxKGdZfAIzGw4sILHx39vEMm6k0eGfaK8AMzPg\nWmBT27vfTtUVyYLwKgYjIqFq9Sogd681sxkkDt9kAg+6+2Yzux8odvdlJA75nAE8ltie85a7jwcw\nswISexDPNVr0r8ysB2DAemBah3yjVFSXU9n1HKjVISARCVdKj4N29xXAikZtc2Lvr2xh3hKaOGns\n7mNS7mVHqyqnstt5CgARCVqYdwJXVyTrASsARCRU4QVAQzWwqA6wAkBEQhVeAETVwA5lJo5+KQBE\nJFThBUBDLYCMTADysvLS2RsRkbQJMACiB8FlZGAYeZkKABEJU4ABcKQecF5WHtFlqyIiwQkwABrq\nAev4v4iELdwAMNUDFpGwhRcADdXAvE4BICJBCy8AkvWA6/QcIBEJWoAB0FAP+LD2AEQkaAEGQAVk\n5qoesIgEL8AAKIfcrlTWVioARCRoAQZAxZEAUDEYEQlY2AGgPQARCViQAeAKABGR1ALAzMaZ2VYz\n22Zms5sYP8vMtpjZBjNbbWbnx8bVmdn66LUs1t7HzP4SLXNpVG7y+Ksupya3K3W6D0BEAtdqAJhZ\nJjAfuAoYCNxoZgMbTfYqUOTuQ4HHge/ExlW6+7DoNT7WPg/4gbtfCLwL3HwM3yN1VeVU5kYF4RUA\nIhKwVPYARgHb3H2Hux8GlgAT4hO4+xp3PxQNvgTkt7TAqBD8GBJhAfAwicLwx191BZXRyV8FgIiE\nLJUA6Am8HRsupYkavzE3A0/GhvPMrNjMXjKzho38ucB77l7b2jLNbGo0f3FZWVkK3W1BQzWw7MQj\noBUAIhKylIrCp8rMPg8UAZ+INZ/v7rvM7ALgGTPbCBxIdZnuvhBYCFBUVOTH1MGoGlhldi6gABCR\nsKWyB7AL6BUbzo/ajmJmVwL3AOPdvbqh3d13RX93AM8Cw4F9wNlm1hBATS6zwzU8BkIF4UVEUgqA\ntUDf6KqdHOAGYFl8AjMbDiwgsfHfG2s/x8xyo/fdgUuBLe7uwBpgYjTpZOB3x/plWtXwILhMBYCI\nSKsBEB2nnwGsAl4HHnX3zWZ2v5k1XNXzXeAM4LFGl3sOAIrN7DUSG/xvu/uWaNydwCwz20binMDP\nO+xbNadhDyAzUQ9YASAiIUvpHIC7rwBWNGqbE3t/ZTPz/QkY0sy4HSSuMDpxkvWAEwGgx0GLSMjC\nuhM4GQCJOsB6FpCIhCysAKg6UhAedAhIRMIWVgDECsID5GXmpa8vIiJpFlgAHKkHnJuZS2Z0LkBE\nJESBBUCiGtih+sPkZenXv4iELbAAUDUwEZEGgQWAisGIiDRQAIiIBCq8AMg7SwEgIkJwAaBzACIi\nDcIKgCoFgIhIg7ACQOcARESSwgmAqBoYuWcqAERECCkAompg5HalsqZSTwIVkeCFEwDRYyDqcrpw\nuP6w9gBEJHgBBUD0ILjoEdAKABEJXUABoHrAIiJxKQWAmY0zs61mts3MZjcxfpaZbTGzDWa22szO\nj9qHmdmfzWxzNO762DyLzGxnVEJyvZkN67iv1YTkHkAUACoGIyKBazUAzCwTmA9cBQwEbjSzgY0m\nexUocvehwOPAd6L2Q8C/uPsgYBzwn2Z2dmy+f3P3YdFr/TF+l5YlC8InqmBqD0BEQpfKHsAoYJu7\n73D3w8ASYEJ8Andf4+6HosGXgPyo/W/u/kb0/u/AXqBHR3W+TRqqgWWoILyICKQWAD2Bt2PDpVFb\nc24GnmzcaGajgBxge6x5bnRo6AdmltvUwsxsqpkVm1lxWVlZCt1tRrIesPYARESgg08Cm9nngSLg\nu43azwN+CUxx9/qo+S6gPzAS6Abc2dQy3X2huxe5e1GPHsew81CtesAiInGpBMAuoFdsOD9qO4qZ\nXQncA4x39+pY+5nAH4B73P2lhnZ33+0J1cBDJA41HT9RNbBKrwUUACIiqQTAWqCvmfUxsxzgBmBZ\nfAIzGw4sILHx3xtrzwGeAH7h7o83mue86K8B1wKbjuWLtCr2JFBQAIiIZLU2gbvXmtkMYBWQCTzo\n7pvN7H6g2N2XkTjkcwbwWGJ7zlvuPh74Z2A0cK6ZfSFa5BeiK35+ZWY9AAPWA9M69qs1EnsQHCgA\nRERaDQAAd18BrGjUNif2/spm5vsv4L+aGTcm9W52gOoKyDszGQB6FpCIhC6gO4GPPAk0y7LIzsxO\nd49ERNIqoABQMRgRkbhwAkDVwEREjhJOADScBK6p1HOAREQIJQBUDUxE5APCCIB4NbDaSvIy89Ld\nIxGRtAsjAKLHQOgcgIjIEYEEQOJBcOSeyaHaQwoAERGCCYBoDyC6EUwngUVEggmAhj0AHQISEWmg\nABARCVQYARBVA/OcM6iqrVIAiIgQSgBEewBVWXk4rgAQESGYAIiqgWWqHrCISINAAiCqBkYdoEdB\ni4hAMAFQnnwOEGgPQEQEUgwAMxtnZlvNbJuZzW5i/Cwz22JmG8xstZmdHxs32czeiF6TY+0jzGxj\ntMwfRaUhjw9VAxMR+YBWA8DMMoH5wFXAQOBGMxvYaLJXgSJ3Hwo8Dnwnmrcb8O/AxSSKvv+7mZ0T\nzfMA8EWgb/Qad8zfpjmNqoEpAEREUtsDGAVsc/cd7n4YWAJMiE/g7mvc/VA0+BKQH73/NPCUu+93\n93eBp4BxUUH4M939JXd34BckCsMfHyOmwKW3KQBERGJSCYCewNux4dKorTk3A0+2Mm/P6H2ryzSz\nqWZWbGbFZWVlKXS3Cf3GweB/UgCIiMR06ElgM/s8UAR8t6OW6e4L3b3I3Yt69OhxTMtKBoCeBSQi\nklIA7AJ6xYbzo7ajmNmVwD3AeHevbmXeXRw5TNTsMjua9gBERI5IJQDWAn3NrI+Z5QA3AMviE5jZ\ncGABiY3/3tioVcCnzOyc6OTvp4BV7r4bKDezj0ZX//wL8LsO+D4tUgCIiByR1doE7l5rZjNIbMwz\ngQfdfbOZ3Q8Uu/syEod8zgAei67mfMvdx7v7fjP7BokQAbjf3fdH778ELAJOI3HO4EmOs8raSgxT\nRTAREVIIAAB3XwGsaNQ2J/b+yh
bmfRB4sIn2YmBwyj3tAJW1leRl5XE8bzkQETlZhHEncESPghYR\nOUIBICISKAWAiEigggsAPQlURCQhpZPApwrtAYg0r6amhtLSUqqqqtLdFWmnvLw88vPzyc7OTmn6\n4ALgrNPPSnc3RDql0tJSunbtSkFBga6UOwm5O/v27aO0tJQ+ffqkNE9wh4DysnQPgEhTqqqqOPfc\nc7XxP0mZGeeee26b9uDCCoAaHQISaYk2/ie3tv73CysAdA5ARCRJASAiEqhgAqCmroZar1UAiHRy\ne/bsYdKkSVxwwQWMGDGCSy65hCeeeAKAl19+mdGjR9OvXz+GDx/OLbfcwqFDh1i0aBEZGRls2LAh\nuZzBgwdTUlKSpm9xcggmAA7VJgqWKQBEOi9359prr2X06NHs2LGDdevWsWTJEkpLS9mzZw/XXXcd\n8+bNY+vWrbz66quMGzeOiooKAPLz85k7d26av8HJJZjLQFUMRiR19y3fzJa/l3foMgf+rzP5988O\nanGaZ555hpycHKZNm5ZsO//885k5cyZz5sxh8uTJXHLJJclxEydOTL6/5ppreP7559m6dSv9+vVr\ntT/Tp09n7dq1VFZWMnHiRO677z4A1q5dy2233cb7779Pbm4uq1ev5vTTT+fOO+9k5cqVZGRk8MUv\nfpGZM2e2dRV0OuEFgPYARDqtzZs3c9FFFzU5btOmTUyePLnZeTMyMrjjjjv41re+xcMPP9zqZ82d\nO5du3bpRV1fHFVdcwYYNG+jfvz/XX389S5cuZeTIkZSXl3PaaaexcOFCSkpKWL9+PVlZWezfv7/V\n5Z8MFAAi8gGt/VI/UW699VZeeOEFcnJy6NWrV6vTT5o0iblz57Jz585Wp3300UdZuHAhtbW17N69\nmy1btmBmnHfeeYwcORKAM888E4Cnn36aadOmkZWV2GR269btGL5V5xHMOQAFgEjnN2jQIF555ZXk\n8Pz581m9ejVlZWUMGjSIdevWtTh/VlYW//qv/8q8efNanG7nzp1873vfY/Xq1WzYsIGrr746yEdg\npBQAZjbOzLaa2TYzm93E+NFm9oqZ1ZrZxFj75Wa2PvaqMrNro3GLzGxnbNywjvtaH9QQAHoYnEjn\nNWbMGKqqqnjggQeSbYcOJS7gmDFjBg8//DB/+ctfkuN+85vfsGfPnqOW8YUvfIGnn36asrKyZj+n\nvLycLl26cNZZZ7Fnzx6efDJRkLBfv37s3r2btWsTRQwrKiqora1l7NixLFiwgNraWoBT5hBQqwFg\nZpnAfOAqYCBwo5kNbDTZW8AXgMXxRndf4+7D3H0YMAY4BPwxNsm/NYx39/Xt/xqt0x6ASOdnZvz2\nt7/lueeeo0+fPowaNYrJkyczb948PvzhD7NkyRJuv/12+vXrx4ABA1i1ahVdu3Y9ahk5OTl8+ctf\nZu/evc18ChQWFjJ8+HD69+/PpEmTuPTSS5PzLl26lJkzZ1JYWMjYsWOpqqrilltuoXfv3gwdOpTC\nwkIWL17c7LJPJubuLU9gdglwr7t/Ohq+C8Dd/6OJaRcBv3f3x5sYNxX4hLvf1Nq0zSkqKvLi4uJU\nJz/Ksu3LuOeFe/jDP/6B3mf2btcyRE5lr7/+OgMGDEh3N+QYNfXf0czWuXtR42lTOQTUE3g7Nlwa\ntbXVDcAjjdrmmtkGM/uBmeW2Y5kpq6zRHoCISNwJuQrIzM4DhgCrYs13Af8D5AALgTuB+5uYdyow\nFaB37/b/ctchIJEwXXzxxVRXVx/V9stf/pIhQ4akqUedRyoBsAuIX3+VH7W1xT8DT7h7TUODu++O\n3lab2UPA7U3N6O4LSQQERUVFLR+vaoECQCRM8ZPGcrRUDgGtBfqaWR8zyyFxKGdZGz/nRhod/on2\nCrDE80uvBTa1cZltUllbSU5GDpkZmcfzY0REThqtBoC71wIzSBy+eR141N03m9n9ZjYewMxGmlkp\ncB2wwMw2N8xvZgUk9iCea7ToX5nZRmAj0B345rF/neYdqj2kx0CIiMSkdA7A3VcAKxq1zYm9X0vi\n0FBT85bQxEljdx/Tlo4eq6raKh3+ERGJCepOYAWAiMgRCgAR6VRaqgdwrAoKCnjnnXea/MxrrrmG\nwsJCBg4cyGc+85kO+bzOLqiHwSkARFL05Gz4n40du8x/GAJXfbvFSRrqAUyePDl5t+2bb77JsmVH\nX3dSW1ubfDBbR5gzZw5jx47ltttuAziqsMypTHsAItJptFQPYNGiRYwfP54xY8ZwxRVXcPDgQa64\n4gouuugihgwZwu9+9zsASkpK6N+/PzfddBMDBgxg4sSJyecJAfz4xz9OzvPXv/4VgN27d5Off+Q0\n5tChQwGa/QyAb3zjG/Tr14/LLruMG2+8ke9973sAbN++nXHjxjFixAg+/vGPJz+jKcuXL+fiiy9m\n+PDhXHnllcnnGh08eJApU6YwZMgQhg4dyq9//WsAVq5cyUUXXURhYSFXXHHFMa1rIJG4J8trxIgR\n3l7jnxjvX13z1XbPL3Kq27JlS7q74D/84Q/9K1/5SpPjHnroIe/Zs6fv27fP3d1ramr8wIED7u5e\nVlbmH/nIR7y+vt537tzpgL/wwgvu7j5lyhT/7ne/6+7u559/vv/oRz9yd/f58+f7zTff7O7uK1eu\n9LPOOss/+clP+je/+U3ftWtXi5/x8ssve2FhoVdWVnp5eblfeOGFyc8YM2aM/+1vf3N395deeskv\nv/zyZr/v/v37vb6+3t3df/rTn/qsWbPc3f2OO+7w22677ajp9u7d6/n5+b5jxw539+R6aKyp/45A\nsTexTdUhIBHptOL1AG699VbGjh2bfBa/u3P33Xfz/PPPk5GRwa5du5K/oHv16pV8wNvnP/95fvSj\nH3H77Yl7TT/3uc8BMGLECH7zm98A8OlPf5odO3awcuVKnnzySYYPH86mTZs4++yzm/yMF198kQkT\nJpCXl0deXh6f/exngcQv9z/96U9cd911ye/Q+C7kuNLSUq6//np2797N4cOH6dOnD5CoP7BkyZLk\ndOeccw7Lly9n9OjRyWk6oiaBAkBEOo1BgwYlD3dAoh7AO++8Q1FR4jlmXbp0SY771a9+RVlZGevW\nrSM7O5uCgoLkM/0T95ceER/OzU08diwzMzP5eGdIbFAnTZrEpEmTkuUlKyoqmv2MptTX13P22Wez\nfn1qDzeeOXMms2bNYvz48Tz77LPce++9Kc3XUYI6B6BaACKdW0v1ABo7cOAAH/rQh8jOzmbNmjW8\n+eabyXFvvfUWf/7znwFYvHgxl112WYuf+8wzzyQ/p6Kigu3bt9O7d+9mP+PSSy9l+fLlVFVVcfDg\nQX7/+98DiQpiffr04bHHHgMSeymvvfZas5974MABevZM3CYVL2M5duxY5s+fnxx+9913+ehHP8rz\nzz+frHbWETUJggiAuvo6quuqycvKS3dXRKQFLdUDaOymm26iuLiYIUOG8Itf/IL+/fsnx/X
r14/5\n8+czYMAA3n33XaZPn97i565bt46ioiKGDh3KJZdcwi233MLIkSOb/YyRI0cyfvx4hg4dylVXXcWQ\nIUM466yzgMSeyc9//nMKCwsZNGjQUSeOG7v33nu57rrrGDFiBN27d0+2f+1rX+Pdd99l8ODBFBYW\nsmbNGnr06MHChQv53Oc+R2FhIddff32b1m1TWq0H0Jm0tx7A+zXv89HFH2XWiFlMGTzlOPRM5OR3\nqtQDKCkp4ZprrmHTpuP6eDEOHjzIGWecwaFDhxg9ejQLFy5stqD9idSWegBBnAPQk0BFpKNNnTqV\nLVu2UFVVxeTJkzvFxr+twggAFYMRCUZBQcFx//UPtKks5Ny5c5PnBRpcd9113HPPPR3drTYJIgAO\n1SZO7igARCQd7rnnnrRv7JsSxElgHQISEfkgBYCISKDCCgAVhBERSQorALQHICKSlFIAmNk4M9tq\nZtvMbHYT40eb2StmVmtmExuNqzOz9dFrWay9j5n9JVrm0qje8HHREAC6E1ik82utHsBXvvIVevbs\nSX19PQAPPfQQw4YNY9iwYeTk5DBkyBCGDRvG7Nkf2FRJI61eBWRmmcB8YCxQCqw1s2XuviU22VvA\nF4Dbm1hEpbsPa6J9HvADd19iZj8BbgYeaGK6Y6Y9AJG2mffyPP66v/nHGLdH/279uXPUnS1O463U\nA6ivr+eJJ56gV69ePPfcc1x++eVMmTKFKVMSN3gWFBSwZs2ao+6qlealsgcwCtjm7jvc/TCwBJgQ\nn8DdS9x9A1Cfyoda4slMY4DHo6aHgWtT7nUbKQBETg4t1QMAePbZZxk0aBDTp0/nkUceaddnvPzy\ny1xyySUMHz6cj33sY2zduhWAuro6br/9dgYPHszQoUP58Y9/DMDatWv52Mc+RmFhIaNGjaKiouIY\nv2Xnkcp9AD2Bt2PDpcDFbfiMPDMrBmqBb7v7b4FzgffcveFRfKU0UTgewMymAlMBevfu3YaPPaKy\ntpJMyyQ7I7td84uEprVf6sfL5s2bW7yj9pFHHuHGG29kwoQJ3H333dTU1JCd3bZ/1/379+e///u/\nycrK4umnn+buu+/m17/+NTG9rzYAAAcTSURBVAsXLqSkpIT169eTlZXF/v37OXz4MNdffz1Lly5l\n5MiRlJeXc9ppp84PyRNxI9j57r7LzC4AnjGzjcCBVGd294XAQkg8C6g9HWh4FHTjR8SKSOcWrwfw\n4osvsmLFCr7//e/TtWtXLr74YlatWsU111zTpmUeOHCAyZMn88Ybb2Bm1NTUAIln8E+bNi1ZarJb\nt25s3LiR8847j5EjRwKJp32eSlI5BLQL6BUbzo/aUuLuu6K/O4BngeHAPuBsM2sIoDYts61UC0Dk\n5DBo0CBeeeWV5PD8+fNZvXo1ZWVlrFq1ivfee48hQ4ZQUFDACy+80K7DQF//+te5/PLL2bRpU/KR\nzqFKJQDWAn2jq3ZygBuAZa3MA4CZnWNmudH77sClwJaoRNkaoOGKoclA889MPUaVNQoAkZNBS/UA\nHnnkEX72s59RUlJCSUkJO3fu5Kmnnmq2XkBz4s/gX7RoUbJ97NixLFiwIFkkZv/+/fTr14/du3ez\ndu1aIFErIF5E5mTXagBEx+lnAKuA14FH3X2zmd1vZuMBzGykmZUC1wELzGxzNPsAoNjMXiOxwf92\n7OqhO4FZZraNxDmBn3fkF4vTHoDIyaG5egD33XcfK1eu5Oqrr05O26VLFy677DKWL1/eps+44447\nuOuuuxg+fPhRG/NbbrmF3r17M3ToUAoLC1m8eDE5OTksXbqUmTNnUlhYyNixY0+pPYYg6gH8bOPP\nqDhcwVdHfPU49Erk1HCq1AMIneoBNHLLkFvS3QURkU4niAAQkfA89NBD/PCHPzyq7dJLLz2q1m7o\nFAAikuTup8zl0vE7hEPR1kP6QTwMTkRal5eXx759+9q8EZHOwd3Zt28feXl5Kc+jPQARASA/P5/S\n0lLKysrS3RVpp7y8PPLz81OeXgEgIgBkZ2fTp0+fdHdDTiAdAhIRCZQCQEQkUAoAEZFAnVR3AptZ\nGfBmO2fvDrzTgd3pSOpb+6hv7aO+tc/J3Lfz3b1H48aTKgCOhZkVN3UrdGegvrWP+tY+6lv7nIp9\n0yEgEZFAKQBERAIVUgAsTHcHWqC+tY/61j7qW/uccn0L5hyAiIgcLaQ9ABERiVEAiIgEKogAMLNx\nZrbVzLaZ2ex09yfOzErMbKOZrTeztpc769i+PGhme81sU6ytm5k9ZWZvRH/P6UR9u9fMdkXrbr2Z\nfSZNfetlZmvMbIuZbTaz26L2tK+7FvqW9nVnZnlm9rKZvRb17b6ovY+Z/SX697o0qkXeWfq2yMx2\nxtbbsBPdt6gfmWb2qpn9Phpu3zpz91P6BWQC24ELgBzgNWBguvsV618J0D3d/Yj6Mhq4CNgUa/sO\nMDt6PxuY14n6di9weydYb+cBF0XvuwJ/AwZ2hnXXQt/Svu4AA86I3mcDfwE+CjwK3BC1/wSY3on6\ntgiY2An+n5sFLAZ+Hw23a52FsAcwCtjm7jvc/TCwBJiQ5j51Su7+PLC/UfME4OHo/cPAtSe0U5Fm\n+tYpuPtud38lel8BvA70pBOsuxb6lnaecDAazI5eDowBHo/a07Xemutb2plZPnA18LNo2GjnOgsh\nAHoCb8eGS+kk/wAiDvzRzNaZ2dR0d6YJH3b33dH7/wE+nM7ONGGGmW2IDhGl5fBUnJkVAMNJ/GLs\nVOuuUd+gE6y76FDGemAv8BSJvfX33L02miRt/14b983dG9bb3Gi9/cDMctPQtf8E7gDqo+Fzaec6\nCyEAOrvL3P0i4CrgVjMbne4ONccT+5ed4ldQ5AHgI8AwYDfwf9PZGTM7A/g18BV3L4+PS/e6a6Jv\nnWLduXuduw8D8knsrfdPRz+a0rhvZjYYuItEH0cC3YA7T2SfzOwaYK+7r+uI5YUQALuAXrHh/Kit\nU3D3XdHfvcATJP4RdCZ7zOw8gOjv3jT3J8nd90T/SOuBn5LGdWdm2SQ2sL9y999EzZ1i3TXVt860\n7qL+vAesAS4BzjazhmJVaf/3GuvbuOiQmrt7NfAQJ369XQqMN7MSEoezxwA/pJ3rLIQAWAv0jc6S\n5wA3AMvS3CcAzKyLmXVteA98CtjU8lwn3DJgcvR+MvC7NPblKA0b18g/kqZ1Fx2D/Tnwurt/PzYq\n7euuub51hnVnZj3M7Ozo/WnAWBLnKNYAE6PJ0rXemurbX2OBbiSOs5/Q9ebud7l7vrsXkNiWPePu\nN9HedZbus9kn4gV8hsTVD9uBe9Ldn1i/LiBxVdJrwOZ09w14hMThgBoSxxFvJnF8cTXwBvA00K0T\n9e2XwEZgA4mN7Xlp6ttlJA7vbADWR6/PdIZ110Lf0r7ugKHAq1EfNgFzovYLgJeBbcBjQG4n6tsz\n0XrbBPwX0ZVCafr/7pMcuQqoXetMj4IQEQlUCI
eARESkCQoAEZFAKQBERAKlABARCZQCQEQkUAoA\nEZFAKQBERAL1/wGqq0GHDF+q3gAAAABJRU5ErkJggg==\n", 533 | "text/plain": [ 534 | "
" 535 | ] 536 | }, 537 | "metadata": { 538 | "tags": [] 539 | } 540 | } 541 | ] 542 | } 543 | ] 544 | } --------------------------------------------------------------------------------