├── README.md
├── louvain_baseline.py
├── data-analysis-cnm.py
├── data_processing.ipynb
└── GNN.ipynb

/README.md:
--------------------------------------------------------------------------------
1 | # CS224W_Hollywood_Graph_Networks
2 | Community detection and node classification on Hollywood actors using various models: Louvain, Clauset-Newman-Moore, GCN, GraphSage, and GAT.
3 | 
--------------------------------------------------------------------------------
/louvain_baseline.py:
--------------------------------------------------------------------------------
1 | import snap
2 | import numpy as np
3 | import networkx as nx
4 | import matplotlib.pyplot as plt
5 | import community
6 | 
7 | def main():
8 |     num_nodes = [] # list of number of nodes in each community
9 |     edges_file = "edges-100k_copy.txt"
10 |     G = nx.read_edgelist(edges_file) # nodetype=int
11 |     print(G.number_of_edges())
12 |     partition = community.best_partition(G)
13 | 
14 |     size = float(len(set(partition.values())))
15 |     pos = nx.spring_layout(G)
16 |     count = 0.
17 |     for com in set(partition.values()):
18 |         count += 1.
19 |         list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
20 |         print(list_nodes)
21 |         print("\n")
22 |         num_nodes.append(len(list_nodes))
23 |         nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, node_color = str(count / size))
24 | 
25 |     # show the partitioned graph first
26 |     nx.draw_networkx_edges(G, pos, alpha=0.5)
27 |     plt.show()
28 | 
29 |     # histogram of community sizes in its own figure
30 |     plt.figure()
31 |     plt.hist(num_nodes, log=True)
32 |     plt.xlabel("Size of Community")
33 |     plt.ylabel("Number of Communities")
34 |     plt.show()
35 | 
36 | 
37 | main()
--------------------------------------------------------------------------------
/data-analysis-cnm.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/python
2 | 
3 | import snap
4 | import matplotlib.pyplot as plt
5 | 
6 | G1 = snap.LoadEdgeList(snap.PUNGraph, "edges-100k.txt", 0, 1)
7 | 
8 | print("G1: Nodes %d, Edges %d" % (G1.GetNodes(), G1.GetEdges()))
9 | print("Number of Nodes: %d" % G1.GetNodes())
10 | 
11 | # Number of nodes of zero degree
12 | print("Number of nodes of zero degree: %d" % snap.CntDegNodes(G1, 0))
13 | 
14 | # Get degree distribution
15 | DegToCntV = snap.TIntPrV()
16 | snap.GetDegCnt(G1, DegToCntV)
17 | degree = []
18 | numNodes = []
19 | sumDegrees = 0
20 | for item in DegToCntV:
21 |     degree.append(item.GetVal1())
22 |     numNodes.append(item.GetVal2())
23 |     sumDegrees += item.GetVal1()*item.GetVal2()
24 |     #print("%d nodes with degree %d" % (item.GetVal2(), item.GetVal1()))
25 | 
26 | plt.plot(degree, numNodes)
27 | plt.yscale('log')
28 | plt.xscale('log')
29 | plt.ylabel('frequency')
30 | plt.xlabel('degree')
31 | plt.title('Degree distribution')
32 | plt.savefig('degreeDist.png')
33 | plt.clf()
34 | 
35 | # Get average degree
36 | print("Average degree:", sumDegrees/float(sum(numNodes)))
37 | 
38 | # Get largest connected component (the graph is undirected)
39 | MxScc = snap.GetMxScc(G1)
40 | print("Size of largest connected component:", MxScc.GetNodes())
41 | 
42 | # Get connected components
43 | Components = snap.TCnComV()
44 | snap.GetWccs(G1, Components)
45 | wcc_sizes = []
46 | for CnCom in Components:
47 |     wcc_sizes.append(CnCom.Len())
48 | 
49 | print("Number of connected components:", len(wcc_sizes))
50 | 
51 | # Clauset-Newman-Moore community detection
52 | CmtyV = snap.TCnComV()
53 | modularity = snap.CommunityCNM(G1, CmtyV)
54 | count = 0
55 | sizes = []
56 | communities = []
57 | for Cmty in CmtyV:
58 |     listcmty = []
59 |     for NI in Cmty:
60 |         listcmty.append(NI)
61 
| 62 | communities.append(listcmty) 63 | count += 1 64 | sizes.append(len(listcmty)) 65 | print("Number of communities:", count) 66 | print("Largest community:", max(sizes)) 67 | print("Smallest community:", min(sizes)) 68 | print("Community 21:", communities[21]) 69 | print("Community 101:", communities[101]) 70 | print("Community 10,000:", communities[10000]) 71 | 72 | # plot histogram of community sizes 73 | sizes.sort() 74 | plt.hist(sizes, log=True) 75 | plt.xlabel("Size of community") 76 | plt.ylabel("Number of communities") 77 | plt.title("Sizes of CNM communities") 78 | plt.show() 79 | plt.savefig("cnm-sizes.png") 80 | #print("Size of communities:", sizes) 81 | print("The modularity of the network is %f" % modularity) 82 | -------------------------------------------------------------------------------- /data_processing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "224.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "code", 18 | "metadata": { 19 | "id": "BGjJro5-IN3D", 20 | "colab_type": "code", 21 | "outputId": "4ae092e4-ec9d-4817-d4dc-60bda20187ce", 22 | "colab": { 23 | "base_uri": "https://localhost:8080/", 24 | "height": 122 25 | } 26 | }, 27 | "source": [ 28 | "pip install snap-stanford" 29 | ], 30 | "execution_count": 0, 31 | "outputs": [ 32 | { 33 | "output_type": "stream", 34 | "text": [ 35 | "Collecting snap-stanford\n", 36 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/0d/18/4694293d1d58ee92a1f85fa09b4b1348b849d1f35470cf296b238fa20a8d/snap_stanford-5.0.0-cp36-cp36m-manylinux1_x86_64.whl (11.2MB)\n", 37 | "\u001b[K |████████████████████████████████| 11.2MB 2.6MB/s \n", 38 | "\u001b[?25hInstalling collected packages: snap-stanford\n", 39 | "Successfully installed snap-stanford-5.0.0\n" 40 | ], 41 | "name": "stdout" 42 | } 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "metadata": { 48 | "id": "6iOxcCFuIeco", 49 | "colab_type": "code", 50 | "outputId": "4ce0d394-d392-40aa-8c26-53421c5e5fdb", 51 | "colab": { 52 | "base_uri": "https://localhost:8080/", 53 | "height": 122 54 | } 55 | }, 56 | "source": [ 57 | "from google.colab import drive\n", 58 | "drive.mount('/content/drive')" 59 | ], 60 | "execution_count": 0, 61 | "outputs": [ 62 | { 63 | "output_type": "stream", 64 | "text": [ 65 | "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n", 66 | "\n", 67 | "Enter your authorization code:\n", 68 | "··········\n", 69 | "Mounted at /content/drive\n" 70 | ], 71 | "name": "stdout" 72 | } 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "metadata": { 78 | "id": "22xju41GH9ZC", 79 | "colab_type": "code", 80 | "colab": {} 81 | }, 82 | "source": [ 83 | "import snap\n", 84 | "import pandas as pd\n", 85 | "\n", 86 | " # prefix for Sophia: My Drive; prefix for Nidhi & Flora: Shared with me\n", 87 | "\n", 88 | "def read_data():\n", 89 | "\tfilename = \"drive/My 
Drive/CS224W/Colab Notebooks/credits.csv\"\n",
90 | "\tdf = pd.read_csv(filename)\n",
91 | "\n",
92 | "\tid_counter = 0\n",
93 | "\tname_2_id = {}\n",
94 | "\tf_edges = open('drive/My Drive/CS224W/Colab Notebooks/edges.txt', 'w')\n",
95 | "\n",
96 | "\tG = snap.TUNGraph.New()\n",
97 | "\n",
98 | "\tfor idx, row in df.iterrows():\n",
99 | "\t\tmovie_cast = [] # id's of actors that worked on movie together\n",
100 | "\t\tcast_blobs = row['cast'].split(\"}\")\n",
101 | "\t\tfor actor in cast_blobs:\n",
102 | "\t\t\tstart_ind = actor.find(\"\\'name\\':\")\n",
103 | "\t\t\tname = actor[start_ind:].split(\",\")[0][9:-1]\n",
104 | "\t\t\tname = name.strip()\n",
105 | "\n",
106 | "\t\t\tif name != \"\" and name not in name_2_id:\n",
107 | "\t\t\t\tname_2_id[name] = id_counter\n",
108 | "\t\t\t\tid_counter += 1\n",
109 | "\t\t\t\n",
110 | "\t\t\tif name != \"\":\n",
111 | "\t\t\t\tmovie_cast.append(name_2_id[name])\n",
112 | "\t\t\n",
113 | "\t\tfor id_ in movie_cast:\n",
114 | "\t\t\tif not G.IsNode(id_):\n",
115 | "\t\t\t\tG.AddNode(id_)\n",
116 | "\n",
117 | "\t\tfor id_1 in movie_cast:\n",
118 | "\t\t\tfor id_2 in movie_cast:\n",
119 | "\t\t\t\tif id_1 != id_2 and not G.IsEdge(id_1, id_2):\n",
120 | "\t\t\t\t\tG.AddEdge(id_1, id_2)\n",
121 | "\t\t\t\t\tf_edges.write(str(id_1) + ' ' + str(id_2) + '\\n')\n",
122 | "\t\n",
123 | "\tprint(G.GetEdges()) # 6183763\n",
124 | "\tprint(G.GetNodes()) # 202747; same as len(name_2_id)\n",
125 | "\n",
126 | "\tf_actors = open('drive/My Drive/CS224W/Colab Notebooks/actor_name_to_id.txt', 'w')\n",
127 | "\tfor key,val in name_2_id.items():\n",
128 | "\t\tf_actors.write(key + ', ' + str(val) + '\\n')\n",
129 | "\tf_actors.close()\n",
130 | "\tf_edges.close()\n",
131 | "\treturn name_2_id\n",
132 | "actor_name_2_id = read_data()"
133 | ],
134 | "execution_count": 0,
135 | "outputs": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "metadata": {
140 | "id": "ArSvzXsPg0IV",
141 | "colab_type": "code",
142 | "outputId": "a1052926-08b5-4d53-8be7-af9f0a7e9979",
143 | "colab": {
144 | "base_uri": "https://localhost:8080/",
145 | "height": 238
146 | }
147 | },
148 | "source": [
149 | "ids = [179, 183, 2496, 2525, 2530, 2511, 2524, 17341, 66647, 71523, 109179, 184005, 154303]\n",
150 | "for name, node_id in actor_name_2_id.items(): # for name, age in dictionary.iteritems(): (for Python 2.x)\n",
151 | "  if node_id in ids:\n",
152 | "    print(name, node_id)\n"
153 | ],
154 | "execution_count": 0,
155 | "outputs": [
156 | {
157 | "output_type": "stream",
158 | "text": [
159 | "Anthony Mondal 179\n",
160 | "Michael Cline 183\n",
161 | "Dina Meyer 2496\n",
162 | "Warren Sulatycky 2511\n",
163 | "Coyote Shivers 2524\n",
164 | "Lynne Adams 2525\n",
165 | "Glenn Bang 2530\n",
166 | "Javon Barnwell 17341\n",
167 | "Sonny Marinelli 66647\n",
168 | "Beverly Murray 71523\n",
169 | "Bronwen Booth 109179\n",
170 | "Denis Sandler 154303\n",
171 | "Antoni Petorozliev 184005\n"
172 | ],
173 | "name": "stdout"
174 | }
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "metadata": {
180 | "id": "SAt3qEs9ZrpG",
181 | "colab_type": "code",
182 | "outputId": "72eff771-8bb2-49e3-bc09-37d7cb27bbf1",
183 | "colab": {
184 | "base_uri": "https://localhost:8080/",
185 | "height": 334
186 | }
187 | },
188 | "source": [
189 | "def create_movie_list():\n",
190 | "  filename = \"drive/My Drive/CS224W/Colab Notebooks/credits.csv\"\n",
191 | "  #filename = \"drive/My Drive/Colab Notebooks/credits.csv\"\n",
192 | "  df = pd.read_csv(filename)\n",
193 | "  name_2_movies = {}\n",
194 | "  \n",
195 | "  for idx, row in 
df.iterrows():\n", 196 | " cast_blobs = row['cast'].split(\"}\")\n", 197 | " movie_id = row['id']\n", 198 | " \n", 199 | " for actor in cast_blobs:\n", 200 | " start_ind = actor.find(\"\\'name\\':\")\n", 201 | " name = actor[start_ind:].split(\",\")[0][9:-1]\n", 202 | " \n", 203 | " if name == \"\":\n", 204 | " continue\n", 205 | " if name in name_2_movies:\n", 206 | " movies_list_so_far = name_2_movies[name]\n", 207 | " movies_list_so_far.append(movie_id)\n", 208 | " name_2_movies[name] = movies_list_so_far\n", 209 | " else:\n", 210 | " name_2_movies[name] = [movie_id]\n", 211 | "\n", 212 | " return name_2_movies\n", 213 | "name_2_movies = create_movie_list()\n", 214 | "# print(len(name_2_movies)) # 202747 actors (nodes)" 215 | ], 216 | "execution_count": 0, 217 | "outputs": [ 218 | { 219 | "output_type": "error", 220 | "ename": "NameError", 221 | "evalue": "ignored", 222 | "traceback": [ 223 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 224 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 225 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mname_2_movies\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mname_2_movies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_movie_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0;31m# print(len(name_2_movies)) # 202747 actors (nodes)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 226 | "\u001b[0;32m\u001b[0m in \u001b[0;36mcreate_movie_list\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mfilename\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"drive/My Drive/CS224W/Colab Notebooks/credits.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#filename = \"drive/My Drive/Colab Notebooks/credits.csv\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mname_2_movies\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 227 | "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" 228 | ] 229 | } 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "metadata": { 235 | "id": "bsnNuO4jZyce", 236 | "colab_type": "code", 237 | "outputId": "96bb07b0-38e7-4f85-f237-f5797b0f402c", 238 | "colab": { 239 | "base_uri": "https://localhost:8080/", 240 | "height": 89 241 | } 242 | }, 243 | "source": [ 244 | "def create_genre_list(name_2_movies):\n", 245 | " #filename = \"drive/My Drive/CS224W/Colab Notebooks/movies_metadata.csv\"\n", 246 | " filename = \"drive/My Drive/Colab Notebooks/movies_metadata.csv\"\n", 247 | " df = pd.read_csv(filename)\n", 248 | "\n", 249 | " genre_2_id = {} # map of each genre to unique, arbitrary id\n", 250 | " id_ctr = 0\n", 251 | " # create map of movie id to genre list\n", 252 | " movie_2_genres = {}\n", 253 | 
" for idx, row in df.iterrows():\n", 254 | " movie_id = row['id']\n", 255 | "\n", 256 | " genres_for_movie = []\n", 257 | " genre_blob = row['genres'].split(\"}\")\n", 258 | "\n", 259 | " for genre in genre_blob:\n", 260 | " start_ind = genre.find(\"\\'name\\':\")\n", 261 | " genre_name = genre[start_ind:].split(\",\")[0][9:-1]\n", 262 | "\n", 263 | " if genre_name != \"\":\n", 264 | " genres_for_movie.append(genre_name)\n", 265 | "\n", 266 | " if genre_name not in genre_2_id:\n", 267 | " genre_2_id[genre_name] = id_ctr\n", 268 | " id_ctr += 1\n", 269 | "\n", 270 | " movie_2_genres[movie_id] = genres_for_movie\n", 271 | " return movie_2_genres\n", 272 | "\n", 273 | "movie_2_genres = create_genre_list(name_2_movies)" 274 | ], 275 | "execution_count": 0, 276 | "outputs": [ 277 | { 278 | "output_type": "stream", 279 | "text": [ 280 | "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n", 281 | " if self.run_code(code, result):\n" 282 | ], 283 | "name": "stderr" 284 | }, 285 | { 286 | "output_type": "stream", 287 | "text": [ 288 | "{'Animation': 0, 'Comedy': 1, 'Family': 2, 'Adventure': 3, 'Fantasy': 4, 'Romance': 5, 'Drama': 6, 'Action': 7, 'Crime': 8, 'Thriller': 9, 'Horror': 10, 'History': 11, 'Science Fiction': 12, 'Mystery': 13, 'War': 14, 'Foreign': 15, 'Music': 16, 'Documentary': 17, 'Western': 18, 'TV Movie': 19, 'Carousel Productions': 20, 'Vision View Entertainment': 21, 'Telescene Film Group Productions': 22, 'Aniplex': 23, 'GoHands': 24, 'BROSTA TV': 25, 'Mardock Scramble Production Committee': 26, 'Sentai Filmworks': 27, 'Odyssey Media': 28, 'Pulser Productions': 29, 'Rogue State': 30, 'The Cartel': 31}\n" 289 | ], 290 | "name": "stdout" 291 | } 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "id": "TMMlcwEdISrm", 298 | "colab_type": "code", 299 | "colab": {} 300 | }, 301 | "source": [ 302 | "def create_actor_genres(movie_2_genres):\n", 303 | " name_2_genres_map = {}\n", 304 | " for actor, movies_list in name_2_movies.items():\n", 305 | " genres_map_for_the_actor = {}\n", 306 | " for movie in movies_list:\n", 307 | " movie = str(movie)\n", 308 | " list_genres = movie_2_genres[movie]\n", 309 | " for genre in list_genres:\n", 310 | " if genre not in genres_map_for_the_actor:\n", 311 | " genres_map_for_the_actor[genre] = 1\n", 312 | " else:\n", 313 | " genres_map_for_the_actor[genre] += 1\n", 314 | " name_2_genres_map[actor] = genres_map_for_the_actor\n", 315 | "\n", 316 | " return name_2_genres_map\n", 317 | "\n", 318 | "name_2_genres = create_actor_genres(movie_2_genres)" 319 | ], 320 | "execution_count": 0, 321 | "outputs": [] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "metadata": { 326 | "id": "5niHYsN1aZcR", 327 | "colab_type": "code", 328 | "outputId": "7889541b-66fc-4f54-c51a-aacd59ae9be2", 329 | "colab": { 330 | "base_uri": "https://localhost:8080/", 331 | "height": 102 332 | } 333 | }, 334 | "source": [ 335 | "def get_top_genre_per_actor(name_2_genres):\n", 336 | " actor_2_top_genre = {}\n", 337 | " other_actors = []\n", 338 | " for actor, genre_map in name_2_genres.items():\n", 339 | " actor = actor.strip()\n", 340 | " top_genre = \"\"\n", 341 | " max_val = 0\n", 342 | "\n", 343 | " for genre, count in genre_map.items():\n", 344 | " if count > max_val:\n", 345 | " max_val = count\n", 346 | " top_genre = genre\n", 347 | "\n", 348 | " if top_genre != \"\":\n", 349 | " actor_2_top_genre[actor] = top_genre\n", 350 
| " else:\n", 351 | " other_actors.append(actor)\n", 352 | " return actor_2_top_genre, other_actors\n", 353 | "\n", 354 | "actor_2_top_genre, other_actors = get_top_genre_per_actor(name_2_genres)\n", 355 | "# print(len(actor_2_top_genre)) # 199710 actors (nodes)\n", 356 | "# print(actor_2_top_genre['Tom Hanks'])\n", 357 | "unique_top_genres = set()\n", 358 | "for actor, top_genre in actor_2_top_genre.items():\n", 359 | " unique_top_genres.add(top_genre)\n", 360 | "print(unique_top_genres)\n", 361 | "print(len(unique_top_genres))\n", 362 | "\n", 363 | "def new_genre_id_map(unique_top_genres):\n", 364 | " id_ctr = 0\n", 365 | " top_genre_2_id = {}\n", 366 | " for genre in unique_top_genres:\n", 367 | " top_genre_2_id[genre] = id_ctr\n", 368 | " id_ctr += 1\n", 369 | " top_genre_2_id[\"MISC\"] = id_ctr\n", 370 | " return top_genre_2_id\n", 371 | "\n", 372 | "top_genre_2_id = new_genre_id_map(unique_top_genres)\n", 373 | "print(top_genre_2_id)" 374 | ], 375 | "execution_count": 0, 376 | "outputs": [ 377 | { 378 | "output_type": "stream", 379 | "text": [ 380 | "Comedy\n", 381 | "{'Foreign', 'Science Fiction', 'Drama', 'TV Movie', 'Animation', 'Crime', 'Comedy', 'Documentary', 'Adventure', 'Western', 'Thriller', 'Action', 'Horror', 'War', 'Romance', 'Mystery', 'Fantasy', 'Music', 'Family', 'History'}\n", 382 | "20\n", 383 | "{'Foreign': 0, 'Science Fiction': 1, 'Drama': 2, 'TV Movie': 3, 'Animation': 4, 'Crime': 5, 'Comedy': 6, 'Documentary': 7, 'Adventure': 8, 'Western': 9, 'Thriller': 10, 'Action': 11, 'Horror': 12, 'War': 13, 'Romance': 14, 'Mystery': 15, 'Fantasy': 16, 'Music': 17, 'Family': 18, 'History': 19, 'MISC': 20}\n" 384 | ], 385 | "name": "stdout" 386 | } 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "metadata": { 392 | "id": "7h2QwD0ObNc5", 393 | "colab_type": "code", 394 | "outputId": "5dca18aa-bc58-4611-c119-2901f0dfc5de", 395 | "colab": { 396 | "base_uri": "https://localhost:8080/", 397 | "height": 33 398 | } 399 | }, 400 | "source": [ 401 | "def get_actor_top_genre_id(actor_2_top_genre, top_genre_2_id):\n", 402 | " actor_2_genre_id = {}\n", 403 | " for actor, top_genre in actor_2_top_genre.items():\n", 404 | " actor_2_genre_id[actor] = top_genre_2_id[top_genre]\n", 405 | " return actor_2_genre_id\n", 406 | "\n", 407 | "actor_2_genre_id = get_actor_top_genre_id(actor_2_top_genre, top_genre_2_id)\n", 408 | "print(len(actor_2_genre_id))" 409 | ], 410 | "execution_count": 0, 411 | "outputs": [ 412 | { 413 | "output_type": "stream", 414 | "text": [ 415 | "199687\n" 416 | ], 417 | "name": "stdout" 418 | } 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "metadata": { 424 | "id": "jZEVkR8Io5Si", 425 | "colab_type": "code", 426 | "outputId": "37f16fd0-cfb7-4ec7-c6eb-4d49b9228741", 427 | "colab": { 428 | "base_uri": "https://localhost:8080/", 429 | "height": 118 430 | } 431 | }, 432 | "source": [ 433 | "def get_node_id_top_genre_id(actor_2_genre_id):\n", 434 | " filename = \"drive/My Drive/Colab Notebooks/actor_name_to_id.txt\"\n", 435 | " #filename = \"drive/My Drive/CS224W/Colab Notebooks/actor_name_to_id.txt\"\n", 436 | " df = pd.read_csv(filename, sep=\", \", header=None)\n", 437 | " df.columns = [\"name\", 'id']\n", 438 | " df.drop_duplicates(subset =\"name\", keep = False, inplace = True)\n", 439 | "\n", 440 | " print(df.shape)\n", 441 | " print(len(actor_2_genre_id))\n", 442 | "\n", 443 | " node_id_2_genre_id = {}\n", 444 | "\n", 445 | " for idx, row in df.iterrows():\n", 446 | " name = row[0]\n", 447 | " node_id = row[1]\n", 448 | " \n", 449 | " if name in 
actor_2_genre_id:\n", 450 | " genre_id = actor_2_genre_id[name]\n", 451 | " node_id_2_genre_id[node_id] = genre_id\n", 452 | " else:\n", 453 | " node_id_2_genre_id[node_id] = len(top_genre_2_id) - 1\n", 454 | "\n", 455 | " return node_id_2_genre_id\n", 456 | "\n", 457 | "node_id_2_genre_id = get_node_id_top_genre_id(actor_2_genre_id)\n", 458 | "print(len(node_id_2_genre_id))" 459 | ], 460 | "execution_count": 0, 461 | "outputs": [ 462 | { 463 | "output_type": "stream", 464 | "text": [ 465 | "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:4: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.\n", 466 | " after removing the cwd from sys.path.\n" 467 | ], 468 | "name": "stderr" 469 | }, 470 | { 471 | "output_type": "stream", 472 | "text": [ 473 | "(202721, 2)\n", 474 | "199687\n", 475 | "202721\n" 476 | ], 477 | "name": "stdout" 478 | } 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "metadata": { 484 | "id": "92TIVoLkpzeq", 485 | "colab_type": "code", 486 | "colab": {} 487 | }, 488 | "source": [ 489 | "# def make_new_edge_list(node_id_2_genre_id, actor_name_2_id): # recreate edges.txt file (just including nodes which have a top genre)\n", 490 | "\n", 491 | "# f_edges_old = open('drive/My Drive/Colab Notebooks/edges.txt', 'r')\n", 492 | "# f_edges_new = open('drive/My Drive/Colab Notebooks/new_edges.txt', 'w')\n", 493 | "\n", 494 | "# for line in f_edges_old:\n", 495 | "# ids = line.split(\" \")\n", 496 | "# id_1 = ids[0]\n", 497 | "# id_2 = ids[1]\n", 498 | "# if int(id_1) in node_id_2_genre_id and int(id_2) in node_id_2_genre_id:\n", 499 | "# f_edges_new.write(id_1 + ' ' + id_2 + '\\n')\n", 500 | "\n", 501 | "# make_new_edge_list(node_id_2_genre_id, actor_name_2_id) # 6156707 edges now" 502 | ], 503 | "execution_count": 0, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "X1qp1PARU0oU", 510 | "colab_type": "code", 511 | "colab": {} 512 | }, 513 | "source": [ 514 | "def get_true_labels(node_id_2_genre_id):\n", 515 | " f_labels = open('drive/My Drive/Colab Notebooks/labels_final.txt', 'w')\n", 516 | "\n", 517 | " for i in range(len(node_id_2_genre_id)):\n", 518 | " f_labels.write(str(node_id_2_genre_id[i]) + '\\n')\n", 519 | "\n", 520 | "get_true_labels(node_id_2_genre_id)" 521 | ], 522 | "execution_count": 0, 523 | "outputs": [] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "metadata": { 528 | "id": "ZILaAqkZXXXr", 529 | "colab_type": "code", 530 | "colab": {} 531 | }, 532 | "source": [ 533 | "# f_labels = open('drive/My Drive/Colab Notebooks/labels_final.txt', 'r')\n", 534 | "\n", 535 | "# for line in f_labels:\n", 536 | "# if int(line) > 20:\n", 537 | "# print(\"ISSUE\")" 538 | ], 539 | "execution_count": 0, 540 | "outputs": [] 541 | }, 542 | { 543 | "cell_type": "code", 544 | "metadata": { 545 | "id": "x3A9PG6TcgKV", 546 | "colab_type": "code", 547 | "colab": {} 548 | }, 549 | "source": [ 550 | "" 551 | ], 552 | "execution_count": 0, 553 | "outputs": [] 554 | } 555 | ] 556 | } -------------------------------------------------------------------------------- /GNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "GNN.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "machine_shape": "hm" 
10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "h8e9tfWcjPUs", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "Make a copy of this notebook. When running this notebook on Colab, ensure that you've set your Runtime > Change runtime type to Python 3 and GPU.\n", 26 | "\n", 27 | "---\n", 28 | "\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "metadata": { 34 | "id": "7oLWZWPPqo-l", 35 | "colab_type": "code", 36 | "colab": {} 37 | }, 38 | "source": [ 39 | "!pip install --verbose --no-cache-dir torch-scatter\n", 40 | "!pip install --verbose --no-cache-dir torch-sparse\n", 41 | "!pip install --verbose --no-cache-dir torch-cluster\n", 42 | "!pip install torch-geometric\n", 43 | "!pip install tensorboardX\n", 44 | "!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip\n", 45 | "!unzip ngrok-stable-linux-amd64.zip" 46 | ], 47 | "execution_count": 0, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "STDeeipoLBCS", 54 | "colab_type": "code", 55 | "colab": {} 56 | }, 57 | "source": [ 58 | "from torch_geometric.data import InMemoryDataset\n", 59 | "from torch_geometric.data import Data\n", 60 | "import os.path as osp\n", 61 | "import torch.optim as optim" 62 | ], 63 | "execution_count": 0, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "id": "guFJsTigq0bS", 70 | "colab_type": "code", 71 | "colab": {} 72 | }, 73 | "source": [ 74 | "def build_optimizer(args, params):\n", 75 | " weight_decay = args.weight_decay\n", 76 | " filter_fn = filter(lambda p : p.requires_grad, params)\n", 77 | " if args.opt == 'adam':\n", 78 | " optimizer = optim.Adam(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 79 | " elif args.opt == 'sgd':\n", 80 | " optimizer = optim.SGD(filter_fn, lr=args.lr, momentum=0.95, weight_decay=weight_decay)\n", 81 | " elif args.opt == 'rmsprop':\n", 82 | " optimizer = optim.RMSprop(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 83 | " elif args.opt == 'adagrad':\n", 84 | " optimizer = optim.Adagrad(filter_fn, lr=args.lr, weight_decay=weight_decay)\n", 85 | " if args.opt_scheduler == 'none':\n", 86 | " return None, optimizer\n", 87 | " elif args.opt_scheduler == 'step':\n", 88 | " scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.opt_decay_step, gamma=args.opt_decay_rate)\n", 89 | " elif args.opt_scheduler == 'cos':\n", 90 | " scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.opt_restart)\n", 91 | " return scheduler, optimizer" 92 | ], 93 | "execution_count": 0, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "qzQL0UJMqva1", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "import torch\n", 105 | "import torch.nn as nn\n", 106 | "import torch.nn.functional as F\n", 107 | "\n", 108 | "import torch_geometric.nn as pyg_nn\n", 109 | "import torch_geometric.utils as pyg_utils\n", 110 | "\n", 111 | "class GNNStack(torch.nn.Module):\n", 112 | " def __init__(self, input_dim, hidden_dim, output_dim, args, task='node'):\n", 113 | " super(GNNStack, self).__init__()\n", 114 | " conv_model = self.build_conv_model(args.model_type)\n", 115 | " self.convs = nn.ModuleList()\n", 116 | " self.convs.append(conv_model(input_dim, hidden_dim))\n", 117 | " assert (args.num_layers >= 1), 'Number of layers is not >=1'\n", 118 | "\n", 
119 | " for l in range(args.num_layers-1):\n", 120 | " self.convs.append(conv_model(hidden_dim, hidden_dim))\n", 121 | "\n", 122 | " # post-message-passing\n", 123 | " self.post_mp = nn.Sequential(\n", 124 | " nn.Linear(hidden_dim, hidden_dim), nn.Dropout(args.dropout), \n", 125 | " nn.Linear(hidden_dim, output_dim))\n", 126 | "\n", 127 | " self.task = task\n", 128 | " if not (self.task == 'node' or self.task == 'graph'):\n", 129 | " raise RuntimeError('Unknown task.')\n", 130 | "\n", 131 | " self.dropout = args.dropout\n", 132 | " self.num_layers = args.num_layers\n", 133 | "\n", 134 | " def build_conv_model(self, model_type):\n", 135 | " if model_type == 'GCN':\n", 136 | " return pyg_nn.GCNConv\n", 137 | " elif model_type == 'GraphSage':\n", 138 | " return GraphSage\n", 139 | " elif model_type == 'GAT':\n", 140 | " # When applying GAT with num heads > 1, one needs to modify the \n", 141 | " # input and output dimension of the conv layers (self.convs),\n", 142 | " # to ensure that the input dim of the next layer is num heads\n", 143 | " # multiplied by the output dim of the previous layer.\n", 144 | " # HINT: In case you want to play with multiheads, you need to change the for-loop when builds up self.convs to be\n", 145 | " # self.convs.append(conv_model(hidden_dim * num_heads, hidden_dim)), \n", 146 | " # and also the first nn.Linear(hidden_dim * num_heads, hidden_dim) in post-message-passing.\n", 147 | " return GAT\n", 148 | "\n", 149 | " def forward(self, data):\n", 150 | " x, edge_index = data.x, data.edge_index\n", 151 | " batch = len(data)\n", 152 | "\n", 153 | " ############################################################################\n", 154 | " # TODO: Your code here! \n", 155 | " # Each layer in GNN should consist of a convolution (specified in model_type),\n", 156 | " # a non-linearity (use RELU), and dropout. \n", 157 | " # HINT: the __init__ function contains parameters you will need. For whole\n", 158 | " # graph classification (as specified in self.task) apply max pooling over\n", 159 | " # all of the nodes with pyg_nn.global_max_pool as the final layer.\n", 160 | " # Our implementation is ~6 lines, but don't worry if you deviate from this.\n", 161 | "\n", 162 | " \n", 163 | " for i in range(self.num_layers):\n", 164 | " x = self.convs[i](x, edge_index)\n", 165 | " x = F.relu(x)\n", 166 | " x = F.dropout(x, p=self.dropout, training = self.training)\n", 167 | "\n", 168 | " # pools\n", 169 | " if self.task == 'graph':\n", 170 | " x = pyg_nn.global_mean_pool(x, batch)\n", 171 | "\n", 172 | " ############################################################################\n", 173 | "\n", 174 | " x = self.post_mp(x)\n", 175 | "\n", 176 | " return F.log_softmax(x, dim=1)\n", 177 | "\n", 178 | " def loss(self, pred, label):\n", 179 | " return F.nll_loss(pred, label)\n", 180 | "\n", 181 | "\n", 182 | "class GraphSage(pyg_nn.MessagePassing):\n", 183 | " \"\"\"Non-minibatch version of GraphSage.\"\"\"\n", 184 | " def __init__(self, in_channels, out_channels, reducer='mean', \n", 185 | " normalize_embedding=True):\n", 186 | " super(GraphSage, self).__init__(aggr='mean')\n", 187 | "\n", 188 | " ############################################################################\n", 189 | " # TODO: Your code here! 
\n", 190 | " # Define the layers needed for the message and update functions below.\n", 191 | " # self.lin is the linear transformation that you apply to each neighbor before aggregating them\n", 192 | " # self.agg_lin is the linear transformation you apply to the concatenated self embedding (skip connection) and mean aggregated neighbors\n", 193 | " # Our implementation is ~2 lines, but don't worry if you deviate from this.\n", 194 | "\n", 195 | " self.agg_lin = nn.Linear(in_channels + out_channels, out_channels, bias = False) # TODO\n", 196 | " self.lin = nn.Linear(in_channels, out_channels) # TODO\n", 197 | "\n", 198 | " ############################################################################\n", 199 | "\n", 200 | " if normalize_embedding:\n", 201 | " self.normalize_emb = True\n", 202 | "\n", 203 | " def forward(self, x, edge_index):\n", 204 | " num_nodes = x.size(0)\n", 205 | " # x has shape [N, in_channels]\n", 206 | " # edge_index has shape [2, E]\n", 207 | "\n", 208 | " return self.propagate(edge_index, size=(num_nodes, num_nodes), x=x)\n", 209 | "\n", 210 | " def message(self, x_j, edge_index, size):\n", 211 | " # x_j has shape [E, in_channels]\n", 212 | " # edge_index has shape [2, E]\n", 213 | " \n", 214 | " ############################################################################\n", 215 | " # TODO: Your code here! \n", 216 | " # Given x_j, perform the aggregation of a dense layer followed by a RELU non-linearity.\n", 217 | " # Notice that the aggregator operation will be done in self.propagate. \n", 218 | " # HINT: It may be useful to read the pyg_nn implementation of GCNConv,\n", 219 | " # https://pytorch-geometric.readthedocs.io/en/latest/notes/create_gnn.html\n", 220 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 221 | "\n", 222 | " x_j = self.lin(x_j) # TODO\n", 223 | " x_j = F.relu(x_j)\n", 224 | "\n", 225 | "\n", 226 | " ############################################################################\n", 227 | "\n", 228 | " return x_j\n", 229 | "\n", 230 | " def update(self, aggr_out, x):\n", 231 | " # aggr_out has shape [N, out_channels]\n", 232 | " # x has shape [N, in_channels]\n", 233 | " \n", 234 | " ############################################################################\n", 235 | " # TODO: Your code here! Perform the update step here. \n", 236 | " # Perform a MLP with skip-connection, that is a concatenation followed by \n", 237 | " # a linear layer and a RELU non-linearity.\n", 238 | " # Finally, remember to normalize as vector as shown in GraphSage algorithm.\n", 239 | " # Our implementation is ~4 lines, but don't worry if you deviate from this.\n", 240 | " \n", 241 | " if self.normalize_emb:\n", 242 | " aggr_out = torch.cat((aggr_out, x), 1)\n", 243 | " aggr_out = self.agg_lin(aggr_out)\n", 244 | " aggr_out = F.relu(aggr_out)\n", 245 | " aggr_out = F.normalize(aggr_out) # TODO\n", 246 | "\n", 247 | " ############################################################################\n", 248 | "\n", 249 | " return aggr_out\n", 250 | "\n", 251 | "\n", 252 | "class GAT(pyg_nn.MessagePassing):\n", 253 | " # Please run code with num_heads=1. 
\n", 254 | " def __init__(self, in_channels, out_channels, num_heads=1, concat=True,\n", 255 | " dropout=0, bias=True, **kwargs):\n", 256 | " super(GAT, self).__init__(aggr='add', **kwargs)\n", 257 | "\n", 258 | " self.in_channels = in_channels\n", 259 | " self.out_channels = out_channels\n", 260 | " self.heads = num_heads\n", 261 | " self.concat = concat \n", 262 | " self.dropout = dropout\n", 263 | "\n", 264 | " ############################################################################\n", 265 | " # TODO: Your code here!\n", 266 | " # Use nn.Linear the layers needed for the forward function. \n", 267 | " # Remember that the shape of the output depends on the number of heads and out_channels.\n", 268 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 269 | "\n", 270 | " self.lin = nn.Linear(in_channels, self.heads*out_channels) # TODO\n", 271 | "\n", 272 | " ############################################################################\n", 273 | "\n", 274 | " ############################################################################\n", 275 | " # TODO: Your code here!\n", 276 | " # The attention mechanism is a single feed-forward neural network parametrized\n", 277 | " # by weight vector self.att. Define self.att using nn.Parameter needed for the attention\n", 278 | " # mechanism here. Remember to consider number of heads and out_channels for dimension!\n", 279 | " # Also remember that that the attention mechanism is applied to the concatenation\n", 280 | " # of node feaures of two nodes for dimension.\n", 281 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 282 | "\n", 283 | " self.att = nn.Parameter(torch.Tensor(2*out_channels, 1))\n", 284 | "\n", 285 | " ############################################################################\n", 286 | "\n", 287 | " if bias and concat:\n", 288 | " self.bias = nn.Parameter(torch.Tensor(self.heads * out_channels))\n", 289 | " elif bias and not concat:\n", 290 | " self.bias = nn.Parameter(torch.Tensor(out_channels))\n", 291 | " else:\n", 292 | " self.register_parameter('bias', None)\n", 293 | "\n", 294 | " nn.init.xavier_uniform_(self.att)\n", 295 | " nn.init.zeros_(self.bias)\n", 296 | "\n", 297 | " ############################################################################\n", 298 | "\n", 299 | " def forward(self, x, edge_index, size=None):\n", 300 | " # x has shape [N, in_channels]\n", 301 | " # edge_index has shape [2, E]\n", 302 | " \n", 303 | " ############################################################################\n", 304 | " # TODO: Your code here!\n", 305 | " # Apply your linear transformation to the node feature matrix x before starting\n", 306 | " # to propagate messages.\n", 307 | " # Our implementation is ~1 line, but don't worry if you deviate from this.\n", 308 | " \n", 309 | " x = self.lin(x) # TODO\n", 310 | " ############################################################################\n", 311 | "\n", 312 | " # Start propagating messages.\n", 313 | " return self.propagate(edge_index, size=size, x=x)\n", 314 | "\n", 315 | " def message(self, edge_index_i, x_i, x_j, size_i):\n", 316 | " # Constructs messages to node i for each edge (j, i).\n", 317 | " # edge_index_i has shape [E]\n", 318 | " \n", 319 | " ############################################################################\n", 320 | " # TODO: Your code here! Compute the attention coefficients alpha as described\n", 321 | " # in equation (7). 
Remember to be careful of the number of heads with dimension!\n", 322 | " # HINT: torch_geometric.utils.softmax may help to calculate softmax for neighbors of i. \n", 323 | " # https://pytorch-geometric.readthedocs.io/en/latest/modules/utils.html#torch_geometric.utils.softmax\n", 324 | " # Our implementation is ~5 lines, but don't worry if you deviate from this.\n", 325 | " \n", 326 | " x_i = x_i.view(-1, self.heads, self.out_channels)\n", 327 | " x_j = x_j.view(-1, self.heads, self.out_channels)\n", 328 | " \n", 329 | " e_ij = torch.cat([x_i, x_j], dim = 2)\n", 330 | " e_ij = torch.einsum(\"abc,cd->abd\", (e_ij, self.att))\n", 331 | " \n", 332 | " m = nn.LeakyReLU(0.2)\n", 333 | " e_ij = m(e_ij)\n", 334 | "\n", 335 | " alpha = pyg_utils.softmax(e_ij, edge_index_i) # TODO\n", 336 | "\n", 337 | " ############################################################################\n", 338 | "\n", 339 | " alpha = F.dropout(alpha, p=self.dropout, training=self.training)\n", 340 | "\n", 341 | " return x_j * alpha.view(-1, self.heads, 1)\n", 342 | " \n", 343 | " def update(self, aggr_out):\n", 344 | " # Updates node embedings.\n", 345 | " if self.concat is True:\n", 346 | " aggr_out = aggr_out.view(-1, self.heads * self.out_channels)\n", 347 | " else:\n", 348 | " aggr_out = aggr_out.mean(dim=1)\n", 349 | "\n", 350 | " if self.bias is not None:\n", 351 | " aggr_out = aggr_out + self.bias\n", 352 | " return aggr_out\n" 353 | ], 354 | "execution_count": 0, 355 | "outputs": [] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "ucAnFus2hqfK", 361 | "colab_type": "code", 362 | "colab": {} 363 | }, 364 | "source": [ 365 | "from google.colab import drive\n", 366 | "drive.mount('/content/drive')" 367 | ], 368 | "execution_count": 0, 369 | "outputs": [] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "metadata": { 374 | "id": "l0vXQNT5K9ks", 375 | "colab_type": "code", 376 | "colab": {} 377 | }, 378 | "source": [ 379 | "def data_obj():\n", 380 | " edges = open(\"edges-100k.txt\", 'r').readlines()\n", 381 | " labels = open(\"labels_final.txt\", 'r').readlines()\n", 382 | " source_nodes = []\n", 383 | " target_nodes = []\n", 384 | "\n", 385 | " for line in edges:\n", 386 | " x = line.split()\n", 387 | " source_nodes.append(int(x[0]))\n", 388 | " target_nodes.append(int(x[1]))\n", 389 | "\n", 390 | " labels = [int(line) for line in labels]\n", 391 | "\n", 392 | " features = [[1]*NUM_FEATURES for i in range(len(labels))]\n", 393 | " x = torch.tensor(features, dtype=torch.float)\n", 394 | "\n", 395 | " y = torch.LongTensor(labels) #dtype=torch.long\n", 396 | "\n", 397 | " edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long)\n", 398 | "\n", 399 | " data = Data(x=x, edge_index=edge_index, y=y, batch=torch.tensor([i for i in range(len(labels))])) # num_classes = NUM_LABELS\n", 400 | " return data" 401 | ], 402 | "execution_count": 0, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "metadata": { 408 | "id": "ogDp4jyLqfd2", 409 | "colab_type": "code", 410 | "colab": {} 411 | }, 412 | "source": [ 413 | "import time\n", 414 | "\n", 415 | "import networkx as nx\n", 416 | "import numpy as np\n", 417 | "import torch\n", 418 | "import torch.optim as optim\n", 419 | "\n", 420 | "from torch_geometric.datasets import Planetoid\n", 421 | "from torch_geometric.data import DataLoader\n", 422 | "\n", 423 | "import torch_geometric.nn as pyg_nn\n", 424 | "\n", 425 | "NUM_FEATURES = 1433\n", 426 | "NUM_LABELS = 21\n", 427 | "\n", 428 | "GCN_acc = []\n", 429 | 
"GraphSage_acc = []\n", 430 | "GAT_acc = []\n", 431 | "\n", 432 | "def train(dataset, task, args):\n", 433 | "\n", 434 | " # build model\n", 435 | " model = GNNStack(NUM_FEATURES, args.hidden_dim, NUM_LABELS, args, task=task)\n", 436 | " scheduler, opt = build_optimizer(args, model.parameters())\n", 437 | "\n", 438 | " # train\n", 439 | " for epoch in range(args.epochs):\n", 440 | " total_loss = 0\n", 441 | " model.train()\n", 442 | " opt.zero_grad()\n", 443 | " pred = model(dataset)\n", 444 | " label = dataset.y\n", 445 | "\n", 446 | " loss = model.loss(pred, label)\n", 447 | " loss.backward()\n", 448 | " opt.step()\n", 449 | " total_loss += loss.item()\n", 450 | " total_loss /= len(dataset)\n", 451 | "\n", 452 | " if epoch % 5 == 0:\n", 453 | " test_acc = test(dataset, model, args)\n", 454 | " # print(test_acc, ' test')\n", 455 | "\n", 456 | "def test(test_dataset, model, args):\n", 457 | " model.eval()\n", 458 | "\n", 459 | " correct = 0\n", 460 | " with torch.no_grad():\n", 461 | " # max(dim=1) returns values, indices tuple; only need indices\n", 462 | " pred = model(test_dataset).max(dim=1)[1]\n", 463 | " label = test_dataset.y\n", 464 | "\n", 465 | " if args.model_type == 'GCN': \n", 466 | " f = open(\"GCN_pred.txt\", 'w')\n", 467 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 468 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 469 | " f.close()\n", 470 | " elif args.model_type == 'GraphSage':\n", 471 | " f = open(\"GraphSage_pred.txt\", 'w')\n", 472 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 473 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 474 | " f.close()\n", 475 | " elif args.model_type == 'GAT':\n", 476 | " f = open(\"GAT_pred.txt\", 'w')\n", 477 | " f.write(\"pred: \" + str(pred.tolist()).strip(\"[]\") + \"\\n\")\n", 478 | " f.write(\"label: \" + str(label.tolist()).strip(\"[]\") + \"\\n\")\n", 479 | " f.close()\n", 480 | "\n", 481 | " correct += pred.eq(label).sum().item()\n", 482 | " total = len(label)\n", 483 | " return correct / total\n", 484 | "\n", 485 | "class objectview(object):\n", 486 | " def __init__(self, d):\n", 487 | " self.__dict__ = d\n", 488 | "\n", 489 | "def main():\n", 490 | " for args in [\n", 491 | " {'model_type': 'GCN', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 492 | " {'model_type': 'GraphSage', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 493 | " {'model_type': 'GAT', 'num_layers': 2, 'batch_size': 32, 'hidden_dim': 32, 'dropout': 0.5, 'epochs': 200, 'opt': 'adam', 'opt_scheduler': 'none', 'opt_restart': 0, 'weight_decay': 5e-3, 'lr': 0.01},\n", 494 | " ]:\n", 495 | " args = objectview(args)\n", 496 | " task = 'node'\n", 497 | " dataset = data_obj()\n", 498 | " train(dataset, task, args)\n", 499 | "\n", 500 | "if __name__ == '__main__':\n", 501 | " main()\n" 502 | ], 503 | "execution_count": 0, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "kAvQJiLukFMk", 510 | "colab_type": "code", 511 | "outputId": "903aadd6-5d91-4d0f-9d27-e0fa880df361", 512 | "colab": { 513 | "base_uri": "https://localhost:8080/", 514 | "height": 265 515 | } 516 | }, 517 | "source": [ 518 | "from matplotlib import pyplot as plt\n", 519 | "\n", 520 | 
"plt.plot(GCN_acc ,label = \"GCN_acc\")\n", 521 | "plt.plot(GraphSage_acc ,label = \"GraphSage_acc\")\n", 522 | "plt.plot(GAT_acc ,label = \"GAT_acc\")\n", 523 | "plt.legend()\n", 524 | "#plt.show()\n", 525 | "plt.savefig(\"output.png\")" 526 | ], 527 | "execution_count": 0, 528 | "outputs": [ 529 | { 530 | "output_type": "display_data", 531 | "data": { 532 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD4CAYAAADlwTGnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0\ndHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3de5xVdb3/8ddn7op4QajjjwEHkwd3\nBmTATKNEKUwDTwePiv0O8dD4QUIWx6OoxVGLTlS/OtWDn0GlWCcEtSwohBRRj5bJoMjNSC6jDnFg\nBGUGmRnm8vn9sddsFuNc9gwDe+D7fj4e+zF7fddlf/dS1nuv68fcHRERCU9GujsgIiLpoQAQEQmU\nAkBEJFAKABGRQCkAREQClZXuDrRF9+7dvaCgIN3dEBE5qaxbt+4dd+/RuP2kCoCCggKKi4vT3Q0R\nkZOKmb3ZVLsOAYmIBEoBICISKAWAiEigFAAiIoFSAIiIBEoBICISKAWAiEigTqr7ADrSocO1vLnv\nECXvvA9bV3DOe5ubnK4e59mMt6ng8AnuoYjIEV8c+x9c0GtQhy4ziAB4cds7bNx1gJJ33mfnO+9T\nsu999pRXJ8cX595Ldyun3u0D8z7d5TQe+XB3AEy1E0QkTa7e96YCoD0efGEnq/+6l3O75FDQvQuX\nXdiDPt1Pp6B7Fwq6nc65D1bCx75KxpX3HjVfvdfzwLJ/oqC+lt9O+C2ZGZnp6L6IyHERRADM/cch\n/CA3kzPzsj84sqYK6msgt+sHRj315lNse28b3/74t7XxF5FTThAB8A9n5TU/sro88Tf3zKOa6+rr\neGD9A1xw1gWMKxh3HHsnIpIeugqouiLxt9EewB/f/CPbD2xneuF0/foXkVOSAqCJPYC6+joeeO0B\nLjz7Qj5V8Kk0dUxE5PhSADSxB7CyZCU7D+xkWuE0MkyrSEROTdq6NQqAuvo6fvLaT+h7Tl/Gnj82\njR0TETm+FABVDYeAEgGwYucKSspL+FLhl/TrX0ROadrCJfcAzqS2vpafvPYT+p3TjzG9x6S3XyIi\nx5kCoOEkcN6Z/GHHH3ir4i2mD5uuX/8icsrTVq66AjJzqM3IZMGGBQzoNoAxvfTrX0ROfSkFgJmN\nM7OtZrbNzGY3MX6amW00s/Vm9oKZDYzax5rZumjcOjMbE5vn2WiZ66PXhzrua7VBdTnkdmX59uW8\nXfE2Xxr2Jcw++EwgEZFTTat3AptZJjAfGAuUAmvNbJm7b4lNttjdfxJNPx74PjAOeAf4rLv/3cwG\nA6uAnrH5bnL34o75Ku1UXUFN7hks2LCAQecO4hP5n0hrd0RETpRU9gBGAdvcfYe7HwaWABPiE7h7\neWywC+BR+6vu/veofTNwmpnlHnu3O1B1BctOz2PXwV369S8iQUklAHoCb8eGSzn6VzwAZnarmW0H\nvgN8uYnl/BPwirtXx9oeig7/fN2a2fKa2VQzKzaz4rKyshS620bVFfwyu5bB5w7m4z0/3vHLFxHp\npDrsJLC7z3f3jwB3Al+LjzOzQcA84P/Emm9y9yHAx6PX/25muQvdvcjdi3r06NFR3T2iupw9Vsew\nDw3Tr38RCUoqAbAL6BUbzo/amrMEuLZhwMzygSeAf3H37Q3t7r4r+lsBLCZxqOmE86oDVFLPaVmn\npePjRUTSJpUAWAv0NbM+ZpYD3AAsi09gZn1jg1cDb0TtZwN/AGa7+4ux6bPMrHv0Phu4Bth0LF+k\nvWqqD1IHCgARCU6rVwG5e62ZzSBxBU8m8KC7bzaz+4Fid18GzDCzK4Ea4F1gcjT7DOBCYI6ZzYna\nPgW8D6yKNv6ZwNPATzvwe6XGncqag8AZCgARCU5KBWHcfQWwolHbnNj725qZ75vAN5tZ7IgU+3j8\n1FZT6XWA9gBEJDxh3wlcXc6hjMSJXwWAiIQm8ACooDK68icvq4WykSIip6DAA6CcyozEKtAegIiE\nJvAAOLIHoAAQkdAoABQAIhKosAOg6sghoNOzTk9zZ0RETqywAyC+B5CtPQARCUvgAVBOpS4DFZFA\nBR4AFVRmZgOQl6nLQEUkLIEHQDmVWTnkZuaSmZGZ7t6IiJxQgQdABZVZuTr8IyJBUgBkZikARCRI\nCgAFgIgEKvAAKKcyI1MBICJBCjsAqsqpNFMAiEiQwg6A6goqMxQAIhKmlALAzMaZ2VYz22Zms5sY\nP83MNprZejN7wcwGxsbdFc231cw+neoyjzv3RADgCgARCVKrAWBmmcB84CpgIHBjfAMfWezuQ9x9\nGPAd4PvRvANJ1BAeBIwD/p+ZZaa4zOOrthrqa6h0FYQXkTClsgcwCtjm7jvc/TCwBJgQn8Ddy2OD\nXQCP3k8Alrh7tbvvBLZFy2t1mcdddaLLldQpAEQkSKnUBO4JvB0bLgUubjyRmd0KzAJygDGxeV9q\nNG/P6H2ryzyuqisAqKyv0YPgRCRIHXYS2N3nu/tHgDuBr3XUcs1sqpkVm1lxWVlZRy0WqsupAw67\n9gBEJEypBMAuoFdsOD9qa84S4NpW5k15me6+0N2L3L2oR48eKXQ3RbFHQasWgIiEKJUAWAv0NbM+\nZpZD4qTusvgEZtY3Nng18Eb0fhlwg5nlmlkfoC/wcirLPO6iS0BBj4IWkTC1eg7A3WvNbAawCsgE\nHnT3zWZ2P1Ds7suAGWZ2JVADvAtMjubdbGaPAluAWuBWd68DaGqZHf/1WlBVTqWpILyIhCuVk8C4\n+wpgRaO2ObH3t7Uw71xgbirLPKG0ByAigQv3TuDqchWEF5GgBRwAFRzKygEUACISpoADoJzKnMTV\nP3lZKgcpIuEJOAAqqIxuANMegIiEKPAASPzyVwCISIjCDoCsXEABICJhCjgAyqmMTgLrTmARCVG4\nAVBVTmVWFlmWRXZmdrp7IyJywoUbANUVVGaoILyIhCulO4FPOQ3VwDIyOM0UACISpjADoKEamOkE\nsIiEK8xDQA3VwBQAIhKwQAMgqgamgvAiErBAAyDaA1BBeBEJWKABEO0BeK0CQESCFXYA1NcoAEQk\nWGEGQFV0CEgBICIBSykAzGycmW01s21mNruJ8bPMbIuZbTCz1WZ2ftR+uZm
tj72qzOzaaNwiM9sZ\nGzesY79aCxr2AOqqFQAiEqxW7wMws0xgPjAWKAXWmtkyd98Sm+xVoMjdD5nZdOA7wPXuvgYYFi2n\nG7AN+GNsvn9z98c75qu0QXU5DlQpAEQkYKnsAYwCtrn7Dnc/DCwBJsQncPc17n4oGnwJyG9iOROB\nJ2PTpU91BVVZObguAxWRgKUSAD2Bt2PDpVFbc24Gnmyi/QbgkUZtc6PDRj8ws9ymFmZmU82s2MyK\ny8rKUuhuCqrLqcw9E9CNYCISrg49CWxmnweKgO82aj8PGAKsijXfBfQHRgLdgDubWqa7L3T3Incv\n6tGjR8d0tLqCyrwzAAWAiIQrlQDYBfSKDedHbUcxsyuBe4Dx7l7daPQ/A0+4e01Dg7vv9oRq4CES\nh5pOjOoKKnO6AHBatgJARMKUSgCsBfqaWR8zyyFxKGdZfAIzGw4sILHx39vEMm6k0eGfaK8AMzPg\nWmBT27vfTtUVyYLwKgYjIqFq9Sogd681sxkkDt9kAg+6+2Yzux8odvdlJA75nAE8ltie85a7jwcw\nswISexDPNVr0r8ysB2DAemBah3yjVFSXU9n1HKjVISARCVdKj4N29xXAikZtc2Lvr2xh3hKaOGns\n7mNS7mVHqyqnstt5CgARCVqYdwJXVyTrASsARCRU4QVAQzWwqA6wAkBEQhVeAETVwA5lJo5+KQBE\nJFThBUBDLYCMTADysvLS2RsRkbQJMACiB8FlZGAYeZkKABEJU4ABcKQecF5WHtFlqyIiwQkwABrq\nAev4v4iELdwAMNUDFpGwhRcADdXAvE4BICJBCy8AkvWA6/QcIBEJWoAB0FAP+LD2AEQkaAEGQAVk\n5qoesIgEL8AAKIfcrlTWVioARCRoAQZAxZEAUDEYEQlY2AGgPQARCViQAeAKABGR1ALAzMaZ2VYz\n22Zms5sYP8vMtpjZBjNbbWbnx8bVmdn66LUs1t7HzP4SLXNpVG7y+Ksupya3K3W6D0BEAtdqAJhZ\nJjAfuAoYCNxoZgMbTfYqUOTuQ4HHge/ExlW6+7DoNT7WPg/4gbtfCLwL3HwM3yN1VeVU5kYF4RUA\nIhKwVPYARgHb3H2Hux8GlgAT4hO4+xp3PxQNvgTkt7TAqBD8GBJhAfAwicLwx191BZXRyV8FgIiE\nLJUA6Am8HRsupYkavzE3A0/GhvPMrNjMXjKzho38ucB77l7b2jLNbGo0f3FZWVkK3W1BQzWw7MQj\noBUAIhKylIrCp8rMPg8UAZ+INZ/v7rvM7ALgGTPbCBxIdZnuvhBYCFBUVOTH1MGoGlhldi6gABCR\nsKWyB7AL6BUbzo/ajmJmVwL3AOPdvbqh3d13RX93AM8Cw4F9wNlm1hBATS6zwzU8BkIF4UVEUgqA\ntUDf6KqdHOAGYFl8AjMbDiwgsfHfG2s/x8xyo/fdgUuBLe7uwBpgYjTpZOB3x/plWtXwILhMBYCI\nSKsBEB2nnwGsAl4HHnX3zWZ2v5k1XNXzXeAM4LFGl3sOAIrN7DUSG/xvu/uWaNydwCwz20binMDP\nO+xbNadhDyAzUQ9YASAiIUvpHIC7rwBWNGqbE3t/ZTPz/QkY0sy4HSSuMDpxkvWAEwGgx0GLSMjC\nuhM4GQCJOsB6FpCIhCysAKg6UhAedAhIRMIWVgDECsID5GXmpa8vIiJpFlgAHKkHnJuZS2Z0LkBE\nJESBBUCiGtih+sPkZenXv4iELbAAUDUwEZEGgQWAisGIiDRQAIiIBCq8AMg7SwEgIkJwAaBzACIi\nDcIKgCoFgIhIg7ACQOcARESSwgmAqBoYuWcqAERECCkAompg5HalsqZSTwIVkeCFEwDRYyDqcrpw\nuP6w9gBEJHgBBUD0ILjoEdAKABEJXUABoHrAIiJxKQWAmY0zs61mts3MZjcxfpaZbTGzDWa22szO\nj9qHmdmfzWxzNO762DyLzGxnVEJyvZkN67iv1YTkHkAUACoGIyKBazUAzCwTmA9cBQwEbjSzgY0m\nexUocvehwOPAd6L2Q8C/uPsgYBzwn2Z2dmy+f3P3YdFr/TF+l5YlC8InqmBqD0BEQpfKHsAoYJu7\n73D3w8ASYEJ8Andf4+6HosGXgPyo/W/u/kb0/u/AXqBHR3W+TRqqgWWoILyICKQWAD2Bt2PDpVFb\nc24GnmzcaGajgBxge6x5bnRo6AdmltvUwsxsqpkVm1lxWVlZCt1tRrIesPYARESgg08Cm9nngSLg\nu43azwN+CUxx9/qo+S6gPzAS6Abc2dQy3X2huxe5e1GPHsew81CtesAiInGpBMAuoFdsOD9qO4qZ\nXQncA4x39+pY+5nAH4B73P2lhnZ33+0J1cBDJA41HT9RNbBKrwUUACIiqQTAWqCvmfUxsxzgBmBZ\nfAIzGw4sILHx3xtrzwGeAH7h7o83mue86K8B1wKbjuWLtCr2JFBQAIiIZLU2gbvXmtkMYBWQCTzo\n7pvN7H6g2N2XkTjkcwbwWGJ7zlvuPh74Z2A0cK6ZfSFa5BeiK35+ZWY9AAPWA9M69qs1EnsQHCgA\nRERaDQAAd18BrGjUNif2/spm5vsv4L+aGTcm9W52gOoKyDszGQB6FpCIhC6gO4GPPAk0y7LIzsxO\nd49ERNIqoABQMRgRkbhwAkDVwEREjhJOADScBK6p1HOAREQIJQBUDUxE5APCCIB4NbDaSvIy89Ld\nIxGRtAsjAKLHQOgcgIjIEYEEQOJBcOSeyaHaQwoAERGCCYBoDyC6EUwngUVEggmAhj0AHQISEWmg\nABARCVQYARBVA/OcM6iqrVIAiIgQSgBEewBVWXk4rgAQESGYAIiqgWWqHrCISINAAiCqBkYdoEdB\ni4hAMAFQnnwOEGgPQEQEUgwAMxtnZlvNbJuZzW5i/Cwz22JmG8xstZmdHxs32czeiF6TY+0jzGxj\ntMwfRaUhjw9VAxMR+YBWA8DMMoH5wFXAQOBGMxvYaLJXgSJ3Hwo8Dnwnmrcb8O/AxSSKvv+7mZ0T\nzfMA8EWgb/Qad8zfpjmNqoEpAEREUtsDGAVsc/cd7n4YWAJMiE/g7mvc/VA0+BKQH73/NPCUu+93\n93eBp4BxUUH4M939JXd34BckCsMfHyOmwKW3KQBERGJSCYCewNux4dKorTk3A0+2Mm/P6H2ryzSz\nqWZWbGbFZWVlKXS3Cf3GweB/UgCIiMR06ElgM/s8UAR8t6OW6e4L3b3I3Yt69OhxTMtKBoCeBSQi\nklIA7AJ6xYbzo7ajmNmVwD3AeHevbmXeXRw5TNTsMjua9gBERI5IJQDWAn3NrI+Z5QA3AMviE5jZ\ncGABiY3/3tioVcCnzOyc6OTvp4BV7r4bKDezj0ZX//wL8LsO+D4tUgCIiByR1doE7l5rZjNIbMwz\ngQfdfbOZ3Q8Uu/syEod8zgAei67mfMvdx7v7fjP7BokQAbjf3fdH778ELAJOI3HO4EmOs8raSgxT\nRTAREVIIAAB3XwGsaNQ2J/b+yh
bmfRB4sIn2YmBwyj3tAJW1leRl5XE8bzkQETlZhHEncESPghYR\nOUIBICISKAWAiEigggsAPQlURCQhpZPApwrtAYg0r6amhtLSUqqqqtLdFWmnvLw88vPzyc7OTmn6\n4ALgrNPPSnc3RDql0tJSunbtSkFBga6UOwm5O/v27aO0tJQ+ffqkNE9wh4DysnQPgEhTqqqqOPfc\nc7XxP0mZGeeee26b9uDCCoAaHQISaYk2/ie3tv73CysAdA5ARCRJASAiEqhgAqCmroZar1UAiHRy\ne/bsYdKkSVxwwQWMGDGCSy65hCeeeAKAl19+mdGjR9OvXz+GDx/OLbfcwqFDh1i0aBEZGRls2LAh\nuZzBgwdTUlKSpm9xcggmAA7VJgqWKQBEOi9359prr2X06NHs2LGDdevWsWTJEkpLS9mzZw/XXXcd\n8+bNY+vWrbz66quMGzeOiooKAPLz85k7d26av8HJJZjLQFUMRiR19y3fzJa/l3foMgf+rzP5988O\nanGaZ555hpycHKZNm5ZsO//885k5cyZz5sxh8uTJXHLJJclxEydOTL6/5ppreP7559m6dSv9+vVr\ntT/Tp09n7dq1VFZWMnHiRO677z4A1q5dy2233cb7779Pbm4uq1ev5vTTT+fOO+9k5cqVZGRk8MUv\nfpGZM2e2dRV0OuEFgPYARDqtzZs3c9FFFzU5btOmTUyePLnZeTMyMrjjjjv41re+xcMPP9zqZ82d\nO5du3bpRV1fHFVdcwYYNG+jfvz/XX389S5cuZeTIkZSXl3PaaaexcOFCSkpKWL9+PVlZWezfv7/V\n5Z8MFAAi8gGt/VI/UW699VZeeOEFcnJy6NWrV6vTT5o0iblz57Jz585Wp3300UdZuHAhtbW17N69\nmy1btmBmnHfeeYwcORKAM888E4Cnn36aadOmkZWV2GR269btGL5V5xHMOQAFgEjnN2jQIF555ZXk\n8Pz581m9ejVlZWUMGjSIdevWtTh/VlYW//qv/8q8efNanG7nzp1873vfY/Xq1WzYsIGrr746yEdg\npBQAZjbOzLaa2TYzm93E+NFm9oqZ1ZrZxFj75Wa2PvaqMrNro3GLzGxnbNywjvtaH9QQAHoYnEjn\nNWbMGKqqqnjggQeSbYcOJS7gmDFjBg8//DB/+ctfkuN+85vfsGfPnqOW8YUvfIGnn36asrKyZj+n\nvLycLl26cNZZZ7Fnzx6efDJRkLBfv37s3r2btWsTRQwrKiqora1l7NixLFiwgNraWoBT5hBQqwFg\nZpnAfOAqYCBwo5kNbDTZW8AXgMXxRndf4+7D3H0YMAY4BPwxNsm/NYx39/Xt/xqt0x6ASOdnZvz2\nt7/lueeeo0+fPowaNYrJkyczb948PvzhD7NkyRJuv/12+vXrx4ABA1i1ahVdu3Y9ahk5OTl8+ctf\nZu/evc18ChQWFjJ8+HD69+/PpEmTuPTSS5PzLl26lJkzZ1JYWMjYsWOpqqrilltuoXfv3gwdOpTC\nwkIWL17c7LJPJubuLU9gdglwr7t/Ohq+C8Dd/6OJaRcBv3f3x5sYNxX4hLvf1Nq0zSkqKvLi4uJU\nJz/Ksu3LuOeFe/jDP/6B3mf2btcyRE5lr7/+OgMGDEh3N+QYNfXf0czWuXtR42lTOQTUE3g7Nlwa\ntbXVDcAjjdrmmtkGM/uBmeW2Y5kpq6zRHoCISNwJuQrIzM4DhgCrYs13Af8D5AALgTuB+5uYdyow\nFaB37/b/ctchIJEwXXzxxVRXVx/V9stf/pIhQ4akqUedRyoBsAuIX3+VH7W1xT8DT7h7TUODu++O\n3lab2UPA7U3N6O4LSQQERUVFLR+vaoECQCRM8ZPGcrRUDgGtBfqaWR8zyyFxKGdZGz/nRhod/on2\nCrDE80uvBTa1cZltUllbSU5GDpkZmcfzY0REThqtBoC71wIzSBy+eR141N03m9n9ZjYewMxGmlkp\ncB2wwMw2N8xvZgUk9iCea7ToX5nZRmAj0B345rF/neYdqj2kx0CIiMSkdA7A3VcAKxq1zYm9X0vi\n0FBT85bQxEljdx/Tlo4eq6raKh3+ERGJCepOYAWAiMgRCgAR6VRaqgdwrAoKCnjnnXea/MxrrrmG\nwsJCBg4cyGc+85kO+bzOLqiHwSkARFL05Gz4n40du8x/GAJXfbvFSRrqAUyePDl5t+2bb77JsmVH\nX3dSW1ubfDBbR5gzZw5jx47ltttuAziqsMypTHsAItJptFQPYNGiRYwfP54xY8ZwxRVXcPDgQa64\n4gouuugihgwZwu9+9zsASkpK6N+/PzfddBMDBgxg4sSJyecJAfz4xz9OzvPXv/4VgN27d5Off+Q0\n5tChQwGa/QyAb3zjG/Tr14/LLruMG2+8ke9973sAbN++nXHjxjFixAg+/vGPJz+jKcuXL+fiiy9m\n+PDhXHnllcnnGh08eJApU6YwZMgQhg4dyq9//WsAVq5cyUUXXURhYSFXXHHFMa1rIJG4J8trxIgR\n3l7jnxjvX13z1XbPL3Kq27JlS7q74D/84Q/9K1/5SpPjHnroIe/Zs6fv27fP3d1ramr8wIED7u5e\nVlbmH/nIR7y+vt537tzpgL/wwgvu7j5lyhT/7ne/6+7u559/vv/oRz9yd/f58+f7zTff7O7uK1eu\n9LPOOss/+clP+je/+U3ftWtXi5/x8ssve2FhoVdWVnp5eblfeOGFyc8YM2aM/+1vf3N395deeskv\nv/zyZr/v/v37vb6+3t3df/rTn/qsWbPc3f2OO+7w22677ajp9u7d6/n5+b5jxw539+R6aKyp/45A\nsTexTdUhIBHptOL1AG699VbGjh2bfBa/u3P33Xfz/PPPk5GRwa5du5K/oHv16pV8wNvnP/95fvSj\nH3H77Yl7TT/3uc8BMGLECH7zm98A8OlPf5odO3awcuVKnnzySYYPH86mTZs4++yzm/yMF198kQkT\nJpCXl0deXh6f/exngcQv9z/96U9cd911ye/Q+C7kuNLSUq6//np2797N4cOH6dOnD5CoP7BkyZLk\ndOeccw7Lly9n9OjRyWk6oiaBAkBEOo1BgwYlD3dAoh7AO++8Q1FR4jlmXbp0SY771a9+RVlZGevW\nrSM7O5uCgoLkM/0T95ceER/OzU08diwzMzP5eGdIbFAnTZrEpEmTkuUlKyoqmv2MptTX13P22Wez\nfn1qDzeeOXMms2bNYvz48Tz77LPce++9Kc3XUYI6B6BaACKdW0v1ABo7cOAAH/rQh8jOzmbNmjW8\n+eabyXFvvfUWf/7znwFYvHgxl112WYuf+8wzzyQ/p6Kigu3bt9O7d+9mP+PSSy9l+fLlVFVVcfDg\nQX7/+98DiQpiffr04bHHHgMSeymvvfZas5974MABevZM3CYVL2M5duxY5s+fnxx+9913+ehHP8rz\nzz+frHbWETUJggiAuvo6quuqycvKS3dXRKQFLdUDaOymm26iuLiYIUOG8Itf/IL+/fsnx/X
r14/5\n8+czYMAA3n33XaZPn97i565bt46ioiKGDh3KJZdcwi233MLIkSOb/YyRI0cyfvx4hg4dylVXXcWQ\nIUM466yzgMSeyc9//nMKCwsZNGjQUSeOG7v33nu57rrrGDFiBN27d0+2f+1rX+Pdd99l8ODBFBYW\nsmbNGnr06MHChQv53Oc+R2FhIddff32b1m1TWq0H0Jm0tx7A+zXv89HFH2XWiFlMGTzlOPRM5OR3\nqtQDKCkp4ZprrmHTpuP6eDEOHjzIGWecwaFDhxg9ejQLFy5stqD9idSWegBBnAPQk0BFpKNNnTqV\nLVu2UFVVxeTJkzvFxr+twggAFYMRCUZBQcFx//UPtKks5Ny5c5PnBRpcd9113HPPPR3drTYJIgAO\n1SZO7igARCQd7rnnnrRv7JsSxElgHQISEfkgBYCISKDCCgAVhBERSQorALQHICKSlFIAmNk4M9tq\nZtvMbHYT40eb2StmVmtmExuNqzOz9dFrWay9j5n9JVrm0qje8HHREAC6E1ik82utHsBXvvIVevbs\nSX19PQAPPfQQw4YNY9iwYeTk5DBkyBCGDRvG7Nkf2FRJI61eBWRmmcB8YCxQCqw1s2XuviU22VvA\nF4Dbm1hEpbsPa6J9HvADd19iZj8BbgYeaGK6Y6Y9AJG2mffyPP66v/nHGLdH/279uXPUnS1O463U\nA6ivr+eJJ56gV69ePPfcc1x++eVMmTKFKVMSN3gWFBSwZs2ao+6qlealsgcwCtjm7jvc/TCwBJgQ\nn8DdS9x9A1Cfyoda4slMY4DHo6aHgWtT7nUbKQBETg4t1QMAePbZZxk0aBDTp0/nkUceaddnvPzy\ny1xyySUMHz6cj33sY2zduhWAuro6br/9dgYPHszQoUP58Y9/DMDatWv52Mc+RmFhIaNGjaKiouIY\nv2Xnkcp9AD2Bt2PDpcDFbfiMPDMrBmqBb7v7b4FzgffcveFRfKU0UTgewMymAlMBevfu3YaPPaKy\ntpJMyyQ7I7td84uEprVf6sfL5s2bW7yj9pFHHuHGG29kwoQJ3H333dTU1JCd3bZ/1/379+e///u/\nycrK4umnn+buu+/m17/+NTG9rzYAAAcTSURBVAsXLqSkpIT169eTlZXF/v37OXz4MNdffz1Lly5l\n5MiRlJeXc9ppp84PyRNxI9j57r7LzC4AnjGzjcCBVGd294XAQkg8C6g9HWh4FHTjR8SKSOcWrwfw\n4osvsmLFCr7//e/TtWtXLr74YlatWsU111zTpmUeOHCAyZMn88Ybb2Bm1NTUAIln8E+bNi1ZarJb\nt25s3LiR8847j5EjRwKJp32eSlI5BLQL6BUbzo/aUuLuu6K/O4BngeHAPuBsM2sIoDYts61UC0Dk\n5DBo0CBeeeWV5PD8+fNZvXo1ZWVlrFq1ivfee48hQ4ZQUFDACy+80K7DQF//+te5/PLL2bRpU/KR\nzqFKJQDWAn2jq3ZygBuAZa3MA4CZnWNmudH77sClwJaoRNkaoOGKoclA889MPUaVNQoAkZNBS/UA\nHnnkEX72s59RUlJCSUkJO3fu5Kmnnmq2XkBz4s/gX7RoUbJ97NixLFiwIFkkZv/+/fTr14/du3ez\ndu1aIFErIF5E5mTXagBEx+lnAKuA14FH3X2zmd1vZuMBzGykmZUC1wELzGxzNPsAoNjMXiOxwf92\n7OqhO4FZZraNxDmBn3fkF4vTHoDIyaG5egD33XcfK1eu5Oqrr05O26VLFy677DKWL1/eps+44447\nuOuuuxg+fPhRG/NbbrmF3r17M3ToUAoLC1m8eDE5OTksXbqUmTNnUlhYyNixY0+pPYYg6gH8bOPP\nqDhcwVdHfPU49Erk1HCq1AMIneoBNHLLkFvS3QURkU4niAAQkfA89NBD/PCHPzyq7dJLLz2q1m7o\nFAAikuTup8zl0vE7hEPR1kP6QTwMTkRal5eXx759+9q8EZHOwd3Zt28feXl5Kc+jPQARASA/P5/S\n0lLKysrS3RVpp7y8PPLz81OeXgEgIgBkZ2fTp0+fdHdDTiAdAhIRCZQCQEQkUAoAEZFAnVR3AptZ\nGfBmO2fvDrzTgd3pSOpb+6hv7aO+tc/J3Lfz3b1H48aTKgCOhZkVN3UrdGegvrWP+tY+6lv7nIp9\n0yEgEZFAKQBERAIVUgAsTHcHWqC+tY/61j7qW/uccn0L5hyAiIgcLaQ9ABERiVEAiIgEKogAMLNx\nZrbVzLaZ2ex09yfOzErMbKOZrTeztpc769i+PGhme81sU6ytm5k9ZWZvRH/P6UR9u9fMdkXrbr2Z\nfSZNfetlZmvMbIuZbTaz26L2tK+7FvqW9nVnZnlm9rKZvRb17b6ovY+Z/SX697o0qkXeWfq2yMx2\nxtbbsBPdt6gfmWb2qpn9Phpu3zpz91P6BWQC24ELgBzgNWBguvsV618J0D3d/Yj6Mhq4CNgUa/sO\nMDt6PxuY14n6di9weydYb+cBF0XvuwJ/AwZ2hnXXQt/Svu4AA86I3mcDfwE+CjwK3BC1/wSY3on6\ntgiY2An+n5sFLAZ+Hw23a52FsAcwCtjm7jvc/TCwBJiQ5j51Su7+PLC/UfME4OHo/cPAtSe0U5Fm\n+tYpuPtud38lel8BvA70pBOsuxb6lnaecDAazI5eDowBHo/a07Xemutb2plZPnA18LNo2GjnOgsh\nAHoCb8eGS+kk/wAiDvzRzNaZ2dR0d6YJH3b33dH7/wE+nM7ONGGGmW2IDhGl5fBUnJkVAMNJ/GLs\nVOuuUd+gE6y76FDGemAv8BSJvfX33L02miRt/14b983dG9bb3Gi9/cDMctPQtf8E7gDqo+Fzaec6\nCyEAOrvL3P0i4CrgVjMbne4ONccT+5ed4ldQ5AHgI8AwYDfwf9PZGTM7A/g18BV3L4+PS/e6a6Jv\nnWLduXuduw8D8knsrfdPRz+a0rhvZjYYuItEH0cC3YA7T2SfzOwaYK+7r+uI5YUQALuAXrHh/Kit\nU3D3XdHfvcATJP4RdCZ7zOw8gOjv3jT3J8nd90T/SOuBn5LGdWdm2SQ2sL9y999EzZ1i3TXVt860\n7qL+vAesAS4BzjazhmJVaf/3GuvbuOiQmrt7NfAQJ369XQqMN7MSEoezxwA/pJ3rLIQAWAv0jc6S\n5wA3AMvS3CcAzKyLmXVteA98CtjU8lwn3DJgcvR+MvC7NPblKA0b18g/kqZ1Fx2D/Tnwurt/PzYq\n7euuub51hnVnZj3M7Ozo/WnAWBLnKNYAE6PJ0rXemurbX2OBbiSOs5/Q9ebud7l7vrsXkNiWPePu\nN9HedZbus9kn4gV8hsTVD9uBe9Ldn1i/LiBxVdJrwOZ09w14hMThgBoSxxFvJnF8cTXwBvA00K0T\n9e2XwEZgA4mN7Xlp6ttlJA7vbADWR6/PdIZ110Lf0r7ugKHAq1EfNgFzovYLgJeBbcBjQG4n6tsz\n0XrbBPwX0ZVCafr/7pMcuQqoXetMj4IQEQlUCI
eARESkCQoAEZFAKQBERAKlABARCZQCQEQkUAoA\nEZFAKQBERAL1/wGqq0GHDF+q3gAAAABJRU5ErkJggg==\n", 533 | "text/plain": [ 534 | "
" 535 | ] 536 | }, 537 | "metadata": { 538 | "tags": [] 539 | } 540 | } 541 | ] 542 | } 543 | ] 544 | } --------------------------------------------------------------------------------