├── .gitignore ├── LICENSE ├── README.md ├── asserts └── WechatIMG22352.png ├── code_submission ├── config.py ├── configs │ ├── a.json │ ├── az-cs.json │ ├── b.json │ ├── c.json │ ├── coauthor-cs.json │ ├── d.json │ ├── default.json │ ├── e-d5.json │ ├── e.json │ └── tmp.json ├── meta.encoder ├── meta.model ├── model.py ├── models │ ├── __init__.py │ ├── emb_gcn.py │ ├── focal_loss.py │ ├── gat.py │ ├── gcn.py │ ├── nas_autograph_a.py │ ├── nas_autograph_b.py │ ├── nas_autograph_c.py │ ├── nas_autograph_d.py │ ├── nas_autograph_e.py │ ├── nas_azcs.py │ ├── nas_azpo.py │ ├── nas_citeseer.py │ ├── nas_coauthorcs.py │ ├── nas_coauthorphy.py │ ├── nas_cora.py │ ├── nas_phy10000.py │ ├── nas_pubmed.py │ ├── sage.py │ └── simple_gcn.py ├── preprocessing │ ├── __init__.py │ ├── feat_engineer.py │ ├── graph.py │ └── prepredict.py └── utils │ ├── __init__.py │ ├── callbacks.py │ ├── data.py │ ├── drop_edge.py │ ├── ensemble.py │ ├── logger.py │ ├── timer.py │ └── train.py ├── ingestion ├── common.py ├── dataset.py ├── metadata └── timing.py ├── meta_run.sh ├── run_local.sh ├── run_local_test.py └── scoring ├── graph-score.py ├── metadata └── score.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | ingestion/ingestion.py 3 | data/ 4 | __pycache__ 5 | .DS_Store 6 | *.zip 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 
39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. 
This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 
344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 
408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. 
For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 
520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 
646 | 
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see .
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 | Copyright (C)
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | .
668 | 
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | .
675 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![license](https://img.shields.io/badge/license-GPL%203.0-green.svg)](https://github.com/Unkrible/AutoGraph2020/blob/master/LICENSE)
2 | 
3 | AutoGraph
4 | ======================================
5 | 
6 | 
7 | 
8 | ## Contents
9 | 
10 | - ingestion/: The code and libraries used on CodaLab to run your submission.
11 | 
12 | - scoring/: The code and libraries used on CodaLab to score your submission.
13 | 
14 | - code_submission/: An example code submission you can use as a template.
15 | 
16 | - data/: Some sample data to test your code before you submit it.
17 | - Meta.model: A decision tree for adaptive configuration of hyperparameters: ![WechatIMG22352](asserts/WechatIMG22352.png)
18 | - [Extra dataset used in the competition](https://github.com/mecthew/Graph-Dataset)
19 | 
20 | 
21 | 
22 | ## Local development and testing
23 | 1. To make your own submission to the AutoGraph challenge, modify the
24 | file `model.py` in `code_submission/`, which implements your algorithm.
25 | 2. Test the algorithm on your local computer using Docker,
26 | in the exact same environment as on the CodaLab challenge platform. Advanced
27 | users can also run local tests without Docker if they install all the required
28 | packages.
29 | 3. If you are new to Docker, install it from https://docs.docker.com/get-started/.
30 | Then, at the shell, run:
31 | ```
32 | cd path/to/autograph_starting_kit/
33 | docker run --gpus=0 -it --rm -v "$(pwd):/app/autograph" -w /app/autograph nehzux/kddcup2020:v2
34 | ```
35 | The option `-v "$(pwd):/app/autograph"` mounts the current directory
36 | (`autograph_starting_kit/`) as `/app/autograph`. If you want to mount other
37 | directories on your disk, replace `$(pwd)` with your own directory.
38 | 
39 | The Docker image used on the challenge platform is
40 | ```
41 | nehzux/kddcup2020:v2
42 | ```
43 | 
44 | 4. You will then be able to run the `ingestion program` (to produce predictions)
45 | and the `scoring program` (to evaluate your predictions) on toy sample data.
46 | In the AutoGraph challenge, both programs run in parallel to give
47 | feedback, so we provide a Python script to simulate this behavior. To test locally, run:
48 | ```
49 | python run_local_test.py
50 | ```
51 | If the program exits without errors, you can find the final score of your solution in the terminal's stdout.
52 | You can also view the score by opening `scoring_output/scores.txt`.
53 | 
54 | The full usage is:
55 | ```
56 | python run_local_test.py --dataset_dir=./data/demo --code_dir=./code_submission
57 | ```
58 | You can point the argument `dataset_dir` to other datasets (e.g. the two
59 | practice datasets we provide) and `code_dir` to a different code submission directory.
60 | 
61 | 5. You can directly use `sh ./meta_run.sh log_folder run_times [dataset1, dataset2, ...]` to run our programs in batch mode.
62 | 
63 | 
64 | 
65 | ## Contributors
66 | 
67 | 
68 | 
69 | - Zhuoer Xu, NJU, [xuzhuoer.rex@gmail.com](mailto:xuzhuoer.rex@gmail.com)
70 | 
71 | - Feng Cheng, NJU, [hazzacheng@gmail.com](mailto:hazzacheng@gmail.com)
72 | - Wenjie Wang, NJU, [wjwangpt@gmail.com](mailto:wjwangpt@gmail.com)
73 | - Mengchuan Qiu, NJU, [mecthew.qiu@gmail.com](mailto:mecthew.qiu@gmail.com)
74 | 
--------------------------------------------------------------------------------
/asserts/WechatIMG22352.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unkrible/AutoGraph2020/06fee732e392971cb973a54e19649821ae0f7786/asserts/WechatIMG22352.png
--------------------------------------------------------------------------------
/code_submission/config.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | from collections import defaultdict, ChainMap
4 | from models import MODEL_LIB, MODEL_PARAMETER_LIB
5 | 
6 | 
7 | class ModelList:
8 |     def __init__(self, names, loop=False):
9 |         self.names = names
10 |         self.n_models = len(names)
11 |         self.index = 0
12 |         self.model_info = None
13 |         self.loop = loop
14 | 
15 |     def __iter__(self):
16 |         self.index = 0
17 |         return self
18 | 
19 |     def __next__(self):
20 |         if not self.loop and self.index >= self.n_models:
21 |             raise StopIteration
22 |         self.index = self.index % self.n_models
23 |         if self.index == 0 and self.model_info is not None:
24 |             self._update()
25 |         name = self.names[self.index]
26 |         self.index = self.index + 1
27 |         return name, MODEL_LIB[name]
28 | 
29 |     def __len__(self):
30 |         return len(self.names)
31 | 
32 |     def _update(self):
33 |         model_info = [(ele[1], ele[2]) for ele in self.model_info]
34 |         model_metrics = defaultdict(list)
35 |         for metric, name in model_info:
36 |             model_metrics[name].append(metric)
37 |         model_metrics = {name: np.mean(metrics) for name, metrics in model_metrics.items()}
38 |         model_metrics = [(name, model_metrics.get(name, 1.0)) for name in self.names]
39 |         model_metrics = sorted(model_metrics, key=lambda x: x[1], reverse=True)
40 |         print("sorted metrics", model_metrics)
41 |         self.names = [ele[0] for ele in model_metrics]
42 | 
43 |     def update(self, model_info):
44 |         self.model_info = model_info
45 | 
46 | 
47 | class Config:
48 |     """
49 |     Centralized management of global hyperparameters, e.g. the model sequence, data-processing options, batch size, etc.
50 |     """
51 |     def __init__(self, filename="", config=None):
52 |         self.filename = filename
53 |         if config is None:
54 |             self.config
= defaultdict(lambda: None) 55 | with open(filename, 'r') as f: 56 | self.config = json.load(f) 57 | else: 58 | self.config = config 59 | self.model_list = None 60 | 61 | def __getitem__(self, item): 62 | return self.config[item] 63 | 64 | def __setitem__(self, key, value): 65 | self.config[key] = value 66 | 67 | def __delitem__(self, key): 68 | del self.config[key] 69 | 70 | def __getattr__(self, item): 71 | return self.config.get(item, None) 72 | 73 | def __str__(self): 74 | return str(self.config) 75 | 76 | @property 77 | def loop(self): 78 | return self.config.get('loop', False) 79 | 80 | @property 81 | def model_classes(self): 82 | if self.model_list is None: 83 | self.model_list = ModelList(self.model, self.loop) 84 | return self.model_list 85 | 86 | def model_config(self, name): 87 | config = {} 88 | if "model_config" in self.config: 89 | if name in self.config["model_config"]: 90 | config = self.config["model_config"][name] 91 | if name in MODEL_PARAMETER_LIB: 92 | params = MODEL_PARAMETER_LIB[name] 93 | config = ChainMap(config, {"lr": params[0], "dropout": params[1], "weight_decay": params[2], "hidden": params[3]}) 94 | config = ChainMap(config, self.config) 95 | return Config(config=config) 96 | -------------------------------------------------------------------------------- /code_submission/configs/a.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_cora", 4 | "nas_autograph_a", 5 | "nas_autograph_c", 6 | "gat", 7 | "nas_pubmed", 8 | "simple_gcn", 9 | "nas_autograph_b", 10 | "nas_autograph_e" 11 | ], 12 | "model_config": { 13 | "simple_gcn": { 14 | "num_layers": 3, 15 | "drop_edge": 0.8 16 | }, 17 | "gat": { 18 | "num_layers": 3 19 | }, 20 | "nas_pubmed": { 21 | "num_layers": 1 22 | }, 23 | "nas_autograph_a": { 24 | "lr": 0.01, 25 | "dropout": 0.9, 26 | "weight_decay": 0, 27 | "hidden": 128 28 | }, 29 | "nas_autograph_b": { 30 | "lr": 0.001, 31 | "dropout": 0.7, 32 | "weight_decay": 0, 33 | "hidden": 256 34 | }, 35 | "nas_autograph_d": { 36 | "lr": 0.005, 37 | "dropout": 0.1, 38 | "weight_decay": 0.001, 39 | "hidden": 8 40 | }, 41 | "nas_autograph_e": { 42 | "lr": 0.005, 43 | "dropout": 0.7, 44 | "weight_decay": 0.0001, 45 | "hidden": 32 46 | } 47 | }, 48 | "num_epoch": 500, 49 | "num_batch": 1, 50 | "patience": 30, 51 | "use_valid": true, 52 | "use_all_data": false, 53 | "lr": 0.005, 54 | "drop_edge": 1.0, 55 | "weight_decay": 5e-4, 56 | "dropout": 0.5, 57 | "hidden": 64, 58 | "use_sampler": false, 59 | "num_layers": 1, 60 | "min_epoch": 1, 61 | "loop": true 62 | } -------------------------------------------------------------------------------- /code_submission/configs/az-cs.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_citeseer", 4 | "gat", 5 | "nas_cora", 6 | "nas_pubmed", 7 | "nas_autograph_e", 8 | "nas_azpo" 9 | ], 10 | "model_config": { 11 | "nas_cora":{ 12 | "num_layers": 1, 13 | "patience": 15 14 | }, 15 | "nas_citeseer": { 16 | "num_layers": 1, 17 | "patience": 15 18 | }, 19 | "nas_pubmed": { 20 | "num_layers": 1, 21 | "patience": 15 22 | } 23 | }, 24 | "num_epoch": 500, 25 | "num_batch": 1, 26 | "patience": 30, 27 | "use_valid": false, 28 | "use_all_data": false, 29 | "lr": 0.005, 30 | "drop_edge": 1.0, 31 | "weight_decay": 5e-4, 32 | "dropout": 0.5, 33 | "hidden": 64, 34 | "use_sampler": true, 35 | "num_layers": 1, 36 | "min_epoch": 1, 37 | "loop": true 38 | } 
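The JSON files above (`configs/a.json`, `configs/az-cs.json`, ...) are consumed by the `Config` class in `code_submission/config.py`. The sketch below is not part of the repository; it is a hypothetical usage example that assumes the working directory is `code_submission/` and that the competition dependencies are installed so that `from models import MODEL_LIB, MODEL_PARAMETER_LIB` (used inside `config.py`) succeeds and the model names listed in the config are registered in `MODEL_LIB`. It shows how global keys are read, how `model_classes` yields `(name, model_class)` pairs (cycling indefinitely when `"loop": true`, and re-sorting the roster by the mean of the metrics previously passed to `update()`), and how `model_config(name)` resolves per-model hyperparameters.
```
# Hypothetical usage sketch (assumptions: run from code_submission/, competition
# environment installed, config model names present in MODEL_LIB).
from config import Config

cfg = Config("configs/az-cs.json")
print(cfg.num_epoch)       # 500   -- global keys are exposed as attributes
print(cfg["use_sampler"])  # True  -- or via item access

# ModelList yields (name, model_class) pairs; with "loop": true it cycles until
# the caller stops it, so a real training loop exits based on its time budget.
for name, model_cls in cfg.model_classes:
    sub_cfg = cfg.model_config(name)   # per-model overrides > MODEL_PARAMETER_LIB > globals
    print(name, sub_cfg.lr, sub_cfg.hidden, sub_cfg.num_layers)
    break                              # stop after one model for this demo
```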
-------------------------------------------------------------------------------- /code_submission/configs/b.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_citeseer", 4 | "nas_autograph_c", 5 | "gat", 6 | "simple_gcn", 7 | "nas_autograph_b", 8 | "nas_pubmed", 9 | "nas_autograph_e" 10 | ], 11 | "model_config": { 12 | "gat": { 13 | "num_layers": 3 14 | }, 15 | "simple_gcn": { 16 | "drop_edge": 0.8, 17 | "num_layers": 3 18 | }, 19 | "nas_citeseer": { 20 | "num_layers": 1 21 | }, 22 | "nas_pubmed": { 23 | "num_layers": 1 24 | }, 25 | "nas_autograph_a": { 26 | "lr": 0.01, 27 | "dropout": 0.9, 28 | "weight_decay": 0, 29 | "hidden": 128 30 | }, 31 | "nas_autograph_b": { 32 | "lr": 0.001, 33 | "dropout": 0.7, 34 | "weight_decay": 0, 35 | "hidden": 256 36 | }, 37 | "nas_autograph_d": { 38 | "lr": 0.005, 39 | "dropout": 0.1, 40 | "weight_decay": 0.001, 41 | "hidden": 8 42 | }, 43 | "nas_autograph_e": { 44 | "lr": 0.005, 45 | "dropout": 0.7, 46 | "weight_decay": 0.0001, 47 | "hidden": 32 48 | } 49 | }, 50 | "num_epoch": 500, 51 | "num_batch": 1, 52 | "patience": 30, 53 | "use_valid": true, 54 | "use_all_data": false, 55 | "lr": 0.005, 56 | "drop_edge": 1.0, 57 | "weight_decay": 5e-4, 58 | "dropout": 0.5, 59 | "hidden": 64, 60 | "use_sampler": false, 61 | "num_layers": 1, 62 | "min_epoch": 1, 63 | "loop": true 64 | } -------------------------------------------------------------------------------- /code_submission/configs/c.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_cora", 4 | "gat", 5 | "nas_pubmed", 6 | "gcn", 7 | "simple_gcn", 8 | "nas_autograph_e" 9 | ], 10 | "model_config": { 11 | "simple_gcn": { 12 | "drop_edge": 0.8, 13 | "num_layers": 3 14 | }, 15 | "nas_cora":{ 16 | "num_layers": 1 17 | }, 18 | "nas_pubmed": { 19 | "num_layers": 1 20 | }, 21 | "nas_autograph_a": { 22 | "lr": 0.01, 23 | "dropout": 0.9, 24 | "weight_decay": 0, 25 | "hidden": 128 26 | }, 27 | "nas_autograph_b": { 28 | "lr": 0.001, 29 | "dropout": 0.7, 30 | "weight_decay": 0, 31 | "hidden": 256 32 | }, 33 | "nas_autograph_d": { 34 | "lr": 0.005, 35 | "dropout": 0.1, 36 | "weight_decay": 0.001, 37 | "hidden": 8 38 | }, 39 | "nas_autograph_e": { 40 | "lr": 0.005, 41 | "dropout": 0.7, 42 | "weight_decay": 0.0001, 43 | "hidden": 32 44 | } 45 | }, 46 | "num_epoch": 500, 47 | "num_batch": 1, 48 | "patience": 20, 49 | "use_valid": false, 50 | "use_all_data": false, 51 | "lr": 0.005, 52 | "drop_edge": 1.0, 53 | "weight_decay": 5e-4, 54 | "dropout": 0.5, 55 | "hidden": 64, 56 | "use_sampler": true, 57 | "num_layers": 1, 58 | "min_epoch": 1 59 | } -------------------------------------------------------------------------------- /code_submission/configs/coauthor-cs.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_coauthorphy", 4 | "nas_coauthorcs", 5 | "nas_autograph_b", 6 | "nas_autograph_e", 7 | "nas_phy10000", 8 | "nas_autograph_d" 9 | ], 10 | "model_config": { 11 | "nas_autograph_c": { 12 | "lr": 0.0005, 13 | "dropout": 0.8, 14 | "weight_decay": 1e-05, 15 | "hidden": 256 16 | } 17 | }, 18 | "num_epoch": 500, 19 | "num_batch": 1, 20 | "patience": 30, 21 | "use_valid": true, 22 | "use_all_data": false, 23 | "lr": 0.005, 24 | "drop_edge": 1.0, 25 | "weight_decay": 0.0005, 26 | "dropout": 0.5, 27 | "hidden": 64, 28 | "num_layers": 2, 29 | "min_epoch": 1, 30 | "loop": true 31 | } 
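For a concrete picture of how a per-dataset config such as `coauthor-cs.json` above is combined with the global keys, the self-contained snippet below replays the `ChainMap` lookup order used by `Config.model_config` in `code_submission/config.py`: a per-model entry under `"model_config"` wins, then the (lr, dropout, weight_decay, hidden) defaults from `MODEL_PARAMETER_LIB`, then the global keys of the JSON file. The `MODEL_PARAMETER_LIB` values below are invented for the illustration (the real ones live in `models/`); the other two dicts are taken from `configs/coauthor-cs.json`. A model with neither a `"model_config"` entry nor a `MODEL_PARAMETER_LIB` entry simply trains with the global keys.
```
# Illustration only (not repository code): hyperparameter precedence applied by
# Config.model_config, i.e. per-model "model_config" > MODEL_PARAMETER_LIB > globals.
from collections import ChainMap

global_keys = {"lr": 0.005, "dropout": 0.5, "weight_decay": 0.0005,
               "hidden": 64, "num_layers": 2, "patience": 30}                       # coauthor-cs.json globals
per_model   = {"lr": 0.0005, "dropout": 0.8, "weight_decay": 1e-05, "hidden": 256}  # model_config["nas_autograph_c"]
lib_default = {"lr": 0.01, "dropout": 0.6, "weight_decay": 5e-4, "hidden": 128}     # hypothetical MODEL_PARAMETER_LIB entry

resolved = ChainMap(per_model, lib_default, global_keys)
print(resolved["lr"])          # 0.0005 -> the per-model entry overrides everything
print(resolved["hidden"])      # 256    -> likewise
print(resolved["num_layers"])  # 2      -> falls through to the global key
```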
-------------------------------------------------------------------------------- /code_submission/configs/d.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "gat", 4 | "nas_autograph_e", 5 | "nas_autograph_d", 6 | "nas_citeseer", 7 | "nas_pubmed", 8 | "gcn", 9 | "simple_gcn" 10 | ], 11 | "model_config": { 12 | "emb_gcn": { 13 | "emb_dim": 128, 14 | "num_layers": 3 15 | }, 16 | "simple_gcn": { 17 | "num_layers": 3 18 | }, 19 | "gcn": { 20 | "num_layers": 3 21 | }, 22 | "nas_autograph_a": { 23 | "lr": 0.01, 24 | "dropout": 0.9, 25 | "weight_decay": 0, 26 | "hidden": 128 27 | }, 28 | "nas_autograph_b": { 29 | "lr": 0.001, 30 | "dropout": 0.7, 31 | "weight_decay": 0, 32 | "hidden": 256 33 | }, 34 | "nas_autograph_d": { 35 | "lr": 0.005, 36 | "dropout": 0.1, 37 | "weight_decay": 0.001, 38 | "hidden": 8 39 | }, 40 | "nas_autograph_e": { 41 | "lr": 0.005, 42 | "dropout": 0.7, 43 | "weight_decay": 0.0001, 44 | "hidden": 32 45 | } 46 | }, 47 | "num_epoch": 500, 48 | "num_batch": 1, 49 | "patience": 30, 50 | "use_valid": false, 51 | "use_all_data": false, 52 | "lr": 0.005, 53 | "drop_edge": 1.0, 54 | "weight_decay": 5e-4, 55 | "dropout": 0.5, 56 | "hidden": 64, 57 | "use_sampler": true, 58 | "num_layers": 1, 59 | "min_epoch": 1, 60 | "loop": true 61 | } -------------------------------------------------------------------------------- /code_submission/configs/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_autograph_b", 4 | "nas_citeseer", 5 | "nas_pubmed", 6 | "nas_autograph_a" 7 | ], 8 | "model_config": { 9 | "simple_gcn": { 10 | }, 11 | "nas_citeseer": { 12 | "num_layers": 1 13 | }, 14 | "nas_pubmed": { 15 | "num_layers": 1 16 | }, 17 | "nas_autograph_a": { 18 | "lr": 0.01, 19 | "dropout": 0.9, 20 | "weight_decay": 0, 21 | "hidden": 128 22 | }, 23 | "nas_autograph_b": { 24 | "lr": 0.001, 25 | "dropout": 0.7, 26 | "weight_decay": 0, 27 | "hidden": 256 28 | }, 29 | "nas_autograph_c": { 30 | "lr": 0.0005, 31 | "dropout": 0.8, 32 | "weight_decay": 1e-05, 33 | "hidden": 256 34 | } 35 | }, 36 | "num_epoch": 500, 37 | "num_batch": 1, 38 | "patience": 30, 39 | "use_valid": true, 40 | "use_all_data": false, 41 | "lr": 0.005, 42 | "drop_edge": 1.0, 43 | "weight_decay": 5e-4, 44 | "dropout": 0.5, 45 | "hidden": 64, 46 | "use_sampler": true, 47 | "num_layers": 2 48 | } -------------------------------------------------------------------------------- /code_submission/configs/e-d5.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_phy10000", 4 | "nas_cora", 5 | "nas_autograph_e", 6 | "nas_citeseer", 7 | "nas_phy10000", 8 | "gat" 9 | ], 10 | "model_config": { 11 | "simple_gcn": { 12 | "drop_edge": 0.8, 13 | "num_layers": 3 14 | }, 15 | "nas_cora": { 16 | "num_layers": 1 17 | }, 18 | "nas_phy10000": { 19 | "min_epoch": 100, 20 | "use_sampler": false 21 | }, 22 | "nas_pubmed": { 23 | "num_layers": 1 24 | }, 25 | "nas_citeseer": { 26 | "num_layers": 1 27 | }, 28 | "nas_autograph_a": { 29 | "lr": 0.01, 30 | "dropout": 0.9, 31 | "weight_decay": 0, 32 | "hidden": 128 33 | }, 34 | "nas_autograph_b": { 35 | "lr": 0.001, 36 | "dropout": 0.7, 37 | "weight_decay": 0, 38 | "hidden": 256 39 | }, 40 | "nas_autograph_d": { 41 | "lr": 0.005, 42 | "dropout": 0.1, 43 | "weight_decay": 0.001, 44 | "hidden": 8 45 | }, 46 | "nas_autograph_e": { 47 | "lr": 0.005, 48 | "dropout": 0.7, 49 | "weight_decay": 0.0001, 50 | "hidden": 32 51 
| } 52 | }, 53 | "num_epoch": 500, 54 | "num_batch": 1, 55 | "patience": 30, 56 | "use_valid": false, 57 | "use_all_data": false, 58 | "lr": 0.005, 59 | "drop_edge": 1.0, 60 | "weight_decay": 5e-4, 61 | "dropout": 0.5, 62 | "hidden": 64, 63 | "use_sampler": true, 64 | "num_layers": 1, 65 | "min_epoch": 40, 66 | "loop": true 67 | } -------------------------------------------------------------------------------- /code_submission/configs/e.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_phy10000", 4 | "nas_cora", 5 | "nas_autograph_e", 6 | "nas_citeseer", 7 | "nas_phy10000", 8 | "nas_autograph_b", 9 | "nas_autograph_a", 10 | "simple_gcn", 11 | "nas_autograph_c", 12 | "nas_coauthorcs", 13 | "nas_coauthorphy" 14 | ], 15 | "model_config": { 16 | "simple_gcn": { 17 | "drop_edge": 0.8, 18 | "num_layers": 3 19 | }, 20 | "nas_cora": { 21 | "num_layers": 1 22 | }, 23 | "nas_phy10000": { 24 | "min_epoch": 100 25 | }, 26 | "nas_pubmed": { 27 | "num_layers": 1 28 | }, 29 | "nas_citeseer": { 30 | "num_layers": 1 31 | }, 32 | "nas_autograph_a": { 33 | "lr": 0.01, 34 | "dropout": 0.9, 35 | "weight_decay": 0, 36 | "hidden": 128 37 | }, 38 | "nas_autograph_b": { 39 | "lr": 0.001, 40 | "dropout": 0.7, 41 | "weight_decay": 0, 42 | "hidden": 256 43 | }, 44 | "nas_autograph_d": { 45 | "lr": 0.005, 46 | "dropout": 0.1, 47 | "weight_decay": 0.001, 48 | "hidden": 8 49 | }, 50 | "nas_autograph_e": { 51 | "lr": 0.005, 52 | "dropout": 0.7, 53 | "weight_decay": 0.0001, 54 | "hidden": 32 55 | } 56 | }, 57 | "num_epoch": 500, 58 | "num_batch": 1, 59 | "patience": 30, 60 | "use_valid": true, 61 | "use_all_data": false, 62 | "lr": 0.005, 63 | "drop_edge": 1.0, 64 | "weight_decay": 5e-4, 65 | "dropout": 0.5, 66 | "hidden": 64, 67 | "use_sampler": true, 68 | "num_layers": 1, 69 | "min_epoch": 40, 70 | "loop": true 71 | } -------------------------------------------------------------------------------- /code_submission/configs/tmp.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": [ 3 | "nas_azpo" 4 | ], 5 | "model_config": { 6 | "nas_autograph_c": { 7 | "lr": 0.0005, 8 | "dropout": 0.8, 9 | "weight_decay": 1e-05, 10 | "hidden": 256 11 | } 12 | }, 13 | "num_epoch": 500, 14 | "num_batch": 1, 15 | "patience": 30, 16 | "use_valid": true, 17 | "use_all_data": false, 18 | "lr": 0.005, 19 | "drop_edge": 1.0, 20 | "weight_decay": 5e-4, 21 | "dropout": 0.5, 22 | "hidden": 64, 23 | "num_layers": 2, 24 | "min_epoch": 1 25 | } -------------------------------------------------------------------------------- /code_submission/meta.encoder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unkrible/AutoGraph2020/06fee732e392971cb973a54e19649821ae0f7786/code_submission/meta.encoder -------------------------------------------------------------------------------- /code_submission/meta.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Unkrible/AutoGraph2020/06fee732e392971cb973a54e19649821ae0f7786/code_submission/meta.model -------------------------------------------------------------------------------- /code_submission/model.py: -------------------------------------------------------------------------------- 1 | """the simple baseline for autograph""" 2 | import random 3 | 4 | import os 5 | import joblib 6 | import numpy as np 7 | import pandas as pd 8 | import torch 9 | import 
torch.nn.functional as F 10 | import torch_geometric.utils as gtils 11 | from collections import defaultdict 12 | from torch_geometric.data import Data 13 | from sklearn.model_selection import train_test_split 14 | from scipy.stats import gmean 15 | 16 | from models import * 17 | from models import MODEL_PARAMETER_LIB 18 | from utils import * 19 | from preprocessing import * 20 | from config import Config 21 | from utils.ensemble import get_top_models_by_std, get_top_models_by_r 22 | from utils.drop_edge import DropEdgeEachStep 23 | 24 | import copy 25 | import gc 26 | 27 | 28 | def fix_seed(seed): 29 | random.seed(seed) 30 | np.random.seed(seed) 31 | torch.manual_seed(seed) 32 | torch.cuda.manual_seed_all(seed) 33 | torch.backends.cudnn.deterministic = True 34 | 35 | 36 | logger = get_logger("INFO", use_error_log=True) 37 | 38 | 39 | class Model: 40 | 41 | def __init__(self): 42 | self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 43 | self.config = None 44 | self.metadata = {} 45 | self._num_nodes = None 46 | self._origin_graph_data_indices = None 47 | self._valid_indices = None 48 | self._valid_mask = None 49 | self._train_indices = None 50 | self._train_mask = None 51 | self._test_mask = None 52 | self._sampler = None 53 | self._n_class = None 54 | self.y_train = None 55 | self.models_topK = defaultdict(list) 56 | self.used_model_num = 0 57 | self.citation_configs = ['a', 'b', 'demo', 'coauthor-cs', 'coauthor-phy', 'phy10000'] 58 | self.use_adaptive_topK = True 59 | 60 | def load_config(self, data, n_class): 61 | dir_path = os.path.dirname(__file__) 62 | try: 63 | tree = joblib.load(f"{dir_path}/meta.model") 64 | encoder = joblib.load(f"{dir_path}/meta.encoder") 65 | # pd.set_option('display.max_columns', None) 66 | meta_info = pd.Series( 67 | extract_graph_feature(data, n_class) 68 | ) 69 | logger.info("meta_info:\n {}".format(meta_info)) 70 | meta_info = pd.DataFrame([meta_info]) 71 | self.metadata = meta_info 72 | 73 | logger.error(f"tree prob:{tree.predict_proba(meta_info)}") 74 | if meta_info['n_feature'][0] == 0: 75 | logger.error("n_feature of this set is 0") 76 | config = "e" 77 | else: 78 | config = encoder.inverse_transform(tree.predict(meta_info))[0] 79 | if config == "e" and meta_info['n_class'].iloc[0] >= 5: 80 | config = "e-d5" 81 | logger.error(f"use {config} config by meta learning") 82 | self.config = Config(f"{dir_path}/configs/{config}.json") 83 | # self.config = Config(f"{dir_path}/configs/tmp.json") 84 | except Exception as e: 85 | logger.error("Throw error when loading config") 86 | logger.error(e) 87 | self.config = Config(f"{dir_path}/configs/default.json") 88 | # self.config = Config(f"{dir_path}/configs/tmp.json") 89 | 90 | def train_valid_split(self, total_indices, y, valid_rate=0.2): 91 | total_indices = np.asarray(total_indices, dtype=np.int32) 92 | total_class_indices = [] 93 | train_indices, valid_indices = [], [] 94 | each_class_max_sample_num = 1000 95 | for i in range(self._n_class): 96 | total_class_indices.append(np.where(y[:] == i)[0]) 97 | each_class_valid_num = max(1, int(len(total_class_indices[i])*valid_rate)) 98 | each_class_valid_indices = np.random.choice(total_class_indices[i], 99 | each_class_valid_num, 100 | replace=False).tolist() 101 | each_class_train_indices = list(set(total_class_indices[i]) - set(each_class_valid_indices)) 102 | if len(each_class_train_indices) == 0: 103 | each_class_train_indices = each_class_valid_indices 104 | train_indices += 
np.random.permutation(each_class_train_indices)[:each_class_max_sample_num].tolist() 105 | valid_indices += each_class_valid_indices 106 | 107 | train_indices, valid_indices = total_indices[train_indices], total_indices[valid_indices] 108 | random.shuffle(train_indices) 109 | random.shuffle(valid_indices) 110 | return train_indices, valid_indices 111 | 112 | def generate_pyg_data(self, data): 113 | # get x feature table 114 | x = data['fea_table'].copy() 115 | df = data['edge_file'] 116 | edges = df[['src_idx', 'dst_idx', 'edge_weight']] 117 | 118 | # get indices first 119 | train_indices = data['train_indices'] 120 | if self.config.use_valid: 121 | train_indices, valid_indices = train_test_split(train_indices, test_size=0.2, shuffle=False) 122 | 123 | try: 124 | if x.shape[1] == 1: # 0-dimensional feature 125 | x = x.set_index(keys="node_index") 126 | x = feat_engineering( 127 | x, 128 | edges=edges, 129 | num_nodes=self.metadata["n_node"].iloc[0] 130 | ) 131 | else: 132 | x_feat = x.drop('node_index', axis=1).to_numpy() 133 | conf_name = self.config.filename.split("/")[-1].split(".")[0] 134 | is_only_one_zero = not ((x_feat != 0) & (x_feat != 1)).any() 135 | logger.info("use {} config".format(conf_name)) 136 | logger.info( 137 | "feature only contains zero: {}, only one and zero: {}".format((x_feat == 0).all(), is_only_one_zero)) 138 | 139 | if conf_name in self.citation_configs: # Judge whether it is a citation graph 140 | # if True: 141 | if is_only_one_zero: 142 | logger.info("Normalize features") 143 | normal_feat = feat_row_sum_inv_normalize(x_feat) 144 | normal_df = pd.DataFrame(data=normal_feat) 145 | normal_df["node_index"] = x["node_index"] 146 | x = normal_df 147 | 148 | pre_feat = prepredict(data, train_indices=train_indices, use_valid=self.config.use_valid, use_ohe=False) 149 | x = x.set_index(keys="node_index") 150 | x_index = x.index.tolist() 151 | lpa_preds, lpa_train_acc = lpa_predict(data, n_class=self._n_class, train_indices=train_indices, use_valid=self.config.use_valid) 152 | if not np.isnan(lpa_train_acc) and lpa_train_acc > 0.8: 153 | logger.info("Use LPA predicts") 154 | x = pd.concat([x, pre_feat, lpa_preds], axis=1).values[x_index] 155 | else: 156 | x = pd.concat([x, pre_feat], axis=1).values[x_index] 157 | else: 158 | x = x.set_index(keys="node_index") 159 | x = feat_engineering( 160 | x, 161 | edges=edges, 162 | num_nodes=self.metadata["n_node"].iloc[0] 163 | ) 164 | except Exception as e: 165 | logger.error(e) 166 | if x.shape[1] == 0: 167 | x = np.zeros((x.shape[0], 64), dtype=np.float) 168 | else: 169 | x = x.to_numpy() 170 | 171 | logger.info("x shape: {}".format(x.shape)) 172 | node_index = torch.tensor(data['fea_table']['node_index'].to_numpy(), dtype=torch.long) 173 | x = torch.tensor(x, dtype=torch.float) 174 | 175 | # get edge_index, edge_weight 176 | edges = edges.to_numpy() 177 | edge_index = edges[:, :2].astype(np.int) 178 | # transpose from [edge_num, 2] to [2, edge_num] which is required by PyG 179 | edge_index = torch.tensor(edge_index, dtype=torch.long).transpose(0, 1) 180 | edge_weight = edges[:, 2] 181 | edge_weight = torch.tensor(edge_weight, dtype=torch.float32) 182 | 183 | undirected = gtils.is_undirected(edge_index) 184 | 185 | edge_index, edge_weight = gtils.sort_edge_index(edge_index, edge_weight) 186 | logger.info(f"is undirected ? 
{undirected}") 187 | logger.info(f"edge index {edge_index.shape}, edge weight {edge_weight.shape}") 188 | 189 | # get train/test mask 190 | num_nodes = x.size(0) 191 | self._num_nodes = num_nodes 192 | y = torch.zeros(num_nodes, dtype=torch.long) 193 | inds = data['train_label'][['node_index']].to_numpy() 194 | train_y = data['train_label'][['label']].to_numpy() 195 | self.y_train = train_y 196 | y[inds] = torch.tensor(train_y, dtype=torch.long) 197 | 198 | # train_indices = data['train_indices'] 199 | self._origin_graph_data_indices = copy.deepcopy(data['train_indices']) 200 | if self.config.use_valid: 201 | # train_indices, valid_indices = train_test_split(train_indices, test_size=0.2) 202 | # train_indices, valid_indices = train_test_split(train_indices, test_size=0.2, shuffle=False) 203 | self.y_train = data['train_label'].set_index('node_index').loc[train_indices][['label']].to_numpy() 204 | test_indices = data['test_indices'] 205 | 206 | data = Data(x=x, node_index=node_index, edge_index=edge_index, y=y, edge_weight=edge_weight) 207 | 208 | data.num_nodes = num_nodes 209 | 210 | train_mask = torch.zeros(num_nodes, dtype=torch.bool) 211 | train_mask[train_indices] = 1 212 | data.train_indices = np.asarray(train_indices) 213 | data.train_mask = train_mask 214 | self._train_indices = np.asarray(train_indices) 215 | self._train_mask = train_mask 216 | 217 | if self.config.use_valid: 218 | valid_mask = torch.zeros(num_nodes, dtype=torch.bool) 219 | valid_mask[valid_indices] = 1 220 | data.valid_indices = valid_indices 221 | data.valid_mask = valid_mask 222 | self._valid_indices = valid_indices 223 | self._valid_mask = valid_mask 224 | 225 | self._test_mask = np.zeros(num_nodes, dtype=np.bool) 226 | self._test_mask[test_indices] = True 227 | test_mask = torch.zeros(num_nodes, dtype=torch.bool) 228 | test_mask[test_indices] = 1 229 | data.test_mask = test_mask 230 | data.test_indices = np.asarray(test_indices) 231 | 232 | self._sampler = Sampler(data, self.metadata["n_edge"].iloc[0], self.device) 233 | 234 | return data 235 | 236 | def train(self, sampler, n_class): 237 | 238 | try: 239 | time_budget = get_time_budget().timing(frac=0.95) 240 | drop_edge_controller = None 241 | model_time_budget = max(time_budget.remain * 0.6, time_budget.remain / len(self.config.model_classes)) 242 | self.models_topK = defaultdict(list) 243 | 244 | for model_name, model_class in self.config.model_classes: 245 | time_budget.check() 246 | config = self.config.model_config(model_name) 247 | logger.info(f"model {model_name} config:\n{config}") 248 | self.used_model_num += 1 249 | data = sampler.random_edge_sampler(percent=config.drop_edge) 250 | 251 | model = model_class( 252 | features_num=data.x.size()[1], 253 | num_class=n_class, 254 | edge_num=data.edge_index.shape[1], 255 | num_layers=config.num_layers, 256 | hidden=config.hidden, 257 | dropout=config.dropout, 258 | drop_edge_controller=drop_edge_controller, 259 | num_nodes=self._num_nodes, 260 | emb_dim=config.emb_dim 261 | ) 262 | 263 | model = model.to(self.device) 264 | 265 | optimizer = torch.optim.Adam(model.parameters(), lr=config.lr, 266 | weight_decay=config.weight_decay) 267 | 268 | train_kwargs = {} 269 | if config.use_valid: 270 | train_kwargs = { 271 | "valid_indices": data.valid_indices, 272 | } 273 | 274 | if config.use_sampler: 275 | dataset = GraphSampleDataset(data, n_class, self.y_train) 276 | else: 277 | dataset = GraphDataset(data) 278 | 279 | topK_list, valid_acc, except_info = torch_train( 280 | data, dataset, model, optimizer, 
F.nll_loss, 281 | epochs=config.num_epoch, batch_size=data.num_nodes // config.num_batch, 282 | min_epochs=config.min_epoch, 283 | clip_grad=5 if config.use_sampler else 0, 284 | patience=config.patience, 285 | time_budget=time_budget, 286 | all_data=False, 287 | use_adaptive_topK=self.use_adaptive_topK, 288 | model_topK=self.models_topK[model_name], 289 | **train_kwargs 290 | ) 291 | 292 | if except_info == "time_exceed": 293 | print("execute to {}".format(except_info)) 294 | if -valid_acc < 0.80 and self.used_model_num > 1: 295 | del model 296 | gc.collect() 297 | break 298 | 299 | self.models_topK[model_name] = topK_list 300 | del model 301 | gc.collect() 302 | break 303 | 304 | if except_info == "oom": 305 | del model 306 | gc.collect() 307 | continue 308 | 309 | self.models_topK[model_name] = topK_list 310 | 311 | del model 312 | gc.collect() 313 | self.config.model_classes.update(self.models_info) 314 | return self.models_info 315 | 316 | except RuntimeError as exception: 317 | if "out of memory" in str(exception): 318 | logger.info("we met cuda out of memory") 319 | return self.models_info 320 | else: 321 | raise exception 322 | except TimeOutError as e: 323 | print(e) 324 | return self.models_info 325 | 326 | @property 327 | def models_info(self): 328 | info = [] 329 | for name in self.models_topK: 330 | info.extend([(ele['pred'], ele['acc'], name) for ele in self.models_topK[name]]) 331 | return info 332 | 333 | def transition_train_valid(self, data, target): 334 | if target == 'valid': 335 | data.valid_indices = self._valid_indices 336 | data.valid_mask = self._valid_mask 337 | data.train_indices = self._train_indices 338 | data.train_mask = self._train_mask 339 | else: 340 | train_mask = torch.zeros(self._num_nodes, dtype=torch.bool) 341 | train_indices = self._origin_graph_data_indices 342 | train_mask[train_indices] = 1 343 | data.train_indices = np.asarray(train_indices) 344 | data.train_mask = train_mask 345 | return data 346 | 347 | def fake_pred(self, model, data): 348 | model.eval() 349 | data = data.to(self.device) 350 | with torch.no_grad(): 351 | logits, labels = model(data.test_mask).max(1) 352 | return logits, labels 353 | 354 | def pred(self, model, data): 355 | model.eval() 356 | data = data.to(self.device) 357 | with torch.no_grad(): 358 | logits = model(data) 359 | _, preds = logits[data.test_mask].max(1) 360 | return logits, preds 361 | 362 | def train_predict(self, data, time_budget, n_class, schema): 363 | """ 364 | API for ingestion prog to invoke 365 | Args: 366 | data: { 367 | 'fea_table': pd.DataFrame['node_index', 'feat_1', ..., 'feat_n'], 368 | 'edge_file': pd.DataFrame['src_idx', 'dst_idx', 'edge_weight'], 369 | 'train_indices': list of the index of train set, 370 | 'test_indices': list of the index of test set, 371 | 'train_label': pd.DataFrame['node_index', 'label'] 372 | } 373 | time_budget: remain time 374 | n_class: class num 375 | schema: deprecated 376 | 377 | Returns: prediction of nodes in test set 378 | 379 | """ 380 | set_time_budget(time_budget) 381 | self._n_class = n_class 382 | self.load_config(data, n_class) 383 | data = self.generate_pyg_data(data) 384 | # model = self.train(data, n_class) 385 | models_info = self.train(self._sampler, n_class) 386 | print("models_info_acc:") 387 | for i in range(len(models_info)): 388 | print("acc: {}".format(models_info[i][1])) 389 | 390 | # test_logits, test_labels = self.fake_pred(model, data) 391 | # test_logits = test_logits.cpu().numpy() 392 | # test_sorted = test_logits.argsort()[::-1] 393 | # 
selected = test_sorted[: int(len(test_sorted) * 0.4)] 394 | # selected_id = data.test_indices[selected] 395 | # data.y[selected_id] = torch.tensor(test_labels.cpu().numpy().flatten()[selected], dtype=torch.long, device=self.device) 396 | # data.train_mask[selected_id] = 1 397 | # model = self.train(data) 398 | # logits, preds = self.pred(model, data) 399 | 400 | timing = get_time_budget().timing(frac=0.97) 401 | 402 | ensemble_info = get_top_models_by_r(models_info) 403 | 404 | try: 405 | logger.info("logits_ensemble_len: {}".format(len(ensemble_info))) 406 | 407 | logits_ensemble = None 408 | # logits_list = [] 409 | for pred, weight in ensemble_info: 410 | timing.check() 411 | # logger.info("model_ensemble_weight: {}".format(weight)) 412 | logits = pred[self._test_mask, :] 413 | # normalize logits 414 | logits = logits.T 415 | logits = (logits - np.min(logits, axis=0)) / (np.max(logits, axis=0) - np.min(logits, axis=0)) 416 | logits = logits.T 417 | # logits_list.append(logits) 418 | if logits_ensemble is None: 419 | logits_ensemble = logits * weight 420 | else: 421 | logits_ensemble += logits * weight 422 | timing.check() 423 | 424 | # logits_ensemble = np.array(logits_list) 425 | # logger.info("use gmeans ensemble; logits_ensemble_shape: {}".format(logits_ensemble.shape)) 426 | # logits_ensemble = gmean(logits_ensemble, axis=0) 427 | preds = np.argmax(logits_ensemble, axis=1) 428 | 429 | return preds.flatten() 430 | except TimeOutError as e: 431 | print(e) 432 | return np.argmax(ensemble_info[0][0][self._test_mask, :], axis=1).flatten() 433 | except Exception as e: 434 | print(e) 435 | return np.argmax(np.random.rand(self.metadata['n_test'].iloc[0], self._n_class), axis=1).flatten() 436 | 437 | -------------------------------------------------------------------------------- /code_submission/models/__init__.py: -------------------------------------------------------------------------------- 1 | from models.nas_azcs import NasAzcs 2 | from models.nas_azpo import NasAzpo 3 | from models.nas_coauthorcs import NasCoauthorcs 4 | from models.nas_coauthorphy import NasCoauthorphy 5 | from models.nas_phy10000 import NasPhy10000 6 | 7 | __all__ = [ 8 | "GCN", "SAGE", "GAT", "NasCora", "NasCiteseer", "NasPubmed", "SimpleGCN", "EmbGCN", 9 | "NasAutoGraphA", "NasAutoGraphB", "NasAutoGraphD", "NasAutoGraphE", 10 | "NasCoauthorcs", "NasCoauthorphy", "NasPhy10000", "NasAzpo", "NasAzcs" 11 | ] 12 | 13 | 14 | from .gcn import GCN 15 | from .emb_gcn import EmbGCN 16 | from .sage import SAGE 17 | from .gat import GAT 18 | from .nas_cora import NasCora 19 | from .nas_citeseer import NasCiteseer 20 | from .nas_pubmed import NasPubmed 21 | from .simple_gcn import SimpleGCN 22 | from .nas_autograph_a import NasAutoGraphA 23 | from .nas_autograph_b import NasAutoGraphB 24 | from .nas_autograph_c import NasAutoGraphC 25 | from .nas_autograph_d import NasAutoGraphD 26 | from .nas_autograph_e import NasAutoGraphE 27 | 28 | MODEL_LIB = { 29 | 'gcn': GCN, 30 | 'emb_gcn': EmbGCN, 31 | 'simple_gcn': SimpleGCN, 32 | 'gat': GAT, 33 | 'nas_cora': NasCora, 34 | 'nas_citeseer': NasCiteseer, 35 | 'nas_pubmed': NasPubmed, 36 | 'nas_autograph_a': NasAutoGraphA, 37 | 'nas_autograph_b': NasAutoGraphB, 38 | 'nas_autograph_c': NasAutoGraphC, 39 | 'nas_autograph_d': NasAutoGraphD, 40 | 'nas_autograph_e': NasAutoGraphE, 41 | 'nas_coauthorcs': NasCoauthorcs, 42 | 'nas_coauthorphy': NasCoauthorphy, 43 | 'nas_phy10000': NasPhy10000, 44 | 'nas_azpo': NasAzpo, 45 | 'nas_azcs': NasAzcs 46 | } 47 | 48 | MODEL_PARAMETER_LIB = { 49 | 
'default': [0.005, 0.5, 5e-4, 64], 50 | # 'nas_cora': [0.01, 0.9, 0.0001, 64], 51 | # 'nas_citeseer': [0.005, 0.8, 1e-05, 128], 52 | # 'nas_pubmed': [0.01, 0.4, 5e-05, 64], 53 | 'nas_autograph_a': [0.01, 0.9, 0, 128], 54 | 'nas_autograph_b': [0.001, 0.7, 0, 256], 55 | 'nas_autograph_c': [0.0005, 0.8, 1e-05, 256], 56 | 'nas_autograph_d': [0.005, 0.1, 0.001, 8], 57 | 'nas_autograph_e': [0.005, 0.7, 0.0001, 32], 58 | 'nas_coauthorcs': [0.005, 0.5, 1e-05, 64], 59 | 'nas_coauthorphy': [0.01, 0.4, 5e-05, 128], 60 | 'nas_phy10000': [0.001, 0.5, 0.0001, 128], 61 | 'nas_azpo': [0.0005, 0.5, 0.0005, 32], 62 | 'nas_azcs': [0.0005, 0.5, 1e-05, 512] 63 | } 64 | -------------------------------------------------------------------------------- /code_submission/models/emb_gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear, Embedding 4 | from torch_geometric.nn import GCNConv 5 | 6 | 7 | class EmbGCN(torch.nn.Module): 8 | 9 | def __init__(self, num_layers=2, hidden=32, emb_dim=64, num_class=2, num_nodes=None, **kwargs): 10 | super(EmbGCN, self).__init__() 11 | hidden = max(hidden, num_class * 2) 12 | self.conv1 = GCNConv(emb_dim, hidden) 13 | self.convs = torch.nn.ModuleList() 14 | for i in range(num_layers - 1): 15 | self.convs.append(GCNConv(hidden, hidden)) 16 | self.lin2 = Linear(hidden, num_class) 17 | self.emb = Embedding(num_nodes, emb_dim) 18 | self.first_lin = Linear(emb_dim, hidden) 19 | 20 | def reset_parameters(self): 21 | self.first_lin.reset_parameters() 22 | self.emb.reset_parameters() 23 | self.conv1.reset_parameters() 24 | for conv in self.convs: 25 | conv.reset_parameters() 26 | self.lin2.reset_parameters() 27 | 28 | def forward(self, data): 29 | x, edge_index, edge_weight, node_index = data.x, data.edge_index, data.edge_weight, data.node_index 30 | x = self.emb(node_index) 31 | x1 = F.elu(self.conv1(x, edge_index, edge_weight=edge_weight)) 32 | x = F.elu(self.first_lin(x)) 33 | x = F.dropout(x, p=0.5, training=self.training) 34 | for conv in self.convs: 35 | x = F.elu(conv(x, edge_index, edge_weight=edge_weight)) 36 | x = x1 + x 37 | x = F.dropout(x, p=0.5, training=self.training) 38 | x = self.lin2(x) 39 | return F.log_softmax(x, dim=-1) 40 | 41 | def __repr__(self): 42 | return self.__class__.__name__ 43 | -------------------------------------------------------------------------------- /code_submission/models/focal_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time: 2020/5/13 21:26 4 | # @Author: Mecthew 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | 11 | 12 | class FocalLoss(nn.Module): 13 | def __init__(self, class_num, alpha=None, gamma=2, size_average=True): 14 | super(FocalLoss, self).__init__() 15 | if alpha is None: 16 | self.alpha = Variable(torch.ones(class_num, 1), requires_grad=False) 17 | else: 18 | if isinstance(alpha, Variable): 19 | self.alpha = alpha 20 | else: 21 | self.alpha = Variable(torch.tensor(alpha, dtype=torch.float), requires_grad=False) 22 | self.gamma = gamma 23 | self.class_num = class_num 24 | self.size_average = size_average 25 | 26 | def forward(self, inputs, targets): 27 | N = inputs.size(0) 28 | C = inputs.size(1) 29 | P = F.softmax(inputs, dim=1) 30 | 31 | class_mask = inputs.data.new(N, C).fill_(0) 32 | class_mask = Variable(class_mask) 33 | ids = 
targets.view(-1, 1) 34 | class_mask.scatter_(1, ids.data, 1.) 35 | 36 | if inputs.is_cuda and not self.alpha.is_cuda: 37 | self.alpha = self.alpha.cuda() 38 | alpha = self.alpha[ids.data.view(-1)] 39 | probs = (P*class_mask).sum(1).view(-1, 1) + 1e-5 40 | log_p = probs.log() 41 | batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p 42 | 43 | if self.size_average: 44 | loss = batch_loss.mean() 45 | else: 46 | loss = batch_loss.sum() 47 | return loss 48 | -------------------------------------------------------------------------------- /code_submission/models/gat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from torch_geometric.nn import GATConv 5 | 6 | 7 | class GAT(torch.nn.Module): 8 | def __init__(self, features_num, num_class, num_layers=3, hidden=32, **kwargs): 9 | super(GAT, self).__init__() 10 | hidden = max(hidden, num_class * 2) 11 | self.convs = nn.ModuleList() 12 | for _ in range(num_layers): 13 | self.convs.append(GATConv(hidden, hidden)) 14 | self.input_lin = nn.Linear(features_num, hidden) 15 | self.output_lin = nn.Linear(hidden, num_class) 16 | 17 | def forward(self, data): 18 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 19 | x = F.leaky_relu(self.input_lin(x)) 20 | x = F.dropout(x, p=0.5, training=self.training) 21 | for conv in self.convs: 22 | x = F.leaky_relu(conv(x, edge_index)) 23 | x = F.dropout(x, p=0.5, training=self.training) 24 | x = self.output_lin(x) 25 | return F.log_softmax(x, dim=-1) 26 | 27 | -------------------------------------------------------------------------------- /code_submission/models/gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear 4 | from torch_geometric.nn import GCNConv 5 | 6 | 7 | class GCN(torch.nn.Module): 8 | 9 | # TODO: 网络太弱 10 | def __init__(self, num_layers=3, hidden=32, features_num=16, num_class=2, **kwargs): 11 | super(GCN, self).__init__() 12 | hidden = max(hidden, num_class * 2) 13 | self.conv1 = GCNConv(features_num, hidden) 14 | self.convs = torch.nn.ModuleList() 15 | for i in range(num_layers - 1): 16 | self.convs.append(GCNConv(hidden, hidden)) 17 | self.lin2 = Linear(hidden, num_class) 18 | self.first_lin = Linear(features_num, hidden) 19 | 20 | def reset_parameters(self): 21 | self.first_lin.reset_parameters() 22 | self.conv1.reset_parameters() 23 | for conv in self.convs: 24 | conv.reset_parameters() 25 | self.lin2.reset_parameters() 26 | 27 | def forward(self, data): 28 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 29 | x1 = F.elu(self.conv1(x, edge_index, edge_weight=edge_weight)) 30 | x = F.elu(self.first_lin(x)) 31 | # TODO: dropout rate 32 | x = F.dropout(x, p=0.5, training=self.training) 33 | for conv in self.convs: 34 | x = F.elu(conv(x, edge_index, edge_weight=edge_weight)) 35 | x = x1 + x 36 | x = F.dropout(x, p=0.5, training=self.training) 37 | # x = torch.cat([x1, x], dim=1) 38 | x = self.lin2(x) 39 | return F.log_softmax(x, dim=-1) 40 | 41 | def __repr__(self): 42 | return self.__class__.__name__ 43 | -------------------------------------------------------------------------------- /code_submission/models/nas_autograph_a.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-11 4 | 5 | 
import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import GATConv, ARMAConv, SGConv 9 | 10 | 11 | class NasAutoGraphA(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasAutoGraphA, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasAutoGraphACell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasAutoGraphACell(nn.Module): 37 | # best structure:{'action': [1, 'gat_2', 1, 'sg', 'relu', 'concat'], 'hyper_param': [0.01, 0.9, 0, 128]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasAutoGraphACell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 45 | self.headers = 2 if multi_head else 1 46 | self.gat2 = GATConv(hidden_dim, output_dim, heads=self.headers) 47 | self.sg = SGConv(hidden_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = self.preprocessor_x(x) 52 | o1 = F.leaky_relu(self.gat2(x, edge_index)) 53 | o2 = F.leaky_relu(self.sg(x, edge_index, edge_weight)) 54 | o3 = F.relu(torch.cat([o1, o2], dim=1)) 55 | return his, o3 56 | 57 | @property 58 | def output_dim(self): 59 | return self._output_dim * (1 + self.headers) 60 | 61 | -------------------------------------------------------------------------------- /code_submission/models/nas_autograph_b.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-11 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import GATConv, ARMAConv, SAGEConv 9 | 10 | 11 | class NasAutoGraphB(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasAutoGraphB, self).__init__() 14 | print(f"edge num {edge_num}") 15 | hidden_dim = max(hidden, num_class * 2) 16 | cur_dim, hidden_dim, output_dim = features_num, hidden_dim, hidden_dim 17 | multi_head = edge_num < 1400000 18 | self.cells = nn.ModuleList() 19 | for _ in range(num_layers): 20 | cell = NasAutoGraphBCell(cur_dim, hidden_dim, output_dim, multi_head) 21 | self.cells.append(cell) 22 | cur_dim = cell.output_dim 23 | self.classifier = nn.Linear(cur_dim, num_class) 24 | 25 | self.dropout = dropout 26 | 27 | def forward(self, data): 28 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 29 | x = F.dropout(x, p=self.dropout, training=self.training) 30 | for cell in self.cells: 31 | x = cell(x, edge_index, edge_weight) 32 | logits = 
self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasAutoGraphBCell(nn.Module): 37 | # best structure:{'action': [0, 'arma', 0, 'sage', 'elu', 'add'], 'hyper_param': [0.001, 0.7, 0, 256]} 38 | def __init__(self, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasAutoGraphBCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor = nn.Linear(cur_dim, hidden_dim) 45 | self.arma = ARMAConv(hidden_dim, output_dim) 46 | self.sage = SAGEConv(hidden_dim, self._output_dim, bias=True) 47 | 48 | def forward(self, x, edge_index, edge_weight): 49 | h = self.preprocessor(x) 50 | h1 = F.leaky_relu(self.arma(h, edge_index, edge_weight=edge_weight)) 51 | h2 = F.leaky_relu(self.sage(h, edge_index, edge_weight=edge_weight)) 52 | out = F.elu(torch.cat([h1, h2], dim=1)) 53 | return out 54 | 55 | @property 56 | def output_dim(self): 57 | return self._output_dim * 2 58 | -------------------------------------------------------------------------------- /code_submission/models/nas_autograph_c.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-14 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import GATConv, ARMAConv, SGConv, ChebConv, SAGEConv 9 | 10 | 11 | class NasAutoGraphC(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasAutoGraphC, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasAutoGraphCCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasAutoGraphCCell(nn.Module): 37 | # best structure: {'action': [1, 'sage', 1, 'cheb', 'linear', 'add'], 'hyper_param': [0.0005, 0.8, 1e-05, 256]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasAutoGraphCCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 45 | self.headers = 2 if multi_head else 1 46 | self.cheb = ChebConv(hidden_dim, output_dim, K=2, bias=True) 47 | self.sage = SAGEConv(hidden_dim, output_dim) 48 | self.linear = nn.Linear(self._output_dim, self._output_dim) 49 | 50 | def forward(self, h, x, edge_index, edge_weight): 51 | his = x 52 | x = self.preprocessor_x(x) 53 | o1 = F.leaky_relu(self.cheb(x, edge_index, edge_weight)) 54 | o2 = F.leaky_relu(self.sage(x, edge_index, edge_weight)) 55 | o3 = self.linear(torch.add(o1, o2)) 56 | return his, o3 57 | 58 | @property 59 | def output_dim(self): 60 | return self._output_dim 61 | 62 | 63 | 
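# --- usage sketch (added for illustration; not part of the original file) ---
# All nas_autograph_* models in this package expose the same interface that
# model.py relies on: the constructor takes features_num / num_class plus the
# searched hyper-parameters (num_layers, hidden, dropout, edge_num), and
# forward() consumes a torch_geometric Data object carrying x, edge_index and
# edge_weight, returning per-node log-probabilities. The hyper_param list in
# the "best structure" comment above appears to follow the
# [lr, dropout, weight_decay, hidden] ordering used by MODEL_PARAMETER_LIB and
# the JSON configs. The toy graph below is made up, and passing edge_weight to
# SAGEConv assumes the PyG version this repo targets (circa 2020); newer PyG
# releases may need that call adjusted.
if __name__ == "__main__":
    from torch_geometric.data import Data

    # tiny hypothetical graph: 6 nodes on a directed path, unit edge weights
    x = torch.randn(6, 16)
    edge_index = torch.tensor([[0, 1, 2, 3, 4],
                               [1, 2, 3, 4, 5]], dtype=torch.long)
    edge_weight = torch.ones(edge_index.size(1))
    data = Data(x=x, edge_index=edge_index, edge_weight=edge_weight)

    # dropout/hidden mirror the searched hyper_param above (0.8 / 256)
    model = NasAutoGraphC(features_num=16, num_class=3, num_layers=2,
                          dropout=0.8, hidden=256,
                          edge_num=edge_index.size(1))
    model.eval()
    with torch.no_grad():
        log_probs = model(data)   # shape: [num_nodes, num_class]
    print(log_probs.shape, log_probs.argmax(dim=-1))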
-------------------------------------------------------------------------------- /code_submission/models/nas_autograph_d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-13 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import ARMAConv, SAGEConv, SGConv 9 | 10 | 11 | class NasAutoGraphD(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasAutoGraphD, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasAutoGraphDCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasAutoGraphDCell(nn.Module): 37 | # best structure:{'action': [0, 'sg', 1, 'arma', 'elu', 'concat'], 'hyper_param': [0.005, 0.1, 0.001, 8]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasAutoGraphDCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 45 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 46 | self.sg = SGConv(hidden_dim, output_dim) 47 | self.arma = ARMAConv(hidden_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = self.preprocessor_x(x) 52 | h = self.preprocessor_h(h) 53 | o1 = F.leaky_relu(self.sg(h, edge_index, edge_weight)) 54 | o2 = F.leaky_relu(self.arma(x, edge_index, edge_weight)) 55 | o3 = F.elu(torch.cat([o1, o2], dim=1)) 56 | return his, o3 57 | 58 | @property 59 | def output_dim(self): 60 | return self._output_dim * 2 -------------------------------------------------------------------------------- /code_submission/models/nas_autograph_e.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-12 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import nn 7 | from torch_geometric.nn import ARMAConv, SAGEConv 8 | 9 | 10 | class NasAutoGraphE(nn.Module): 11 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 12 | super(NasAutoGraphE, self).__init__() 13 | hidden_dim = max(hidden, num_class * 2) 14 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 15 | multi_head = edge_num < 1400000 16 | self.cells = nn.ModuleList() 17 | for _ in range(num_layers): 18 | cell = NasAutoGraphECell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 19 | self.cells.append(cell) 20 | his_dim, cur_dim = cur_dim, cell.output_dim 21 | self.classifier = 
nn.Linear(cur_dim, num_class) 22 | 23 | self.dropout = dropout 24 | 25 | def forward(self, data): 26 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 27 | x = F.dropout(x, p=self.dropout, training=self.training) 28 | h = x 29 | for cell in self.cells: 30 | h, x = cell(h, x, edge_index, edge_weight) 31 | logits = self.classifier(x) 32 | return F.log_softmax(logits, dim=-1) 33 | 34 | 35 | class NasAutoGraphECell(nn.Module): 36 | # best structure:{'action': [1, 'arma', 0, 'sage', 'elu', 'add'], 'hyper_param': [0.005, 0.7, 0.0001, 32]} 37 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 38 | super(NasAutoGraphECell, self).__init__() 39 | self._cur_dim = cur_dim 40 | self._hidden_dim = hidden_dim 41 | self._output_dim = output_dim 42 | 43 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 44 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 45 | self.sage = SAGEConv(hidden_dim, output_dim) 46 | self.arma = ARMAConv(hidden_dim, output_dim) 47 | 48 | def forward(self, h, x, edge_index, edge_weight): 49 | his = x 50 | x = self.preprocessor_x(x) 51 | h = self.preprocessor_h(h) 52 | o1 = F.leaky_relu(self.sage(h, edge_index, edge_weight)) 53 | o2 = F.leaky_relu(self.arma(x, edge_index, edge_weight)) 54 | o3 = F.elu(torch.add(o1, o2)) 55 | return his, o3 56 | 57 | @property 58 | def output_dim(self): 59 | return self._output_dim 60 | -------------------------------------------------------------------------------- /code_submission/models/nas_azcs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-25 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import GATConv, ARMAConv, SGConv, ChebConv, SAGEConv 9 | 10 | 11 | class NasAzcs(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasAzcs, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasAzcsCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasAzcsCell(nn.Module): 37 | # best structure:{'action': [1, 'sg', 1, 'arma', 'relu', 'add'], 'hyper_param': [0.0005, 0.5, 1e-05, 512]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasAzcsCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 45 | self.headers = 2 if multi_head else 1 46 | self.sg = SAGEConv(hidden_dim, output_dim) 47 | self.arma = ARMAConv(hidden_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = 
self.preprocessor_x(x) 52 | o1 = F.leaky_relu(self.sg(x, edge_index, edge_weight)) 53 | o2 = F.leaky_relu(self.arma(x, edge_index, edge_weight)) 54 | o3 = F.relu(torch.add(o1, o2)) 55 | return his, o3 56 | 57 | @property 58 | def output_dim(self): 59 | return self._output_dim -------------------------------------------------------------------------------- /code_submission/models/nas_azpo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-24 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import nn 7 | from torch_geometric.nn import ARMAConv, ChebConv 8 | 9 | 10 | class NasAzpo(nn.Module): 11 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 12 | super(NasAzpo, self).__init__() 13 | hidden_dim = max(hidden, num_class * 2) 14 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 15 | multi_head = edge_num < 1400000 16 | self.cells = nn.ModuleList() 17 | for _ in range(num_layers): 18 | cell = NasAzpoCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 19 | self.cells.append(cell) 20 | his_dim, cur_dim = cur_dim, cell.output_dim 21 | self.classifier = nn.Linear(cur_dim, num_class) 22 | 23 | self.dropout = dropout 24 | 25 | def forward(self, data): 26 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 27 | x = F.dropout(x, p=self.dropout, training=self.training) 28 | h = x 29 | for cell in self.cells: 30 | h, x = cell(h, x, edge_index, edge_weight) 31 | logits = self.classifier(x) 32 | return F.log_softmax(logits, dim=-1) 33 | 34 | 35 | class NasAzpoCell(nn.Module): 36 | # best structure:{'action': [0, 'cheb', 1, 'arma', 'linear', 'add'], 'hyper_param': [0.0005, 0.5, 0.0005, 32]} 37 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 38 | super(NasAzpoCell, self).__init__() 39 | self._cur_dim = cur_dim 40 | self._hidden_dim = hidden_dim 41 | self._output_dim = output_dim 42 | 43 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 44 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 45 | self.cheb = ChebConv(hidden_dim, output_dim, K=2, bias=True) 46 | self.arma = ARMAConv(hidden_dim, output_dim) 47 | self.linear = nn.Linear(output_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = self.preprocessor_x(x) 52 | h = self.preprocessor_h(h) 53 | o1 = F.leaky_relu(self.cheb(h, edge_index, edge_weight)) 54 | o2 = F.leaky_relu(self.arma(x, edge_index, edge_weight)) 55 | o3 = self.linear(torch.add(o1, o2)) 56 | return his, o3 57 | 58 | @property 59 | def output_dim(self): 60 | return self._output_dim 61 | -------------------------------------------------------------------------------- /code_submission/models/nas_citeseer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from torch_geometric.nn import GATConv 5 | 6 | 7 | class NasCiteseer(nn.Module): 8 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 9 | super(NasCiteseer, self).__init__() 10 | print(f"edge num {edge_num}") 11 | hidden_dim = max(hidden, num_class * 2) 12 | cur_dim, hidden_dim, output_dim = features_num, hidden_dim, hidden_dim 13 | multi_head = edge_num < 1400000 14 | self.cells = nn.ModuleList() 15 | for _ in 
range(num_layers): 16 | cell = NasCiteseerCell(cur_dim, hidden_dim, output_dim, multi_head) 17 | self.cells.append(cell) 18 | cur_dim = cell.output_dim 19 | self.classifier = nn.Linear(cur_dim, num_class) 20 | 21 | self.dropout = dropout 22 | 23 | def forward(self, data): 24 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 25 | x = F.dropout(x, p=self.dropout, training=self.training) 26 | for cell in self.cells: 27 | x = cell(x, edge_index, edge_weight) 28 | logits = self.classifier(x) 29 | return F.log_softmax(logits, dim=-1) 30 | 31 | 32 | class NasCiteseerCell(nn.Module): 33 | def __init__(self, cur_dim, hidden_dim, output_dim, multi_head): 34 | super(NasCiteseerCell, self).__init__() 35 | self._cur_dim = cur_dim 36 | self._hidden_dim = hidden_dim 37 | self._output_dim = output_dim 38 | 39 | self.preprocessor = nn.Linear(cur_dim, hidden_dim) 40 | self.heads = 6 if multi_head else 1 41 | self.gat6 = GATConv(hidden_dim, output_dim, heads=self.heads) 42 | self.linear = nn.Linear(self._output_dim * (self.heads + 1), self._output_dim) 43 | 44 | def forward(self, x, edge_index, edge_weight): 45 | h = self.preprocessor(x) 46 | h1 = F.leaky_relu(self.gat6(h, edge_index)) 47 | out = self.linear(torch.cat([h, h1], dim=1)) 48 | return out 49 | 50 | @property 51 | def output_dim(self): 52 | return self._output_dim 53 | -------------------------------------------------------------------------------- /code_submission/models/nas_coauthorcs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-20 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import SAGEConv 9 | 10 | 11 | class NasCoauthorcs(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasCoauthorcs, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasCoauthorcsCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasCoauthorcsCell(nn.Module): 37 | # best structure:{'action': [0, 'linear', 1, 'sage', 'tanh', 'concat'], 'hyper_param': [0.005, 0.5, 1e-05, 64]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasCoauthorcsCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 45 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 46 | self.linear = nn.Linear(hidden_dim, output_dim) 47 | self.sage = SAGEConv(hidden_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = self.preprocessor_x(x) 52 | h = 
self.preprocessor_h(h) 53 | o1 = F.leaky_relu(self.linear(h)) 54 | o2 = F.leaky_relu(self.sage(x, edge_index, edge_weight)) 55 | o3 = F.tanh(torch.cat([o1, o2], dim=1)) 56 | return his, o3 57 | 58 | @property 59 | def output_dim(self): 60 | return self._output_dim * 2 61 | -------------------------------------------------------------------------------- /code_submission/models/nas_coauthorphy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-21 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import SAGEConv, GCNConv 9 | 10 | 11 | class NasCoauthorphy(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasCoauthorphy, self).__init__() 14 | hidden_dim = max(hidden, num_class * 2) 15 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 16 | multi_head = edge_num < 1400000 17 | self.cells = nn.ModuleList() 18 | for _ in range(num_layers): 19 | cell = NasCoauthorphyCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 20 | self.cells.append(cell) 21 | his_dim, cur_dim = cur_dim, cell.output_dim 22 | self.classifier = nn.Linear(cur_dim, num_class) 23 | 24 | self.dropout = dropout 25 | 26 | def forward(self, data): 27 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | h = x 30 | for cell in self.cells: 31 | h, x = cell(h, x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasCoauthorphyCell(nn.Module): 37 | # best structure: {'action': [0, 'linear', 1, 'gcn', 'sigmoid', 'concat'], 'hyper_param': [0.01, 0.4, 5e-05, 128]} 38 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasCoauthorphyCell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 45 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 46 | self.linear = nn.Linear(hidden_dim, output_dim) 47 | self.gcn = GCNConv(hidden_dim, output_dim) 48 | 49 | def forward(self, h, x, edge_index, edge_weight): 50 | his = x 51 | x = self.preprocessor_x(x) 52 | h = self.preprocessor_h(h) 53 | o1 = F.leaky_relu(self.linear(h)) 54 | o2 = F.leaky_relu(self.gcn(x, edge_index, edge_weight)) 55 | o3 = F.sigmoid(torch.cat([o1, o2], dim=1)) 56 | return his, o3 57 | 58 | @property 59 | def output_dim(self): 60 | return self._output_dim * 2 61 | -------------------------------------------------------------------------------- /code_submission/models/nas_cora.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from torch_geometric.nn import GATConv, GCNConv, ARMAConv 5 | 6 | 7 | class NasCora(nn.Module): 8 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 9 | super(NasCora, self).__init__() 10 | hidden_dim = max(hidden, num_class * 2) 11 | multi_head = edge_num < 1400000 12 | cur_dim, hidden_dim, output_dim = features_num, hidden_dim, hidden_dim 13 | self.cells = nn.ModuleList() 14 | for _ in range(num_layers): 15 | cell = NasCoraCell(cur_dim, hidden_dim, 
output_dim, multi_head) 16 | self.cells.append(cell) 17 | cur_dim = cell.output_dim 18 | self.classifier = nn.Linear(cur_dim, num_class) 19 | 20 | self.dropout = dropout 21 | 22 | def forward(self, data): 23 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 24 | x = F.dropout(x, p=self.dropout, training=self.training) 25 | for cell in self.cells: 26 | x = cell(x, edge_index, edge_weight) 27 | logits = self.classifier(x) 28 | return F.log_softmax(logits, dim=-1) 29 | 30 | 31 | class NasCoraCell(nn.Module): 32 | def __init__(self, cur_dim, hidden_dim, output_dim, multi_head): 33 | super(NasCoraCell, self).__init__() 34 | self._cur_dim = cur_dim 35 | self._hidden_dim = hidden_dim 36 | self._output_dim = output_dim 37 | self.headers = 6 if multi_head else 1 38 | 39 | self.preprocessor = nn.Linear(cur_dim, hidden_dim) 40 | self.gat6 = GATConv(hidden_dim, output_dim, heads=self.headers) 41 | self.gcn0 = GCNConv(hidden_dim, output_dim) 42 | self.gcn1 = GCNConv(hidden_dim, output_dim) 43 | self.arma = ARMAConv(output_dim * self.headers, output_dim) 44 | 45 | def forward(self, x, edge_index, edge_weight): 46 | h = self.preprocessor(x) 47 | h1 = F.leaky_relu(self.gat6(h, edge_index)) 48 | h2 = F.leaky_relu(self.gcn0(h, edge_index, edge_weight=edge_weight)) 49 | h3 = F.leaky_relu(self.gcn1(h, edge_index, edge_weight=edge_weight)) 50 | h4 = F.leaky_relu(self.arma(h1, edge_index, edge_weight)) 51 | out = torch.cat([h1, h2, h3, h4], dim=1) 52 | return F.tanh(out) 53 | 54 | @property 55 | def output_dim(self): 56 | return self._output_dim * (self.headers + 3) 57 | 58 | -------------------------------------------------------------------------------- /code_submission/models/nas_phy10000.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Created by HazzaCheng on 2020-05-22 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch import nn 8 | from torch_geometric.nn import ARMAConv 9 | 10 | 11 | class NasPhy10000(nn.Module): 12 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 13 | super(NasPhy10000, self).__init__() 14 | print(f"edge num {edge_num}") 15 | hidden_dim = max(hidden, num_class * 2) 16 | cur_dim, hidden_dim, output_dim = features_num, hidden_dim, hidden_dim 17 | multi_head = edge_num < 1400000 18 | self.cells = nn.ModuleList() 19 | for _ in range(num_layers): 20 | cell = NasPhy10000Cell(cur_dim, hidden_dim, output_dim, multi_head) 21 | self.cells.append(cell) 22 | cur_dim = cell.output_dim 23 | self.classifier = nn.Linear(cur_dim, num_class) 24 | 25 | self.dropout = dropout 26 | 27 | def forward(self, data): 28 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 29 | x = F.dropout(x, p=self.dropout, training=self.training) 30 | for cell in self.cells: 31 | x = cell(x, edge_index, edge_weight) 32 | logits = self.classifier(x) 33 | return F.log_softmax(logits, dim=-1) 34 | 35 | 36 | class NasPhy10000Cell(nn.Module): 37 | # best structure:{'action': [0, 'linear', 2, 'arma', 'tanh', 'add'], 'hyper_param': [0.001, 0.5, 0.0001, 128]} 38 | def __init__(self, cur_dim, hidden_dim, output_dim, multi_head): 39 | super(NasPhy10000Cell, self).__init__() 40 | self._cur_dim = cur_dim 41 | self._hidden_dim = hidden_dim 42 | self._output_dim = output_dim 43 | 44 | self.preprocessor = nn.Linear(cur_dim, hidden_dim) 45 | self.linear = nn.Linear(hidden_dim, output_dim) 46 | self.arma = 
ARMAConv(output_dim, output_dim) 47 | 48 | def forward(self, x, edge_index, edge_weight): 49 | h = self.preprocessor(x) 50 | h1 = F.leaky_relu(self.linear(h)) 51 | h2 = F.leaky_relu(self.arma(h1, edge_index, edge_weight=edge_weight)) 52 | out = F.tanh(torch.add(h1, h2)) 53 | return out 54 | 55 | @property 56 | def output_dim(self): 57 | return self._output_dim 58 | -------------------------------------------------------------------------------- /code_submission/models/nas_pubmed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | from torch_geometric.nn import GATConv, ARMAConv 5 | 6 | 7 | class NasPubmed(nn.Module): 8 | def __init__(self, features_num, num_class, num_layers=2, dropout=0.5, hidden=64, edge_num=1000, **kwargs): 9 | super(NasPubmed, self).__init__() 10 | hidden_dim = max(hidden, num_class * 2) 11 | his_dim, cur_dim, hidden_dim, output_dim = features_num, features_num, hidden_dim, hidden_dim 12 | multi_head = edge_num < 1400000 13 | self.cells = nn.ModuleList() 14 | for _ in range(num_layers): 15 | cell = NasPubmedCell(his_dim, cur_dim, hidden_dim, output_dim, multi_head) 16 | self.cells.append(cell) 17 | his_dim, cur_dim = cur_dim, cell.output_dim 18 | self.classifier = nn.Linear(cur_dim, num_class) 19 | 20 | self.dropout = dropout 21 | 22 | def forward(self, data): 23 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 24 | x = F.dropout(x, p=self.dropout, training=self.training) 25 | h = x 26 | for cell in self.cells: 27 | h, x = cell(h, x, edge_index, edge_weight) 28 | logits = self.classifier(x) 29 | return F.log_softmax(logits, dim=-1) 30 | 31 | 32 | class NasPubmedCell(nn.Module): 33 | def __init__(self, his_dim, cur_dim, hidden_dim, output_dim, multi_head): 34 | super(NasPubmedCell, self).__init__() 35 | self._cur_dim = cur_dim 36 | self._hidden_dim = hidden_dim 37 | self._output_dim = output_dim 38 | 39 | self.preprocessor_h = nn.Linear(his_dim, hidden_dim) 40 | self.preprocessor_x = nn.Linear(cur_dim, hidden_dim) 41 | self.headers = 8 if multi_head else 1 42 | self.gat8 = GATConv(hidden_dim, output_dim, heads=self.headers) 43 | self.arma = ARMAConv(hidden_dim, output_dim) 44 | 45 | def forward(self, h, x, edge_index, edge_weight): 46 | his = x 47 | x = self.preprocessor_x(x) 48 | h = self.preprocessor_h(h) 49 | o3 = F.leaky_relu(self.arma(h, edge_index, edge_weight)) 50 | o4 = F.leaky_relu(self.gat8(x, edge_index)) 51 | o5 = F.tanh(torch.cat([o3, o4], dim=1)) 52 | return his, o5 53 | 54 | @property 55 | def output_dim(self): 56 | return self._output_dim * (1 + self.headers) 57 | 58 | -------------------------------------------------------------------------------- /code_submission/models/sage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_geometric.nn import TopKPooling, SAGEConv 3 | from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp 4 | import torch.nn.functional as F 5 | 6 | 7 | # 这是graph classification级别的网络 8 | class SAGE(torch.nn.Module): 9 | def __init__(self, data, device, embed_dim=128, features_num=16, num_class=2, **kwargs): 10 | super(SAGE, self).__init__() 11 | 12 | self.conv1 = SAGEConv(features_num, 128) 13 | self.pool1 = TopKPooling(128, ratio=0.8) 14 | self.conv2 = SAGEConv(128, 128) 15 | self.pool2 = TopKPooling(128, ratio=0.8) 16 | self.conv3 = SAGEConv(128, 128) 17 | self.pool3 = TopKPooling(128, ratio=0.8) 18 | 
self.lin1 = torch.nn.Linear(256, 128) 19 | self.lin2 = torch.nn.Linear(128, 64) 20 | self.lin3 = torch.nn.Linear(64, num_class) 21 | self.bn1 = torch.nn.BatchNorm1d(128) 22 | self.bn2 = torch.nn.BatchNorm1d(64) 23 | self.act1 = torch.nn.ReLU() 24 | self.act2 = torch.nn.ReLU() 25 | self.data = data 26 | self.item_embedding = data.x 27 | self.device = device 28 | 29 | def forward(self, indices): 30 | data = self.data 31 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 32 | print(f"batch {len(x)}") 33 | batch = torch.tensor([len(x)], dtype=torch.long).to(self.device) 34 | 35 | x = F.relu(self.conv1(x, edge_index, edge_weight=edge_weight)) 36 | 37 | # x, edge_index, _, batch, _ = self.pool1(x, edge_index, None, batch) 38 | print(f"gmp {gmp(x, batch).shape}, gap {gap(x, batch).shape}") 39 | x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1) 40 | 41 | x = F.relu(self.conv2(x, edge_index, edge_weight=edge_weight)) 42 | 43 | # x, edge_index, _, batch, _ = self.pool2(x, edge_index, None, batch) 44 | x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1) 45 | 46 | x = F.relu(self.conv3(x, edge_index, edge_weight=edge_weight)) 47 | 48 | # x, edge_index, _, batch, _ = self.pool3(x, edge_index, None, batch) 49 | x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1) 50 | 51 | x = x1 + x2 + x3 52 | print(f"x1 {x1.shape}, x2 {x2.shape}, x3 {x3.shape}") 53 | 54 | x = self.lin1(x) 55 | x = self.act1(x) 56 | x = self.lin2(x) 57 | x = self.act2(x) 58 | x = F.dropout(x, p=0.5, training=self.training) 59 | 60 | print(indices.shape) 61 | print(x.shape) 62 | x = torch.sigmoid(self.lin3(x))[indices, :] 63 | print(x.shape) 64 | 65 | return F.log_softmax(x, dim=-1) 66 | -------------------------------------------------------------------------------- /code_submission/models/simple_gcn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time: 2020/5/6 9:39 4 | # @Author: Mecthew 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.nn import Linear 8 | from torch_geometric.nn import SplineConv, GCNConv 9 | 10 | 11 | class SimpleGCN(torch.nn.Module): 12 | 13 | # TODO: 网络太弱 14 | def __init__( 15 | self, num_layers=3, hidden=32, features_num=16, num_class=2, dropout=0.5, 16 | drop_edge_controller=None, 17 | **kwargs 18 | ): 19 | super(SimpleGCN, self).__init__() 20 | hidden = max(hidden, num_class * 2) 21 | # self.conv1 = SplineConv(features_num, hidden, dim=1, kernel_size=2) 22 | # self.conv2 = SplineConv(hidden, num_class, dim=1, kernel_size=2) 23 | self.conv1 = GCNConv(features_num, hidden * 2) 24 | self.conv2 = GCNConv(hidden * 2, num_class) 25 | self.dropout = dropout 26 | self.drop_edge_controller = drop_edge_controller 27 | 28 | def forward(self, data): 29 | x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight 30 | # edge_index, edge_weight = self.drop_edge_controller.drop_edges(edge_index, edge_weight, 0.2) 31 | 32 | x = F.dropout(x, p=self.dropout, training=self.training) 33 | x = F.elu(self.conv1(x, edge_index, edge_weight)) 34 | x = F.dropout(x, p=self.dropout, training=self.training) 35 | x = self.conv2(x, edge_index, edge_weight) 36 | return F.log_softmax(x, dim=1) 37 | 38 | def __repr__(self): 39 | return self.__class__.__name__ 40 | -------------------------------------------------------------------------------- /code_submission/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 
"extract_graph_feature", "prepredict", "lpa_predict", "feat_engineering", "is_nonnegative_integer", 3 | "feat_row_sum_inv_normalize", "get_node2vec_embedding" 4 | ] 5 | 6 | from .graph import extract_graph_feature 7 | from .prepredict import prepredict, lpa_predict, is_nonnegative_integer 8 | from .feat_engineer import feat_engineering 9 | from .feat_engineer import feat_row_sum_inv_normalize, get_node2vec_embedding 10 | -------------------------------------------------------------------------------- /code_submission/preprocessing/feat_engineer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import time 3 | 4 | import pandas as pd 5 | import numpy as np 6 | from torch_geometric.nn.models import Node2Vec 7 | import torch 8 | from torch.optim import Adam 9 | 10 | 11 | def drop_n_unique(x, n=1): 12 | drop_cols = [] 13 | for col in x: 14 | if x[col].nunique() == n: 15 | drop_cols.append(col) 16 | print(f"Drop {drop_cols} by condition (nunique={n})") 17 | all_zero = len(drop_cols) == len(x.columns) 18 | x.drop(columns=drop_cols, inplace=True, axis=1) 19 | print(f"Remain cols {x.columns}") 20 | return all_zero 21 | 22 | 23 | def count_nonzero(x): 24 | non_zero = (x != 0).sum(axis=1) 25 | if non_zero.nunique() != 1: 26 | non_zero /= non_zero.max() 27 | x['non_zero'] = non_zero 28 | 29 | 30 | def feat_engineering(x, edges=None, num_nodes=None): 31 | # TODO: out of memory 32 | all_zero = drop_n_unique(x) 33 | if all_zero: 34 | # print(f"Translate all zero to one hot encode") 35 | # x = pd.get_dummies(x.index) 36 | # return x.to_numpy() 37 | print("Use normalized weight as feature") 38 | edge_weights = np.zeros((num_nodes, num_nodes), dtype=np.float) 39 | edge_weights[edges['src_idx'], edges['dst_idx']] = edges['edge_weight'] 40 | # for i in range(num_nodes): 41 | # max_weight = np.max(edge_weights[:, i]) 42 | # min_weight = np.min(edge_weights[:, i]) 43 | # range_weight = max_weight - min_weight 44 | # if math.isclose(range_weight, 0, abs_tol=1e-4): 45 | # continue 46 | # edge_weights[:, i] = (edge_weights[:, i] - min_weight) / range_weight 47 | return edge_weights 48 | count_nonzero(x) 49 | return x.to_numpy() 50 | 51 | 52 | def feat_row_sum_inv_normalize(x): 53 | """ 54 | :param x: np.ndarray, raw features. 55 | :return: np.ndarray, normalized features 56 | """ 57 | x_feat = x.astype(dtype=np.float64) 58 | inv_x_rowsum = np.power(x_feat.sum(axis=1), -1).flatten() 59 | inv_x_rowsum[np.isinf(inv_x_rowsum)] = 0. 60 | x_diag_mat = np.diag(inv_x_rowsum) 61 | normalized_x = x_diag_mat.dot(x_feat) 62 | return normalized_x 63 | 64 | 65 | def get_node2vec_embedding(data, num_nodes, edge_index, embedding_dim=300): 66 | """ 67 | :param data: pd.DataFrame. 68 | :param num_nodes: int, number of nodes. 
69 | :param edge_index: np.ndarray, shape = (2, edge_num) 70 | :return: np.ndarray, shape = (num_nodes, embedding_dim) 71 | """ 72 | t1 = time.time() 73 | edge_index = torch.tensor(edge_index) 74 | train_indices = data['train_indices'] 75 | test_indices = data['test_indices'] 76 | total_indices = sorted(train_indices + test_indices, reverse=False) 77 | train_label = data['train_label']['label'].values 78 | 79 | node2vec = Node2Vec(num_nodes=num_nodes, embedding_dim=embedding_dim, walk_length=10, 80 | context_size=5, walks_per_node=1) 81 | optimizer = Adam(node2vec.parameters(), lr=1e-1, weight_decay=1e-4) 82 | for i in range(10): 83 | optimizer.zero_grad() 84 | node2vec.forward(subset=torch.tensor(total_indices)) 85 | loss = node2vec.loss(edge_index=edge_index) 86 | loss.backward() 87 | optimizer.step() 88 | print("loss at epoch{}: {}".format(i, loss.item())) 89 | 90 | x_feats = node2vec.embedding(torch.tensor(total_indices)).detach().numpy() 91 | print("Time cost for node2vec {}s".format(time.time() - t1)) 92 | return x_feats 93 | -------------------------------------------------------------------------------- /code_submission/preprocessing/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def is_undirected(num_node, edges): 6 | src, dist = zip(*edges) 7 | a_mat = np.zeros(shape=(num_node, num_node), dtype=np.bool) 8 | a_mat[src, dist] = True 9 | return (a_mat == a_mat.T).all() 10 | 11 | 12 | def extend_directed(edges): 13 | """ 14 | 将无向图转为有向图 15 | Args: 16 | edges: pd.DataFrame(columns=['src_idx', 'dst_idx', 'edge_weight']) 17 | 18 | Returns: 19 | undirected_edges 20 | """ 21 | edges_shadow = edges.copy() 22 | edges_shadow[['src_idx', 'dst_idx']] = edges_shadow[['dst_idx', 'src_idx']] 23 | undirected_edges = pd.concat([edges, edges_shadow], axis=0).reset_index(drop=True).drop_duplicates() 24 | return undirected_edges 25 | 26 | 27 | def extract_graph_feature(graph_df, n_class): 28 | """ 29 | 30 | Args: 31 | graph_df: { 32 | 'fea_table': pd.DataFrame['node_index', 'feat_1', ..., 'feat_n'], 33 | 'edge_file': pd.DataFrame['src_idx', 'dst_idx', 'edge_weight'], 34 | 'train_indices': list of the index of train set, 35 | 'test_indices': list of the index of test set, 36 | 'train_label': pd.DataFrame['node_index', 'label'] 37 | } 38 | n_class: num of class 39 | 40 | Returns: 41 | 42 | """ 43 | fea_table = graph_df['fea_table'].set_index(keys="node_index") 44 | edges = graph_df['edge_file'] 45 | train_indices = graph_df['train_indices'] 46 | test_indices = graph_df['test_indices'] 47 | train_label = graph_df['train_label'] 48 | 49 | edge_weight = edges['edge_weight'] 50 | in_degree = edges['dst_idx'].value_counts() 51 | out_degree = edges['src_idx'].value_counts() 52 | label_counts = train_label['label'].value_counts() 53 | 54 | (n_node, n_feature), n_edge = fea_table.shape, len(edges) 55 | n_train, n_test = len(train_indices), len(test_indices) 56 | meaning_weight = not (edge_weight == edge_weight[0]).all() 57 | max_degree, min_degree, mean_degree = in_degree.max(), in_degree.min(), in_degree.mean() 58 | max_labels, min_labels = label_counts.max(), label_counts.min() 59 | label_distribute = label_counts.sort_index(axis=0) / n_train 60 | print("label_distribute\n{}".format(label_distribute)) 61 | info = { 62 | "n_node": n_node, "n_feature": n_feature, "n_edge": n_edge, 63 | "n_class": n_class, 64 | "n_train": n_train, "n_test": n_test, 65 | "meaning_weight": meaning_weight, 66 | "max_degree": 
max_degree, "min_degree": min_degree, "mean_degree": mean_degree, 67 | "max_labels": max_labels / n_train, "min_labels": min_labels / n_train, 68 | # "label_distribute": label_distribute 69 | } 70 | 71 | return info 72 | -------------------------------------------------------------------------------- /code_submission/preprocessing/prepredict.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time: 2020/5/14 20:41 4 | # @Author: Mecthew 5 | import time 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import scipy 10 | from sklearn.svm import LinearSVC 11 | from sklearn.linear_model import logistic 12 | from sklearn.calibration import CalibratedClassifierCV 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.preprocessing import OneHotEncoder 15 | import scipy.sparse as sp 16 | from utils.logger import get_logger 17 | logger = get_logger("INFO") 18 | 19 | 20 | class SVM: 21 | def __init__(self, **kwargs): 22 | self.name = "SVM" 23 | self._model = CalibratedClassifierCV(LinearSVC(C=1.0, max_iter=500, class_weight=None, random_state=666)) 24 | 25 | def fit(self, x_train, y_train): 26 | self._model.fit(x_train, y_train) 27 | 28 | def predict(self, x_test): 29 | return self._model.predict_proba(x_test) 30 | 31 | 32 | class LR: 33 | def __init__(self, **kwargs): 34 | self.name = "LR" 35 | self._model = logistic.LogisticRegression(C=1.0, solver="liblinear", multi_class="auto", 36 | class_weight=None, max_iter=100, random_state=666) 37 | 38 | def fit(self, x_train, y_train): 39 | self._model.fit(x_train, y_train) 40 | 41 | def predict(self, x_test): 42 | return self._model.predict_proba(x_test) 43 | 44 | 45 | def prepredict(graph_df, train_indices, use_valid, use_ohe=False): 46 | t1 = time.time() 47 | fea_table = graph_df['fea_table'].set_index(keys="node_index") 48 | train_indices = train_indices 49 | if use_valid: 50 | valid_indices = list(set(graph_df['train_indices']) - set(train_indices)) 51 | test_indices = graph_df['test_indices'] + valid_indices 52 | else: 53 | test_indices = graph_df['test_indices'] 54 | train_label = graph_df['train_label'].set_index('node_index').loc[train_indices][['label']] 55 | 56 | x_train, y_train = fea_table.loc[train_indices].to_numpy(), train_label.to_numpy() 57 | x_test = fea_table.loc[test_indices].to_numpy() 58 | lr = LR() 59 | lr.fit(x_train, y_train) 60 | 61 | if use_ohe: 62 | ohe = OneHotEncoder(handle_unknown="ignore").fit(y_train.reshape(-1, 1)) 63 | x_train_feat, x_test_feat = ohe.transform(np.argmax(lr.predict(x_train), axis=1).reshape(-1, 1)).toarray(), \ 64 | ohe.transform(np.argmax(lr.predict(x_test), axis=1).reshape(-1, 1)).toarray() 65 | else: 66 | x_train_feat, x_test_feat = lr.predict(x_train), \ 67 | lr.predict(x_test) 68 | pre_feat = np.concatenate([x_train_feat, x_test_feat], axis=0) 69 | total_indices = np.concatenate([train_indices, test_indices], axis=0) 70 | 71 | train_predict = np.argmax(x_train_feat, axis=1) 72 | train_acc = accuracy_score(y_true=y_train, y_pred=train_predict) 73 | t2 = time.time() 74 | logger.info("Time cost for training {}: {}s, train acc {}".format(lr.name, t2-t1, train_acc)) 75 | 76 | return pd.DataFrame(data=pre_feat, index=total_indices) 77 | 78 | 79 | def lpa_predict(graph_df, n_class, train_indices, use_valid, max_iter=100, tol=1e-3, use_ohe=False): 80 | t1 = time.time() 81 | train_indices = train_indices 82 | if use_valid: 83 | valid_indices = list(set(graph_df['train_indices']) - 
set(train_indices)) 84 | test_indices = graph_df['test_indices'] + valid_indices 85 | else: 86 | test_indices = graph_df['test_indices'] 87 | train_label = graph_df['train_label'].set_index('node_index').loc[train_indices][['label']].to_numpy() 88 | print("Train label shape {}".format(train_label.shape)) 89 | train_label = train_label.reshape(-1) 90 | edges = graph_df['edge_file'][['src_idx', 'dst_idx', 'edge_weight']].to_numpy() 91 | edge_index = edges[:, :2].astype(np.int).transpose() # transpose to (2, num_edges) 92 | edge_weight = edges[:, 2].astype(np.float) 93 | num_nodes = len(train_indices) + len(test_indices) 94 | 95 | t2 = time.time() 96 | total_indices = np.concatenate([train_indices, test_indices], axis=0) 97 | adj = sp.coo_matrix((edge_weight, edge_index), shape=(num_nodes, num_nodes)).tocsr() 98 | adj = adj[total_indices] # reorder 99 | adj = adj[:, total_indices] 100 | 101 | t3 = time.time() 102 | logger.debug("Time cost for transform adj {}s".format(t3 - t2)) 103 | row_sum = np.array(adj.sum(axis=1), dtype=np.float) 104 | d_inv = np.power(row_sum, -1).flatten() 105 | d_inv[np.isinf(d_inv)] = 0. 106 | normal_adj = sp.diags(d_inv).dot(adj).tocsr().transpose() 107 | 108 | Pll = normal_adj[:len(train_indices), :len(train_indices)].copy() 109 | Plu = normal_adj[:len(train_indices), len(train_indices):].copy() 110 | Pul = normal_adj[len(train_indices):, :len(train_indices)].copy() 111 | Puu = normal_adj[len(train_indices):, len(train_indices):].copy() 112 | label_mat = np.eye(n_class)[train_label] 113 | label_mat_prob = label_mat.copy() 114 | print("Pul shape {}, label_mat shape {}".format(Pul.shape, label_mat_prob.shape)) 115 | 116 | Pul_dot_lable_mat = Pul.dot(label_mat) 117 | unlabel_mat = np.zeros(shape=(len(test_indices), n_class)) 118 | iter, changed = 0, np.inf 119 | t4 = time.time() 120 | logger.debug("Time cost for prepare matrix {}s".format(t4-t3)) 121 | while iter < max_iter and changed > tol: 122 | if iter % 10 == 0: 123 | logger.debug("---> Iteration %d/%d, changed: %f" % (iter, max_iter, changed)) 124 | 125 | iter += 1 126 | pre_unlabel_mat = unlabel_mat 127 | unlabel_mat = Puu.dot(unlabel_mat) + Pul_dot_lable_mat 128 | label_mat_prob = Pll.dot(label_mat_prob) + Plu.dot(pre_unlabel_mat) 129 | changed = np.abs(pre_unlabel_mat - unlabel_mat).sum() 130 | logger.debug("Time cost for training lpa {}".format(time.time() - t4)) 131 | # preds = np.argmax(np.array(unlabel_mat), axis=1) 132 | # unlabel_mat = np.eye(n_class)[preds] 133 | train_acc = accuracy_score(y_true=train_label, y_pred=np.argmax(label_mat_prob, axis=1)) 134 | logger.info("LPA training acc {}".format(train_acc)) 135 | logger.info("Time cost for LPA {}s".format(time.time() - t1)) 136 | total_indices = np.concatenate([train_indices, test_indices], axis=0) 137 | if use_ohe: 138 | ohe = OneHotEncoder(handle_unknown="ignore").fit(train_label.reshape(-1, 1)) 139 | label_mat_ohe = ohe.transform(np.argmax(label_mat_prob, axis=1).reshape(-1, 1)).toarray() 140 | unlabel_mat_ohe = ohe.transform(np.argmax(unlabel_mat, axis=1).reshape(-1, 1)).toarray() 141 | lu_mat_ohe = np.concatenate([label_mat_ohe, unlabel_mat_ohe], axis=0) 142 | return pd.DataFrame(data=lu_mat_ohe, index=total_indices), train_acc 143 | else: 144 | unlabel_mat_prob = unlabel_mat 145 | lu_mat_prob = np.concatenate([label_mat_prob, unlabel_mat_prob], axis=0) 146 | return pd.DataFrame(data=lu_mat_prob, index=total_indices), train_acc 147 | 148 | 149 | def is_nonnegative_integer(x_feats): 150 | is_nonnegative = (x_feats >= 0).all() 151 | is_integer = 
True 152 | for feat in x_feats: 153 | feat_int_sum = np.array(feat, dtype=np.int).sum() 154 | feat_sum = np.array(feat, dtype=np.float).sum() 155 | is_integer = (feat_int_sum == feat_sum) 156 | if is_integer is False: 157 | break 158 | return is_nonnegative and is_integer 159 | -------------------------------------------------------------------------------- /code_submission/utils/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "get_logger", "torch_train", "GraphDataset", "Sampler", "get_time_budget", "set_time_budget", 3 | "GraphSampleDataset", "TimeOutError" 4 | ] 5 | 6 | from .logger import get_logger 7 | from .train import torch_train 8 | from .data import GraphDataset, GraphSampleDataset, Sampler 9 | from .timer import set_time_budget, get_time_budget, TimeOutError 10 | -------------------------------------------------------------------------------- /code_submission/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import get_logger 3 | 4 | import copy 5 | 6 | logger = get_logger("INFO") 7 | 8 | BEST_VALID_TOP_NUM = 3 9 | 10 | 11 | class Callback: 12 | def __init__(self): pass 13 | def on_train_begin(self, *args, **kwargs): pass 14 | def on_train_end(self, *args, **kwargs): pass 15 | def on_epoch_begin(self, *args, **kwargs): pass 16 | def on_epoch_end(self, *args, **kwargs): pass 17 | def on_batch_begin(self, *args, **kwargs): pass 18 | def on_batch_end(self, *args, **kwargs): pass 19 | def on_loss_begin(self, *args, **kwargs): pass 20 | def on_loss_end(self, *args, **kwargs): pass 21 | def on_step_begin(self, *args, **kwargs): pass 22 | def on_step_end(self, *args, **kwargs): pass 23 | 24 | 25 | class EarlyStopping(Callback): 26 | def __init__(self, patience=5, tol=0.001, min_epochs=1, use_adaptive_topK=False): 27 | super(EarlyStopping, self).__init__() 28 | self.patience = patience 29 | self.tol = tol 30 | self.best = -0.1 31 | # self.best = np.inf 32 | self.best_epoch = -1 33 | self.wait = 0 34 | self.stopped_epoch = -1 35 | # self.threshold = threshold 36 | self.min_epochs= min_epochs 37 | self.topK_list = [] 38 | self.use_adaptive_topK = use_adaptive_topK 39 | self.loopn_best = { 40 | "pred": None, 41 | "acc": -1.0, 42 | "loss": 9999 43 | } 44 | 45 | def on_epoch_end(self, epoch, val_acc, epoch_loss, y_hat): 46 | use_adaptive_topK = self.use_adaptive_topK 47 | val_loss = min(1.0, val_acc + self.tol) 48 | if use_adaptive_topK: 49 | is_add = self.add_into_adaptive_topK(epoch, y_hat, val_acc, epoch_loss) 50 | else: 51 | self.topK_list = self.add_into_topK(self.topK_list, y_hat, val_acc, epoch_loss) 52 | 53 | if val_acc > self.best and self.best < 0.999: 54 | self.best = max(val_loss - self.tol, self.best) 55 | self.best_epoch = epoch 56 | self.wait = 0 57 | else: 58 | self.wait += 1 59 | if self.wait >= self.patience and epoch > self.min_epochs: 60 | self.stopped_epoch = epoch 61 | logger.warning( 62 | f"Early stopping conditioned on val_acc patience {self.patience} " 63 | f"in epoch {self.stopped_epoch}. 
" 64 | f"Metric is {val_acc}, best {self.best} in epoch {self.best_epoch}" 65 | ) 66 | if use_adaptive_topK: 67 | if is_add is False: 68 | self.add_into_adaptive_topK(epoch, y_hat, val_acc, epoch_loss, early_stop=True) 69 | return True 70 | return False 71 | 72 | def add_into_topK(self, topK_list, y_hat, acc, loss): 73 | valid_dict = { 74 | "pred": None, 75 | "acc": acc, 76 | "loss": loss 77 | } 78 | 79 | if len(topK_list) < BEST_VALID_TOP_NUM: 80 | valid_dict["pred"] = y_hat 81 | topK_list.append(valid_dict) 82 | return topK_list 83 | if (acc <= topK_list[-1]["acc"]) or ((acc == topK_list[-1]["acc"]) and (loss > topK_list[-1]["loss"])): 84 | return topK_list 85 | valid_dict["pred"] = y_hat 86 | topK_list[-1] = valid_dict 87 | topK_list = sorted(topK_list, key=lambda x: (-x["acc"], x["loss"])) 88 | return topK_list 89 | 90 | def add_into_adaptive_topK(self, epoch, y_hat, acc, loss, early_stop=False): 91 | if early_stop or (epoch > 0 and (epoch + 1) % 10 == 0): 92 | # self.topK_list.append(self.loopn_best) 93 | self.topK_list = self.add_into_topK(self.topK_list, self.loopn_best["pred"], self.loopn_best["acc"], self.loopn_best["loss"]) 94 | self.loopn_best = { 95 | "pred": None, 96 | "acc": -1.0, 97 | "loss": 9999 98 | } 99 | return True 100 | else: 101 | valid_dict = { 102 | "pred": None, 103 | "acc": acc, 104 | "loss": loss 105 | } 106 | if (acc > self.loopn_best["acc"]) or ((acc == self.loopn_best["acc"]) and (loss < self.loopn_best["loss"])): 107 | valid_dict["pred"] = y_hat 108 | self.loopn_best = valid_dict 109 | return False 110 | -------------------------------------------------------------------------------- /code_submission/utils/data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.utils.data import Dataset 4 | from torch_geometric.data import Data 5 | from collections import defaultdict 6 | 7 | from utils import get_logger 8 | 9 | logger = get_logger("DEBUG") 10 | 11 | 12 | class GraphDataset(Dataset): 13 | 14 | def __init__(self, data): 15 | super(GraphDataset, self).__init__() 16 | self.data = data 17 | self.length = data.train_mask.sum() 18 | logger.info(f"Graph dataset: length({self.length})") 19 | self.indices = (data.train_mask == 1).nonzero() 20 | 21 | def __getitem__(self, index: int): 22 | index = self.indices[index] 23 | return index 24 | 25 | def __len__(self) -> int: 26 | return self.length 27 | 28 | def resample(self): 29 | return self 30 | 31 | 32 | class GraphSampleDataset(Dataset): 33 | 34 | def __init__( 35 | self, data, n_class, y_train, 36 | ): 37 | super(GraphSampleDataset, self).__init__() 38 | self.data = data 39 | self.class_info = self._init_info(n_class, y_train) 40 | self.train_indices = (data.train_mask == 1).nonzero() 41 | self.indices, self.length = None, None 42 | self.n_class = n_class 43 | self.min_num, self.max_num, self.sample_nums = self._init_sample_num( 44 | len(y_train) / n_class 45 | ) 46 | 47 | def _init_sample_num(self, n_mean): 48 | num = [self.class_info[i]['num'] for i in range(self.n_class)] 49 | n_min = min(num) 50 | n_max = max(num) 51 | n_max = int(min(n_max * 0.8, n_mean * 1.2)) 52 | n_min = int(max(n_min * 1.5, n_mean * 0.5)) 53 | sample_nums = [max(n_min, min(n_max, ele)) for ele in num] 54 | for i in range(self.n_class): 55 | print(f"Sample {sample_nums[i]} / {num[i]} from class {i}") 56 | return n_min, n_max, sample_nums 57 | 58 | def resample(self): 59 | self.indices = self._init_indices(self.n_class) 60 | self.length = len(self.indices) 
61 | return self 62 | 63 | def _init_indices(self, n_class): 64 | all_indices = [] 65 | for i in range(n_class): 66 | num = self.class_info[i]['num'] 67 | indices = self.class_info[i]['indices'] 68 | sampled_indices = [] 69 | sampled_num = self.sample_nums[i] 70 | while sampled_num > 0: 71 | cur = min(sampled_num, num) 72 | sampled_indices.append(np.random.permutation(indices)[:cur]) 73 | sampled_num -= cur 74 | all_indices += sampled_indices 75 | return np.concatenate(all_indices) 76 | 77 | def _init_info(self, n_class, y_train): 78 | class_info = defaultdict(dict) 79 | for i in range(n_class): 80 | indices = np.where(y_train[:] == i)[0] 81 | num = len(indices) 82 | class_info[i]['num'] = num 83 | class_info[i]['indices'] = indices 84 | return class_info 85 | 86 | def __getitem__(self, index: int): 87 | index = self.train_indices[self.indices[index]] 88 | return index 89 | 90 | def __len__(self): 91 | return self.length 92 | 93 | 94 | class Sampler: 95 | def __init__(self, data, num_edges, device): 96 | self.data = data 97 | self.device = device 98 | self._origin_num_edges = num_edges 99 | self.adj, self.unique_edges, self.num_edges = None, None, None 100 | 101 | def _construct_adj(self): 102 | self.adj, self.unique_edges = Sampler.__construct_adj(self._origin_num_edges, self.data.edge_index) 103 | self.num_edges = len(self.unique_edges) 104 | print(f"num edge {self._origin_num_edges}, unique edge {self.num_edges}") 105 | 106 | @staticmethod 107 | def __construct_adj(num_edges, edges_tensor): 108 | unique = np.zeros(num_edges, dtype=np.bool) 109 | adj = np.zeros(num_edges, dtype=np.int) 110 | edges = edges_tensor.cpu().numpy() 111 | edges_dict = defaultdict(lambda: 0) 112 | for i in range(num_edges): 113 | if not (edges[1, i], edges[0, i]) in edges_dict: 114 | unique[i] = True 115 | edges_dict[(edges[0, i], edges[1, i])] = i 116 | for i in range(num_edges): 117 | adj[i] = edges_dict[(edges[1, i], edges[0, i])] 118 | del edges_dict 119 | return adj, np.argwhere(unique) 120 | 121 | def stub_sampler(self): 122 | return self.data.to(self.device) 123 | 124 | def _make_undirected(self, edge_index): 125 | symmetry = self.adj[edge_index] 126 | undirected = np.union1d(edge_index, symmetry) 127 | print(f"Before undirected {len(edge_index)}, after undirected {len(undirected)}") 128 | return undirected 129 | 130 | def random_edge_sampler(self, percent=1.0): 131 | """ 132 | Randomly drop edge 133 | Args: 134 | percent: preserve edges' percent 135 | 136 | Returns: data 137 | 138 | """ 139 | 140 | if percent >= 1.0: 141 | return self.stub_sampler() 142 | 143 | if self.adj is None: 144 | self._construct_adj() 145 | 146 | data = self.data 147 | num_preserved_edges = int(percent * self.num_edges) 148 | perm = self.unique_edges[np.random.permutation(self.num_edges)[:num_preserved_edges]] 149 | perm = self._make_undirected(perm) 150 | random_data = Data( 151 | x=data.x, y=data.y, 152 | train_indices=data.train_indices, train_mask=data.train_mask, 153 | test_indices=data.test_indices, test_mask=data.test_mask, 154 | edge_index=data.edge_index[:, perm], edge_weight=data.edge_weight[perm] 155 | ) 156 | if hasattr(data, "valid_indices"): 157 | random_data.valid_indices = data.valid_indices 158 | random_data.valid_mask = data.valid_mask 159 | return random_data.to(self.device) 160 | -------------------------------------------------------------------------------- /code_submission/utils/drop_edge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- 
coding: utf-8 -*- 3 | # @Time: 2020/5/12 23:20 4 | # @Author: Mecthew 5 | import numpy as np 6 | import torch_geometric.utils as gtils 7 | 8 | 9 | class DropEdgeEachStep: 10 | def __init__(self, adj, unique_edges): 11 | self.adj = adj 12 | self.unique_edges = unique_edges 13 | self.counter = 0 14 | 15 | def drop_edges(self, edge_index, edge_weight, drop_rate=0.2): 16 | num_edges = len(self.unique_edges) 17 | num_preserved_edges = int(num_edges * (1-drop_rate)) 18 | preserved_edges_idx = self.unique_edges[np.random.permutation(num_edges)[:num_preserved_edges]] 19 | perm = self._make_undirected(preserved_edges_idx) 20 | if self.counter == 0: 21 | print(f"Is undirected after drop edges: {gtils.is_undirected(edge_index[:, perm])}") 22 | self.counter += 1 23 | return edge_index[:, perm], edge_weight[perm] 24 | 25 | def _make_undirected(self, edge_index): 26 | symmetry = self.adj[edge_index] 27 | undirected = np.union1d(edge_index, symmetry) 28 | return undirected 29 | -------------------------------------------------------------------------------- /code_submission/utils/ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def get_top_models_by_std(models_info, ensemble_std_threshold=1e-2): 5 | """ 6 | select model by std 7 | Args: 8 | models_info: (model, metric), where smaller metric indicates better model 9 | ensemble_std_threshold: std threshold 10 | Returns: 11 | 12 | """ 13 | pred, metrics = zip(*sorted(models_info, key=lambda x: -x[1])) 14 | 15 | print("sorted model metrics:") 16 | top_num = 0 17 | for i in range(len(metrics)): 18 | print("metrics: {}".format(metrics[i])) 19 | std = np.std(metrics[:i]) 20 | top_num = i 21 | if std > ensemble_std_threshold: 22 | break 23 | pred = pred[:top_num] 24 | metrics = np.asarray(metrics[:top_num]) 25 | metrics = metrics + 15 * (metrics - metrics.mean()) 26 | # metrics[np.where(metrics > 0.01)] = 0.01 27 | weights = metrics / metrics.sum() 28 | return list(zip(pred, weights)) 29 | 30 | 31 | def get_top_models_by_r(models_info, range_threshold=1e-2): 32 | """ 33 | select model by std 34 | Args: 35 | models_info: (model, metric), where smaller metric indicates better model 36 | range_threshold: range threshold 37 | Returns: 38 | 39 | """ 40 | pred, metrics, model_name = zip(*sorted(models_info, key=lambda x: -x[1])) 41 | 42 | print("sorted model metrics:") 43 | top_num = 0 44 | for i in range(len(metrics)): 45 | print("metrics: {}\tmodel_name: {}".format(metrics[i], model_name[i])) 46 | r = np.abs(metrics[0] - metrics[i]) 47 | top_num = i 48 | if r > range_threshold: 49 | break 50 | if i == len(metrics)-1: 51 | top_num = i + 1 52 | pred = pred[:top_num] 53 | metrics = np.asarray(metrics[:top_num]) 54 | metrics = metrics + 15 * (metrics - metrics.mean()) 55 | weights = metrics / metrics.sum() 56 | return list(zip(pred, weights)) 57 | -------------------------------------------------------------------------------- /code_submission/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | def get_logger(verbosity_level, use_error_log=False): 6 | logger = logging.getLogger(__file__) 7 | logging_level = getattr(logging, verbosity_level) 8 | logger.setLevel(logging_level) 9 | formatter = logging.Formatter( 10 | fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s') 11 | if not logger.handlers: 12 | stdout_handler = logging.StreamHandler(sys.stdout) 13 | stdout_handler.setLevel(logging_level) 14 
| stdout_handler.setFormatter(formatter) 15 | logger.addHandler(stdout_handler) 16 | if use_error_log: 17 | stderr_handler = logging.StreamHandler(sys.stderr) 18 | stderr_handler.setLevel(logging.WARNING) 19 | stderr_handler.setFormatter(formatter) 20 | logger.addHandler(stderr_handler) 21 | logger.propagate = False 22 | return logger 23 | -------------------------------------------------------------------------------- /code_submission/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | # import functools 3 | # import multiprocessing 4 | 5 | 6 | TIME_BUDGET = None 7 | 8 | # multiprocessing.get_context("forkserver") 9 | # manager = multiprocessing.Manager() 10 | 11 | 12 | def set_time_budget(time_budget): 13 | global TIME_BUDGET 14 | TIME_BUDGET = TimeBudget(time_budget) 15 | 16 | 17 | def get_time_budget(): 18 | global TIME_BUDGET 19 | return TIME_BUDGET 20 | 21 | 22 | def _wrapped_method(m, return_dict, args, kwargs): 23 | res = m(*args, **kwargs) 24 | return_dict["res"] = res 25 | return return_dict 26 | 27 | 28 | # def time_limit(milliseconds=1000): 29 | # def wrapper(method): 30 | # @functools.wraps(method) 31 | # def timed(*args, **kwargs): 32 | # global TIME_BUDGET 33 | # time_budget = TIME_BUDGET 34 | # return_dict = manager.dict() 35 | # 36 | # p = multiprocessing.Process( 37 | # target=_wrapped_method, 38 | # args=(method, return_dict, args, kwargs), 39 | # ) 40 | # p.start() 41 | # # use 80% time for fitting 42 | # running_time = min(time_budget.remain * 0.8, milliseconds) 43 | # p.join(running_time) 44 | # if p.is_alive(): 45 | # p.kill() 46 | # print(f"Task exceeds the time budget {running_time} and has been cancelled") 47 | # res = None 48 | # else: 49 | # res = return_dict.get("res") 50 | # p.terminate() 51 | # print(f"After running, there is {time_budget.remain: .4f}s remaining time") 52 | # return res 53 | # return timed 54 | # return wrapper 55 | 56 | 57 | class TimeOutError(Exception): 58 | pass 59 | 60 | 61 | class TimeBudget: 62 | def __init__(self, time_budget): 63 | self._time_budget = time_budget 64 | self._start_time = time.time() 65 | 66 | def reset(self): 67 | self._start_time = time.time() 68 | 69 | @property 70 | def remain(self): 71 | escape_time = time.time() - self._start_time 72 | return self._time_budget - escape_time 73 | 74 | @remain.setter 75 | def remain(self, value): 76 | self._time_budget = value 77 | 78 | def timing(self, seconds=None, frac=1.0): 79 | if seconds is None: 80 | seconds = self.remain * frac 81 | else: 82 | seconds = min(seconds, self.remain * frac) 83 | return TimeBudget(seconds) 84 | 85 | def check(self): 86 | if self.remain < 0: 87 | raise TimeOutError(f"Time out {self.remain: 0.4f}") 88 | 89 | def __add__(self, other): 90 | # self._time_budget += other 91 | return self 92 | 93 | def __sub__(self, other): 94 | # self._time_budget -= other 95 | return self 96 | 97 | def __str__(self): 98 | return str(self.remain) 99 | 100 | def __repr__(self): 101 | return repr(self.remain) 102 | 103 | def __format__(self, format_spec): 104 | return format(self.remain, format_spec) 105 | -------------------------------------------------------------------------------- /code_submission/utils/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from tqdm import tqdm 4 | from torch.utils.data import DataLoader 5 | from sklearn.metrics import accuracy_score 6 | 7 | from .logger import get_logger 8 | from 
.timer import get_time_budget 9 | from utils.callbacks import EarlyStopping 10 | 11 | logger = get_logger("DEBUG") 12 | 13 | 14 | def get_accuracy(y_hat, indices, data): 15 | accuracy = accuracy_score( 16 | data.y[indices].cpu().numpy(), 17 | y_hat[indices].argmax(axis=1).reshape(-1)) 18 | return accuracy 19 | 20 | 21 | def torch_train( 22 | data, dataset, model, optimizer, loss_func, 23 | epochs=512, batch_size=32, patience=5, 24 | clip_grad=0, 25 | min_epochs=1, 26 | valid_indices=None, all_data=False, 27 | use_adaptive_topK=False, model_topK=None, 28 | time_budget=None 29 | ): 30 | early_stopping_cb = EarlyStopping(patience=patience, min_epochs=min_epochs, use_adaptive_topK=use_adaptive_topK) 31 | early_stopping_cb.topK_list = model_topK 32 | # on epoch begin 33 | with tqdm(total=epochs) as t: 34 | try: 35 | for i in range(epochs): 36 | data_loader = DataLoader(dataset.resample(), batch_size=batch_size, shuffle=True) 37 | model.train() 38 | epoch_loss = 0 39 | for indices in data_loader: 40 | # on batch begin 41 | optimizer.zero_grad() 42 | y_hat = model(data) 43 | loss = loss_func(y_hat[indices].squeeze(), data.y[indices].squeeze()) 44 | loss.backward() 45 | if clip_grad > 0: 46 | for p in model.parameters(): 47 | nn.utils.clip_grad_norm_(p, clip_grad) 48 | optimizer.step() 49 | # on batch end 50 | epoch_loss += loss.item() 51 | 52 | model.eval() 53 | with torch.no_grad(): 54 | y_hat = model(data) 55 | y_hat = y_hat.cpu().numpy() 56 | 57 | # on epoch end 58 | if valid_indices is not None: 59 | valid_acc = get_accuracy(y_hat, valid_indices, data) 60 | else: 61 | valid_acc = get_accuracy(y_hat, data.train_indices, data) 62 | 63 | t.set_postfix( 64 | Epoch=f"{i: 03,d}", 65 | loss=f"{epoch_loss: 0.5f}", 66 | acc=f"{valid_acc: 0.5f}", 67 | patience=f"{early_stopping_cb.wait: 03,d}/{early_stopping_cb.patience}" 68 | ) 69 | t.update(1) 70 | 71 | if early_stopping_cb.on_epoch_end(i, valid_acc, epoch_loss, y_hat): 72 | break 73 | try: 74 | time_budget.check() 75 | except Exception as e: 76 | print(e) 77 | return early_stopping_cb.topK_list, -early_stopping_cb.best, "time_exceed" 78 | except RuntimeError as exception: 79 | if "out of memory" in str(exception): 80 | logger.info("we met cuda out of memory") 81 | return early_stopping_cb.topK_list, -early_stopping_cb.best, "oom" 82 | else: 83 | raise exception 84 | return early_stopping_cb.topK_list, -early_stopping_cb.best, None 85 | -------------------------------------------------------------------------------- /ingestion/common.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=logging-fstring-interpolation, broad-except 2 | """common""" 3 | import logging 4 | import importlib 5 | import sys 6 | 7 | 8 | class ModelApiError(Exception): 9 | """Model api error""" 10 | 11 | 12 | def get_logger(verbosity_level, name, use_error_log=False): 13 | """Set logging format to something like: 14 | 2019-04-25 12:52:51,924 INFO score.py: 15 | """ 16 | logger = logging.getLogger(name) 17 | logging_level = getattr(logging, verbosity_level) 18 | logger.setLevel(logging_level) 19 | formatter = logging.Formatter( 20 | fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s') 21 | stdout_handler = logging.StreamHandler(sys.stdout) 22 | stdout_handler.setLevel(logging_level) 23 | stdout_handler.setFormatter(formatter) 24 | logger.addHandler(stdout_handler) 25 | if use_error_log: 26 | stderr_handler = logging.StreamHandler(sys.stderr) 27 | stderr_handler.setLevel(logging.WARNING) 28 | 
stderr_handler.setFormatter(formatter) 29 | logger.addHandler(stderr_handler) 30 | logger.propagate = False 31 | return logger 32 | 33 | 34 | VERBOSITY_LEVEL = 'INFO' 35 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__) 36 | METHOD_LIST = ['train_predict'] 37 | 38 | 39 | def _check_umodel_methed(umodel): 40 | # Check if the model has methods in METHOD_LIST 41 | for attr in ['train_predict']: 42 | if not hasattr(umodel, attr): 43 | raise ModelApiError( 44 | f"Your model object doesn't have the method attr") 45 | 46 | 47 | def import_umodel(): 48 | """import user model""" 49 | model_cls = importlib.import_module('model').Model 50 | _check_umodel_methed(model_cls) 51 | 52 | return model_cls 53 | 54 | 55 | def init_usermodel(): 56 | """initialize user model""" 57 | return import_umodel()() 58 | -------------------------------------------------------------------------------- /ingestion/dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | AutoWSL datasets. 3 | """ 4 | import copy 5 | from os.path import join 6 | from datetime import datetime 7 | import numpy as np 8 | import pandas as pd 9 | import yaml 10 | from common import get_logger 11 | 12 | TYPE_MAP = { 13 | 'cat': str, 14 | 'multi-cat': str, 15 | 'str': str, 16 | 'num': np.float64, 17 | 'timestamp': 'str' 18 | } 19 | 20 | VERBOSITY_LEVEL = 'WARNING' 21 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__) 22 | TIMESTAMP_TYPE_NAME = 'timestamp' 23 | TRAIN_FILE = 'train_node_id.txt' 24 | TRAIN_LABEL = 'train_label.tsv' 25 | TEST_FILE = 'test_node_id.txt' 26 | INFO_FILE = 'config.yml' 27 | FEA_TABLE = 'feature.tsv' 28 | EDGE_FILE = 'edge.tsv' 29 | 30 | SEP = '\t' 31 | 32 | 33 | def _date_parser(millisecs): 34 | if np.isnan(float(millisecs)): 35 | return millisecs 36 | 37 | return datetime.fromtimestamp(float(millisecs)) 38 | 39 | 40 | class Dataset: 41 | """"Dataset""" 42 | def __init__(self, dataset_dir): 43 | """ 44 | train_dataset, test_dataset: list of strings 45 | train_label: np.array 46 | """ 47 | self.dataset_dir_ = dataset_dir 48 | self.metadata_ = self._read_metadata(join(dataset_dir, INFO_FILE)) 49 | self.edge_data = None 50 | self.train_indices = None 51 | self.train_label = None 52 | self.test_indices = None 53 | self.fea_table = None 54 | self.get_data() 55 | 56 | def get_data(self): 57 | """get all training data""" 58 | data = { 59 | 'fea_table': self.get_fea_table(), 60 | 'edge_file': self.get_edge(), 61 | 'train_indices': self.get_train_indices(), 62 | 'test_indices': self.get_test_indices(), 63 | 'train_label': self.get_train_label(), 64 | } 65 | return data 66 | 67 | def get_fea_table(self): 68 | """get train""" 69 | if self.fea_table is None: 70 | self.fea_table = self._read_dataset( 71 | join(self.dataset_dir_, FEA_TABLE)) 72 | return self.fea_table 73 | 74 | def get_edge(self): 75 | """get edge file""" 76 | dtype = { 77 | 'src_id': int, 78 | 'dst_idx': int, 79 | 'edge_weight': float 80 | } 81 | if self.edge_data is None: 82 | self.edge_data = pd.read_csv( 83 | join(self.dataset_dir_, EDGE_FILE), dtype=dtype, sep=SEP) 84 | return self.edge_data 85 | 86 | def get_train_label(self): 87 | """get train label""" 88 | dtype = { 89 | 'node_index': int, 90 | 'label': int, 91 | } 92 | if self.train_label is None: 93 | self.train_label = pd.read_csv( 94 | join(self.dataset_dir_, TRAIN_LABEL), dtype=dtype, sep=SEP) 95 | 96 | return self.train_label 97 | 98 | def get_test_indices(self): 99 | """get test index file""" 100 | if self.test_indices is None: 101 | with 
open(join(self.dataset_dir_, TEST_FILE), 'r') as ftmp: 102 | self.test_indices = [int(line.strip()) for line in ftmp] 103 | 104 | return self.test_indices 105 | 106 | def get_train_indices(self): 107 | """get train index file""" 108 | if self.train_indices is None: 109 | with open(join(self.dataset_dir_, TRAIN_FILE), 'r') as ftmp: 110 | self.train_indices = [int(line.strip()) for line in ftmp] 111 | 112 | return self.train_indices 113 | 114 | def get_metadata(self): 115 | """get metadata""" 116 | return copy.deepcopy(self.metadata_) 117 | 118 | @staticmethod 119 | def _read_metadata(metadata_path): 120 | with open(metadata_path, 'r') as ftmp: 121 | return yaml.safe_load(ftmp) 122 | 123 | def _read_dataset(self, dataset_path): 124 | schema = self.metadata_['schema'] 125 | if isinstance(schema, dict): 126 | table_dtype = {key: TYPE_MAP[val] for key, val in schema.items()} 127 | date_list = [key for key, val in schema.items() 128 | if val == TIMESTAMP_TYPE_NAME] 129 | dataset = pd.read_csv( 130 | dataset_path, sep=SEP, dtype=table_dtype, 131 | parse_dates=date_list, date_parser=_date_parser) 132 | else: 133 | dataset = pd.read_csv(dataset_path, sep=SEP) 134 | 135 | return dataset 136 | -------------------------------------------------------------------------------- /ingestion/metadata: -------------------------------------------------------------------------------- 1 | command: python $ingestion_program/ingestion.py --dataset_dir=$input --output_dir=$predictions --ingestion_program_dir=$ingestion_program --code_dir=$submission_program --score_dir=$output --temp_dir=$tmp 2 | -------------------------------------------------------------------------------- /ingestion/timing.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=logging-fstring-interpolation, broad-except 2 | """common""" 3 | import signal 4 | import math 5 | import time 6 | from contextlib import contextmanager 7 | import numpy as np 8 | import yaml 9 | from common import get_logger 10 | 11 | VERBOSITY_LEVEL = 'INFO' 12 | LOGGER = get_logger(VERBOSITY_LEVEL, __file__) 13 | 14 | CUM = 0 15 | RESET = 1 16 | MODES = set([CUM, RESET]) 17 | 18 | 19 | OP_MAP = { 20 | 'mean': np.mean, 21 | 'max': np.max, 22 | 'std': np.std, 23 | 'sum': sum, 24 | } 25 | 26 | 27 | class TimeoutException(Exception): 28 | """timeoutexception""" 29 | 30 | 31 | class Timer: 32 | """timer""" 33 | def __init__(self): 34 | self.total = {} 35 | self.history = {} 36 | self.modes = {} 37 | 38 | @classmethod 39 | def from_file(cls, save_file): 40 | """contruct timer from a save file""" 41 | timer = Timer() 42 | timer.load(save_file) 43 | return timer 44 | 45 | def add_process(self, pname, time_budget, mode=RESET): 46 | """set time_budget 47 | mode: CUM/RESET 48 | """ 49 | if pname in self.total: 50 | raise ValueError(f"Existing process of timer: {pname}") 51 | if mode not in MODES: 52 | raise ValueError(f"wrong process mode: {mode}") 53 | 54 | self.total[pname] = time_budget 55 | self.history[pname] = [] 56 | self.modes[pname] = mode 57 | 58 | @contextmanager 59 | def time_limit(self, pname, verbose=True): 60 | """limit time""" 61 | def signal_handler(signum, frame): 62 | raise TimeoutException(f"{pname}: Timed out!") 63 | signal.signal(signal.SIGALRM, signal_handler) 64 | time_budget = int(math.ceil(self.get_remain(pname))) 65 | signal.alarm(time_budget) 66 | start_time = time.time() 67 | 68 | try: 69 | 70 | if verbose: 71 | LOGGER.info(f'start {pname} with time budget {time_budget}') 72 | yield 73 | finally: 74 
|             exec_time = time.time() - start_time 75 |             signal.alarm(0) 76 |             self.history[pname].append(exec_time) 77 | 78 |             if verbose: 79 |                 LOGGER.info(f'{pname} success, time spent {exec_time} sec') 80 | 81 |             if self.get_remain(pname) <= 0: 82 |                 raise TimeoutException(f"{pname}: Timed out!") 83 | 84 |     def get_remain(self, pname): 85 |         """get remaining time of process""" 86 |         if self.modes[pname] == CUM: 87 |             remain = self.total[pname] - sum(self.history[pname]) 88 |         else: 89 |             remain = self.total[pname] 90 | 91 |         return remain 92 | 93 |     def get_all_remain(self): 94 |         """get remaining time of process""" 95 |         return {key: self.get_remain(key) for key in self.total.keys()} 96 | 97 |     def get_stats(self, pname): 98 |         """get stats of timing history""" 99 |         result = {} 100 |         for stat in ['sum', 'mean', 'max', 'std']: 101 |             history = self.history[pname] 102 |             if history: 103 |                 result[stat] = float(OP_MAP[stat](self.history[pname])) 104 |             else: 105 |                 result[stat] = 0 106 |         return result 107 | 108 |     def get_overall_duration(self): 109 |         """get overall duration""" 110 |         duration = 0 111 |         for _, value in self.history.items(): 112 |             duration += sum(value) 113 |         return duration 114 | 115 |     def get_all_stats(self): 116 |         """get all stats of timing history""" 117 |         stats = {pname: self.get_stats(pname) for pname in self.total.keys()} 118 |         return stats 119 | 120 |     def save(self, save_file): 121 |         """save timer""" 122 |         save_content = { 123 |             'total': self.total, 124 |             'history': self.history, 125 |             'modes': self.modes 126 |         } 127 |         with open(save_file, 'w') as ftmp: 128 |             yaml.dump(save_content, ftmp) 129 | 130 |     def load(self, save_file): 131 |         """load timer""" 132 |         with open(save_file, 'r') as ftmp: 133 |             save_content = yaml.safe_load(ftmp) 134 |         self.total = save_content['total'] 135 |         self.history = save_content['history'] 136 |         self.modes = save_content['modes'] 137 | -------------------------------------------------------------------------------- /meta_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | log_folder="$(pwd)/log_output/$1_output" 4 | 5 | if [ ! -x $log_folder ]; then 6 |   mkdir -p $log_folder 7 | fi 8 | 9 | run_times=$2 10 | IFS=" " 11 | datasets=($@) 12 | unset datasets[0] 13 | unset datasets[1] 14 | 15 | echo "${datasets[@]}" 16 | 17 | for (( i=1; i <= run_times; i++)) 18 | do 19 |     for dataset in ${datasets[@]} 20 |     do 21 |         if [[ $dataset =~ "/" ]];then # for dataset names like new-data/co-az, create the parent log directory (e.g. new-data) first 22 |             par_dir=(${dataset//// }[0]) 23 |             new_dir=${log_folder}"/"$par_dir 24 |             echo "Make dir: $new_dir" 25 |             if [ ! -x "$new_dir" ]; then 26 |                 mkdir "$new_dir" 27 |             fi 28 |         fi 29 |         dataset_dir="/home/chengfeng/autograph/public/$dataset" 30 |         cur_time="`date +%Y-%m-%d-%H-%M-%S`" 31 |         log_file="$log_folder/$dataset-$cur_time.log" 32 |         python_command="python run_local_test.py --dataset_dir=$dataset_dir 2>&1" 33 |         log_command="tee -i $log_file" 34 |         echo "Current time: $cur_time" 35 |         echo "Run command: $python_command" 36 |         echo "Log info into file: $log_file" 37 |         eval "$python_command | $log_command" 38 |     done 39 | done 40 | -------------------------------------------------------------------------------- /run_local.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | log_folder="./log_output" 4 | if [ ! 
-x $log_folder ]; then 5 |   mkdir $log_folder 6 | fi 7 | 8 | for arg in $* 9 | do 10 |     if [[ $arg =~ "/" ]];then # for dataset names like new-data/co-az, create the parent log directory (e.g. new-data) first 11 |         par_dir=(${arg//// }[0]) 12 |         new_dir=${log_folder}"/"$par_dir 13 |         echo "Make dir: $new_dir" 14 |         if [ ! -x "$new_dir" ]; then 15 |             mkdir "$new_dir" 16 |         fi 17 |     fi 18 |     dataset_dir="../public/$arg" 19 |     cur_time="`date +%Y-%m-%d-%H-%M-%S`" 20 |     log_file="$log_folder/$arg-$cur_time.log" 21 |     python_command="python run_local_test.py --dataset_dir=$dataset_dir 2>&1" 22 |     log_command="tee -i $log_file" 23 |     echo "Current time: $cur_time" 24 |     echo "Run command: $python_command" 25 |     echo "Log info into file: $log_file" 26 |     eval "$python_command | $log_command" 27 | done 28 | -------------------------------------------------------------------------------- /run_local_test.py: -------------------------------------------------------------------------------- 1 | """run local test in starting kit""" 2 | # pylint: disable=logging-fstring-interpolation 3 | 4 | import argparse 5 | import logging 6 | import os 7 | from os.path import join, isdir 8 | import shutil 9 | from multiprocessing import Process 10 | 11 | VERBOSITY_LEVEL = 'WARNING' 12 | 13 | logging.basicConfig( 14 |     level=getattr(logging, VERBOSITY_LEVEL), 15 |     format='%(asctime)s %(levelname)s %(filename)s: %(message)s', 16 |     datefmt='%Y-%m-%d %H:%M:%S' 17 | ) 18 | 19 | 20 | def _here(*args): 21 |     here = os.path.dirname(os.path.realpath(__file__)) 22 |     return os.path.join(here, *args) 23 | 24 | 25 | def _ingestion_program(starting_kit_dir): 26 |     return join(starting_kit_dir, 'ingestion', 'ingestion.py') 27 | 28 | 29 | def _scoring_program(starting_kit_dir): 30 |     return join(starting_kit_dir, 'scoring', 'score.py') 31 | 32 | 33 | def remove_dir(output_dir): 34 |     """Remove the directory `output_dir`. 35 |     This aims to clean existing output of last run of local test. 
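    In this kit that means the sample_result_submission and scoring_output folders removed by _clean below.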
36 | """ 37 | if isdir(output_dir): 38 | logging.info( 39 | f"Cleaning existing output directory of last run: {output_dir}") 40 | shutil.rmtree(output_dir) 41 | 42 | 43 | def _clean(starting_kit_dir): 44 | ingestion_output_dir = join(starting_kit_dir, 'sample_result_submission') 45 | score_dir = os.path.join(starting_kit_dir, 'scoring_output') 46 | remove_dir(ingestion_output_dir) 47 | remove_dir(score_dir) 48 | 49 | 50 | def run(dataset_dir, code_dir): 51 | """run""" 52 | # Current directory containing this script 53 | starting_kit_dir = _here() 54 | path_ingestion = _ingestion_program(starting_kit_dir) 55 | path_scoring = _scoring_program(starting_kit_dir) 56 | 57 | # Run ingestion and scoring at the same time 58 | command_ingestion = ( 59 | 'python ' 60 | # f'{path_ingestion} --dataset_dir={dataset_dir}/data ' 61 | f'{path_ingestion} --dataset_dir={dataset_dir}/train.data' 62 | f' --code_dir={code_dir}') 63 | 64 | command_scoring = ( 65 | # f'python {path_scoring} --solution_dir={dataset_dir}/solution') 66 | f'python {path_scoring} --solution_dir={dataset_dir}') 67 | 68 | def run_ingestion(): 69 | os.system(command_ingestion) 70 | 71 | def run_scoring(): 72 | os.system(command_scoring) 73 | 74 | ingestion_process = Process(name='ingestion', target=run_ingestion) 75 | scoring_process = Process(name='scoring', target=run_scoring) 76 | _clean(starting_kit_dir) 77 | 78 | ingestion_process.start() 79 | scoring_process.start() 80 | 81 | 82 | def _parse_args(): 83 | default_starting_kit_dir = _here() 84 | default_dataset_dir = join(default_starting_kit_dir, 'data', 'demo') 85 | default_code_dir = join(default_starting_kit_dir, 'code_submission') 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument('--dataset_dir', type=str, 89 | default=default_dataset_dir, 90 | help="Directory storing the dataset, should contain" 91 | "'data' and 'solution'") 92 | 93 | parser.add_argument('--code_dir', type=str, 94 | default=default_code_dir, 95 | help="Directory storing the submission code " 96 | "`model.py` and other necessary packages.") 97 | 98 | args = parser.parse_args() 99 | return args 100 | 101 | 102 | def main(): 103 | """main entry""" 104 | args = _parse_args() 105 | dataset_dir = args.dataset_dir 106 | code_dir = args.code_dir 107 | logging.info("#" * 50) 108 | logging.info("Begin running local test using") 109 | logging.info(f"code_dir = {code_dir}") 110 | logging.info(f"dataset_dir = {dataset_dir}") 111 | logging.info("#" * 50) 112 | run(dataset_dir, code_dir) 113 | 114 | 115 | if __name__ == '__main__': 116 | main() 117 | -------------------------------------------------------------------------------- /scoring/graph-score.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time: 2020/4/30 19:05 4 | # @Author: Mecthew 5 | 6 | import os 7 | import sys 8 | import re 9 | from datetime import datetime 10 | import numpy as np 11 | 12 | 13 | class ScoreTuple: 14 | def __init__(self, dataset_name, score_list): 15 | self.name = dataset_name 16 | self.dataset_score_list = score_list 17 | 18 | def __str__(self): 19 | return "%-18s\tmean %.6f, max %.6f, min %.6f, std %.6f, num %d" % (self.name, np.mean(self.dataset_score_list), np.max(self.dataset_score_list), 20 | np.min(self.dataset_score_list), np.std(self.dataset_score_list), len(self.dataset_score_list)) 21 | 22 | 23 | def read_score_of_dir(dir_path): 24 | score_list = [] 25 | time_cost_patten = re.compile(r'Scoring duration: ([0-9e+-\\.]+) 
sec.') 26 | score_patten = re.compile(r'The score of your algorithm on the task is: ([0-9\\.]+).') 27 | 28 | for file_name in os.listdir(dir_path): 29 | if file_name.endswith(".log"): 30 | file_path = os.path.join(dir_path, file_name) 31 | score, time_duration = None, None 32 | for line in open(file_path, 'r', encoding="utf8"): 33 | if line.strip().startswith(datetime.now().year.__str__()) and \ 34 | (score is None or time_duration is None): 35 | try: 36 | time_duration = time_cost_patten.findall(line.strip())[0] 37 | except Exception as e: 38 | pass 39 | try: 40 | score = score_patten.findall(line.strip())[0] 41 | except Exception as e: 42 | pass 43 | score_list.append((file_name, time_duration, score)) 44 | elif os.path.isdir(os.path.join(dir_path, file_name)): 45 | child_dir_path = os.path.join(dir_path, file_name) 46 | for subfile in os.listdir(child_dir_path): 47 | if subfile.endswith(".log"): 48 | file_path = os.path.join(child_dir_path, subfile) 49 | score, time_duration = None, None 50 | for line in open(file_path, 'r', encoding="utf8"): 51 | if line.strip().startswith(datetime.now().year.__str__()) and \ 52 | (score is None or time_duration is None): 53 | try: 54 | time_duration = time_cost_patten.findall(line.strip())[0] 55 | except Exception as e: 56 | pass 57 | try: 58 | score = score_patten.findall(line.strip())[0] 59 | except Exception as e: 60 | pass 61 | score_list.append((os.path.join(file_name, subfile), time_duration, score)) 62 | 63 | return score_list 64 | 65 | 66 | def main(argv): 67 | score_list = read_score_of_dir(argv[1]) 68 | mean_score_list, dataset_score_list = [], [] 69 | prev_dataset = None 70 | counter = 0 71 | dataset_name_patten = re.compile(r"(.*)-2020.*") 72 | for tup in score_list: 73 | counter += 1 74 | dataset_name = dataset_name_patten.findall(tup[0])[0] 75 | if dataset_name in ["coauthor", "az"]: 76 | dataset_name = "-".join(tup[0].split("-")[:2]) 77 | if prev_dataset is not None and dataset_name != prev_dataset and len(dataset_score_list) > 0: 78 | mean_score_list.append(ScoreTuple(prev_dataset, dataset_score_list)) 79 | dataset_score_list = [] 80 | prev_dataset = dataset_name 81 | if tup[-1] is not None: 82 | dataset_score_list.append(float(tup[-1])) 83 | if len(dataset_score_list) > 0: 84 | mean_score_list.append(ScoreTuple(prev_dataset, dataset_score_list)) 85 | 86 | mean_score_list = sorted(mean_score_list, key=lambda x: len(x.name)) 87 | for tup in mean_score_list: 88 | print(tup) 89 | # print("{:<15} {}".format(tup[0], str(tup[1]))) 90 | 91 | if __name__ == '__main__': 92 | main(sys.argv) -------------------------------------------------------------------------------- /scoring/metadata: -------------------------------------------------------------------------------- 1 | command: python $program/score.py --solution_dir=$hidden --prediction_dir=$predictions --score_dir=$output 2 | description: Compute scores for the competition 3 | -------------------------------------------------------------------------------- /scoring/score.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=logging-fstring-interpolation 2 | """scoring function for autograph""" 3 | 4 | import argparse 5 | import datetime 6 | import os 7 | from os.path import join 8 | import logging 9 | import sys 10 | import time 11 | 12 | import yaml 13 | import numpy as np 14 | import pandas as pd 15 | from collections import Counter 16 | from sklearn.metrics import accuracy_score 17 | 18 | from filelock import FileLock 19 | 20 | # 
Verbosity level of logging. 21 | # Can be: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL 22 | # VERBOSITY_LEVEL = 'INFO' 23 | VERBOSITY_LEVEL = 'DEBUG' 24 | WAIT_TIME = 30 25 | MAX_TIME_DIFF = datetime.timedelta(seconds=600) 26 | DEFAULT_SCORE = -1 27 | SOLUTION_FILE = 'test_label.tsv' 28 | 29 | 30 | def get_logger(verbosity_level, use_error_log=False): 31 | """Set logging format to something like: 32 | 2019-04-25 12:52:51,924 INFO score.py: 33 | """ 34 | logger = logging.getLogger(__file__) 35 | logging_level = getattr(logging, verbosity_level) 36 | logger.setLevel(logging_level) 37 | formatter = logging.Formatter( 38 | fmt='%(asctime)s %(levelname)s %(filename)s: %(message)s') 39 | stdout_handler = logging.StreamHandler(sys.stdout) 40 | stdout_handler.setLevel(logging_level) 41 | stdout_handler.setFormatter(formatter) 42 | logger.addHandler(stdout_handler) 43 | if use_error_log: 44 | stderr_handler = logging.StreamHandler(sys.stderr) 45 | stderr_handler.setLevel(logging.WARNING) 46 | stderr_handler.setFormatter(formatter) 47 | logger.addHandler(stderr_handler) 48 | logger.propagate = False 49 | return logger 50 | 51 | 52 | LOGGER = get_logger(VERBOSITY_LEVEL) 53 | 54 | 55 | def _here(*args): 56 | """Helper function for getting the current directory of the script.""" 57 | here_dir = os.path.dirname(os.path.realpath(__file__)) 58 | return os.path.abspath(join(here_dir, *args)) 59 | 60 | 61 | def _get_solution(solution_dir): 62 | """Get the solution array from solution directory.""" 63 | solution_file = join(solution_dir, SOLUTION_FILE) 64 | solution = pd.read_csv(solution_file, sep='\t') 65 | return solution 66 | 67 | 68 | def _get_prediction(prediction_dir): 69 | pred_file = join(prediction_dir, 'predictions') 70 | return pd.read_csv(pred_file)['label'] 71 | 72 | 73 | def _get_score(solution_dir, prediction_dir): 74 | """get score""" 75 | LOGGER.info('===== get solution') 76 | solution = _get_solution(solution_dir)['label'] 77 | LOGGER.info('===== read prediction') 78 | prediction = _get_prediction(prediction_dir) 79 | if solution.shape != prediction.shape: 80 | raise ValueError(f"Bad prediction shape: {prediction.shape}. 
" 81 | f"Expected shape: {solution.shape}") 82 | 83 | LOGGER.info('===== calculate score') 84 | LOGGER.debug(f'solution shape = {solution.shape}') 85 | LOGGER.debug(f'prediction shape = {prediction.shape}') 86 | score = accuracy_score(solution, prediction) 87 | 88 | def get_df(counter, name): 89 | counter = {k: v for (k, v) in sorted(counter.items(), key=lambda x: x[0])} 90 | keys = counter.keys() 91 | values = counter.values() 92 | return pd.DataFrame({name: list(values)}, index=keys) 93 | 94 | labels_count = Counter(solution) 95 | length = len(solution) 96 | labels = get_df(labels_count, "Label num") 97 | labels_ratio = get_df({k: labels_count[k] / length for k in labels_count}, "Label ratio") 98 | errors_count = Counter(solution[solution != prediction]) 99 | errors = get_df(errors_count, "Error") 100 | errors_ratio = get_df({k: errors_count[k] / labels_count[k] for k in errors_count}, "Error ratio") 101 | desc = labels.join(labels_ratio).join(errors).join(errors_ratio) 102 | LOGGER.debug(f"Desc:\n{desc}") 103 | 104 | return score 105 | 106 | 107 | def _update_score(args, duration): 108 | score = _get_score(solution_dir=args.solution_dir, 109 | prediction_dir=args.prediction_dir) 110 | # Update learning curve page (detailed_results.html) 111 | _write_scores_html(args.score_dir) 112 | # Write score 113 | LOGGER.info('===== write score') 114 | write_score(args.score_dir, score, duration) 115 | LOGGER.info(f"accuracy: {score:.4}") 116 | return score 117 | 118 | 119 | def _init_scores_html(detailed_results_filepath): 120 | html_head = (' ' 121 | '
119 | def _init_scores_html(detailed_results_filepath):
120 |     html_head = ('<html><head> <meta http-equiv="refresh" content="5"> '
121 |                  '</head><body><pre>')
122 |     html_end = '</pre></body></html>'
123 |     with open(detailed_results_filepath, 'a') as html_file:
124 |         html_file.write(html_head)
125 |         html_file.write("Starting training process... <br> Please be patient. "
126 |                         "Learning curves will be generated when first "
127 |                         "predictions are made.")
128 |         html_file.write(html_end)
129 |
130 |
131 | def _write_scores_html(score_dir, auto_refresh=True, append=False):
132 |     filename = 'detailed_results.html'
133 |     if auto_refresh:
134 |         html_head = ('<html><head> <meta http-equiv="refresh" content="5"> '
135 |                      '</head><body><pre>')
136 |     else:
137 |         html_head = """<html><body><pre>"""
138 |     html_end = '</pre></body></html>'
139 |     if append:
140 |         mode = 'a'
141 |     else:
142 |         mode = 'w'
143 |     filepath = join(score_dir, filename)
144 |     with open(filepath, mode) as html_file:
145 |         html_file.write(html_head)
146 |         html_file.write(html_end)
147 |     LOGGER.debug(f"Wrote learning curve page to {filepath}")
148 |
149 |
150 | def write_score(score_dir, score, duration):
151 |     """Write score and duration to score_dir/scores.txt"""
152 |     score_filename = join(score_dir, 'scores.txt')
153 |     with open(score_filename, 'w') as ftmp:
154 |         ftmp.write(f'score: {score}\n')
155 |         ftmp.write(f'Duration: {duration}\n')
156 |     LOGGER.debug(f"Wrote to score_filename={score_filename} with "
157 |                  f"score={score}, duration={duration}")
158 |
159 |
160 | class IngestionError(Exception):
161 |     """Ingestion error"""
162 |
163 |
164 | class ScoringError(Exception):
165 |     """Scoring error"""
166 |
167 |
168 | def get_ingestion_info(prediction_dir):
169 |     """get ingestion information"""
170 |     ingestion_info = None
171 |     endfile_path = os.path.join(prediction_dir, 'end.yaml')
172 |
173 |     if not os.path.isfile(endfile_path):
174 |         raise IngestionError("[-] No end.yaml exists, ingestion failed")
175 |
176 |     LOGGER.info('===== Detected end.yaml file, get ingestion information')
177 |     with open(endfile_path, 'r') as ftmp:
178 |         ingestion_info = yaml.safe_load(ftmp)
179 |
180 |     return ingestion_info
181 |
182 |
183 | def get_ingestion_pid(prediction_dir):
184 |     """get ingestion pid"""
185 |     # Wait 60 seconds for ingestion to start and write 'start.txt';
186 |     # otherwise, raise an exception.
187 |     wait_time = 60
188 |     startfile = os.path.join(prediction_dir, 'start.txt')
189 |     lockfile = os.path.join(prediction_dir, 'start.txt.lock')
190 |
191 |     for i in range(wait_time):
192 |         if os.path.exists(startfile):
193 |             with FileLock(lockfile):
194 |                 with open(startfile, 'r') as ftmp:
195 |                     ingestion_pid = ftmp.read()
196 |                     LOGGER.info(
197 |                         f'Detected the start of ingestion after {i} seconds.')
198 |                     return int(ingestion_pid)
199 |         else:
200 |             time.sleep(1)
201 |     raise IngestionError(f"[-] Failed: scoring didn't detect the start of "
202 |                          f"ingestion after {wait_time} seconds.")
203 |
204 |
205 | def is_process_alive(ingestion_pid):
206 |     """Check whether the ingestion process is still alive."""
207 |     try:
208 |         os.kill(ingestion_pid, 0)
209 |     except OSError:
210 |         return False
211 |     else:
212 |         return True
213 |
214 |
215 | def _parse_args():
216 |     # Default I/O directories:
217 |     root_dir = _here(os.pardir)
218 |     default_solution_dir = join(root_dir, "sample_data")
219 |     default_prediction_dir = join(root_dir, "sample_result_submission")
220 |     default_score_dir = join(root_dir, "scoring_output")
221 |     parser = argparse.ArgumentParser()
222 |     parser.add_argument('--solution_dir', type=str,
223 |                         default=default_solution_dir,
224 |                         help=("Directory storing the solution with true "
225 |                               "labels, e.g. adult.solution."))
226 |     parser.add_argument('--prediction_dir', type=str,
227 |                         default=default_prediction_dir,
228 |                         help=("Directory storing the predictions. It should "
229 |                               "contain e.g. [start.txt, adult.predict_0, "
230 |                               "adult.predict_1, ..., end.yaml]."))
231 |     parser.add_argument('--score_dir', type=str,
232 |                         default=default_score_dir,
233 |                         help=("Directory storing the scoring output, e.g. "
234 |                               "`scores.txt` and `detailed_results.html`."))
235 |     args = parser.parse_args()
236 |     LOGGER.debug(f"Parsed args are: {args}")
237 |     LOGGER.debug("-" * 50)
238 |     LOGGER.debug(f"Using solution_dir: {args.solution_dir}")
239 |     LOGGER.debug(f"Using prediction_dir: {args.prediction_dir}")
240 |     LOGGER.debug(f"Using score_dir: {args.score_dir}")
241 |     return args
242 |
243 |
244 | def _init(args):
245 |     if not os.path.isdir(args.score_dir):
246 |         os.mkdir(args.score_dir)
247 |     detailed_results_filepath = join(
248 |         args.score_dir, 'detailed_results.html')
249 |     # Initialize detailed_results.html
250 |     _init_scores_html(detailed_results_filepath)
251 |
252 |
253 | def _finalize(score, scoring_start):
254 |     """finalize the scoring"""
255 |     # Use 'end.yaml' file to detect if ingestion program ends
256 |     duration = time.time() - scoring_start
257 |     LOGGER.info(
258 |         "[+] Successfully finished scoring! "
259 |         f"Scoring duration: {duration:.2} sec. "
260 |         f"The score of your algorithm on the task is: {score:.6}.")
261 |
262 |     LOGGER.info("[Scoring terminated]")
263 |
264 |
265 | def main():
266 |     """main entry"""
267 |     scoring_start = time.time()
268 |     LOGGER.info('===== init scoring program')
269 |     args = _parse_args()
270 |     _init(args)
271 |     score = DEFAULT_SCORE
272 |
273 |     ingestion_pid = get_ingestion_pid(args.prediction_dir)
274 |
275 |     LOGGER.info("===== wait for the exit of ingestion.")
276 |     while is_process_alive(ingestion_pid):
277 |         time.sleep(1)
278 |
279 |     # Compute/write score
280 |     ingestion_info = get_ingestion_info(args.prediction_dir)
281 |     duration = ingestion_info['ingestion_duration']
282 |     score = _update_score(args, duration)
283 |
284 |     _finalize(score, scoring_start)
285 |
286 |
287 | if __name__ == "__main__":
288 |     main()
289 |
--------------------------------------------------------------------------------