├── .github └── workflows │ ├── ccpp.yml │ └── validate-citation-cff.yml ├── .gitignore ├── .gitmodules ├── CITATION.cff ├── COPYING ├── LICENSE ├── README.md ├── builds └── linux │ └── Makefile └── src ├── RPBHFA2LaunchParameters.cuh ├── RPBHKernels.cu ├── RPBHKernels.cuh ├── RPBarnesHutApproximator.cpp ├── RPBarnesHutApproximator.hpp ├── RPCPUForceAtlas2.cpp ├── RPCPUForceAtlas2.hpp ├── RPCommon.cpp ├── RPCommon.hpp ├── RPFA2Kernels.cu ├── RPFA2Kernels.cuh ├── RPForceAtlas2.cpp ├── RPForceAtlas2.hpp ├── RPGPUForceAtlas2.cu ├── RPGPUForceAtlas2.hpp ├── RPGraph.cpp ├── RPGraph.hpp ├── RPGraphLayout.cpp ├── RPGraphLayout.hpp ├── RPLayoutAlgorithm.cpp ├── RPLayoutAlgorithm.hpp └── graph_viewer.cpp /.github/workflows/ccpp.yml: -------------------------------------------------------------------------------- 1 | name: compile CPU code 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | strategy: 8 | matrix: 9 | os: [ubuntu-18.04, ubuntu-20.04, ubuntu-22.04] 10 | runs-on: ${{ matrix.os }} 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | submodules: true 15 | - name: make (without CUDA support) 16 | run: make graph_viewer CUDA_SUPPORT=0 17 | working-directory: ./builds/linux 18 | -------------------------------------------------------------------------------- /.github/workflows/validate-citation-cff.yml: -------------------------------------------------------------------------------- 1 | name: "Validate CITATION.cff" 2 | 3 | on: 4 | push: 5 | paths: 6 | - "CITATION.cff" 7 | jobs: 8 | validate: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: "Checkout repository" 12 | uses: actions/checkout@v3 13 | 14 | - name: "Run validation" 15 | uses: "citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084" 16 | with: 17 | args: "--validate" 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | 
*.swp 4 | 5 | *.trace 6 | *.o 7 | *.out 8 | # Xcode 9 | # 10 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 11 | 12 | ## Build generated 13 | build/ 14 | DerivedData/ 15 | 16 | ## Various settings 17 | *.pbxuser 18 | !default.pbxuser 19 | *.mode1v3 20 | !default.mode1v3 21 | *.mode2v3 22 | !default.mode2v3 23 | *.perspectivev3 24 | !default.perspectivev3 25 | xcuserdata/ 26 | 27 | ## Other 28 | *.moved-aside 29 | *.xccheckout 30 | *.xcscmblueprint 31 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/pngwriter"] 2 | path = lib/pngwriter 3 | url = https://github.com/pngwriter/pngwriter.git 4 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "Please use the conference paper under 'preferred-citation' to cite this project." 3 | title: "GPUGraphLayout" 4 | authors: 5 | - family-names: "Brinkmann" 6 | given-names: "Govert G." 
7 | orcid: "https://orcid.org/0000-0002-2713-213X" 8 | date-released: 2017-09-18 9 | repository: "https://github.com/govertb/GPUGraphLayout" 10 | identifiers: 11 | - description: "Collection of archived snapshots of GPUGraphLayout" 12 | type: doi 13 | value: 10.5281/zenodo.6334692 14 | keywords: 15 | - "visualization" 16 | - "graph-algorithms" 17 | - "cuda" 18 | - "gephi" 19 | - "social-network-analysis" 20 | - "forceatlas2" 21 | - "graph-layout" 22 | license: "AGPL-3.0" 23 | preferred-citation: 24 | type: conference-paper 25 | title: "Exploiting GPUs for Fast Force-Directed Visualization of Large-Scale Networks" 26 | journal: "46th International Conference on Parallel Processing" 27 | year: 2017 28 | month: 9 29 | start: 382 30 | end: 391 31 | date-released: 2017-09-07 32 | languages: 33 | - "en" 34 | authors: 35 | - family-names: "Brinkmann" 36 | given-names: "Govert G." 37 | affiliation: "Leiden Institute of Advanced Computer Science (LIACS)" 38 | orcid: "https://orcid.org/0000-0002-2713-213X" 39 | - family-names: "Rietveld" 40 | given-names: "Kristian F. D." 41 | affiliation: "Leiden Institute of Advanced Computer Science (LIACS)" 42 | orcid: "https://orcid.org/0000-0003-0455-3430" 43 | - family-names: "Takes" 44 | given-names: "Frank W." 
45 | affiliation: "Leiden Institute of Advanced Computer Science (LIACS)" 46 | orcid: "https://orcid.org/0000-0001-5468-1030" 47 | institution: 48 | name: "Leiden Institute of Advanced Computer Science" 49 | alias: "LIACS" 50 | address: "Niels Bohrweg 2" 51 | post-code: "2333 CA" 52 | city: "Leiden" 53 | country: "NL" 54 | website: "https://liacs.leidenuniv.nl" 55 | conference: 56 | name: "46th International Conference on Parallel Processing" 57 | alias: "ICPP 2017" 58 | country: "GB" 59 | city: "Bristol" 60 | date-start: 2017-08-14 61 | date-end: 2017-08-17 62 | website: "https://www.icpp-conf.org/2017/" 63 | publisher: 64 | name: "IEEE" 65 | website: "https://ieee.org" 66 | copyright: "© 2017 IEEE" 67 | doi: "10.1109/ICPP.2017.47" 68 | url: "https://doi.org/10.1109/ICPP.2017.47" 69 | isbn: "9781538610428" 70 | issn: "2332-5690" 71 | keywords: 72 | - "Network visualization" 73 | - "force-directed graph layout" 74 | - "large-scale networks" 75 | - "parallel programming" 76 | - "CUDA" 77 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. 
By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 
49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | <one line to give the program's name and a brief idea of what it does.> 633 | Copyright (C) <year> <name of author> 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published by 637 | the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see <https://www.gnu.org/licenses/>. 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | <https://www.gnu.org/licenses/>.
662 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | COPYING -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `graph_viewer` | GPU accelerated graph layout 2 | ============================================= 3 | 4 | This repository contains experimental code for large scale graph layout using the GPU. Currently we only implement the basics of ForceAtlas2, a graph layout algorithm designed for social network visualization in Gephi[1](#jacomy14),[2](#bastian09). Our implementation of ForceAtlas2 is based on [the open source implementation](https://github.com/gephi/gephi/tree/6efb108718fa67d1055160f3a18b63edb4ca7be2/modules/LayoutPlugin/src/main/java/org/gephi/layout/plugin/forceAtlas2) used in Gephi itself, and considers the graph to be undirected. For force approximation, we use a CUDA implementation of the Barnes-Hut approximation algorithm[3](#barnes86) by Martin Burtscher and Keshav Pingali[4](#burtscher11). This implementation is available as part of [LonestarGPU](http://iss.ices.utexas.edu/?p=projects/galois/lonestargpu). The average speedup, compared to a *de facto* CPU implementation of ForceAtlas2, is over 40x. This makes it feasible to compute layouts for networks with millions of nodes and edges. More details and results can be found in: 5 | 6 | * G.G. Brinkmann, [K.F.D. Rietveld](https://liacs.leidenuniv.nl/~rietveldkfd) and [F.W. Takes](https://liacs.leidenuniv.nl/~takesfw), [Exploiting GPUs for fast force-directed visualization of large-scale networks](https://dx.doi.org/10.1109/ICPP.2017.47), in Proceedings of the 46th International Conference on Parallel Processing (ICPP), pp. 382-391, 2017.
7 | 8 | 9 | #### Citing 10 | To cite this software, please use the aforementioned reference, or the `preferred-citation` section in [CITATION.cff](./CITATION.cff). The latter can be converted to the desired format using [various tools](https://github.com/citation-file-format/citation-file-format/tree/52647a247e9b1a5b04154934f39615b5ee8c4d65#tools-to-work-with-citationcff-files-wrench), or using the _Cite this repository_ button in the _About_ section of [this project's GitHub page](https://github.com/govertb/GPUGraphLayout). 11 | 12 | 13 | #### System Requirements 14 | 15 | A CUDA capable GPU. Currently only Linux is supported. 16 | 17 | #### Obtaining all code 18 | This repository contains a submodule (`lib/pngwriter`). Be sure to run 19 | ``` 20 | git submodule init && git submodule update 21 | ``` 22 | from the root of this Git repository before compiling. The code also depends on the `libpng` library (including its development headers). It should be possible to obtain this using the package manager for your Linux distribution. 23 | 24 | #### Compiling 25 | A `Makefile` is located in `builds/linux`. Running 26 | ``` 27 | make graph_viewer 28 | ``` 29 | from this directory compiles `graph_viewer` with CUDA support. 30 | To compile without CUDA support, run `make graph_viewer CUDA_SUPPORT=0`. 31 | 32 | #### Usage 33 | `graph_viewer gpu|cpu max_iterations num_snaps sg|wg scale gravity exact|approximate edgelist_path out_path [png|csv|bin]` 34 | 35 | 36 | | Argument | Description | 37 | | -------------------- | ----------- | 38 | | `gpu\|cpu` | Choose between a parallel GPU implementation or a serial CPU implementation. | 39 | | `max_iterations` | How many iterations of the layout algorithm to run. | 40 | | `num_snaps` | Choose how many times during the layout process a visualization should be rendered. | 41 | | `wg\|sg` | Choose between weak gravity (inversely proportional to distance) or strong gravity. | 42 | | `scale` | Scale repulsive force. 
| 43 | | `gravity` | Scale gravitational force. | 44 | | `exact\|approximate` | Choose between the exact/pairwise $O(\|V\|^2)$ repulsive force calculation or the $O(\|V\| \log \|V\|)$ approximation using Barnes-Hut (GPU implementation only supports Barnes-Hut). | 45 | | `edgelist_path` | Text file (ascii) containing node IDs for each edge on a separate line (whitespace separated). Lines starting with a `#`, the direction of edges, and self-loops are ignored. | 46 | | `out_path` | Path to write resulting layout to. | 47 | 48 | `[png|csv|bin]` is optional, defaulting to `png`, and determines the format of the layout written to `out_path`. 49 | 50 | #### References 51 | 1 M. Jacomy, T. Venturini, S. Heymann, and M. Bastian, ["Forceatlas2, a continuous graph layout algorithm for handy network visualization designed for the Gephi software"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0098679), PLoS ONE, vol. 9, no. 6, pp. 1–12, 2014. 52 | 53 | 2 M. Bastian, S. Heymann, and M. Jacomy, ["Gephi: an open source software for exploring and manipulating networks."](https://aaai.org/ocs/index.php/ICWSM/09/paper/view/154) in Proceedings of International Conference on Web and Social Media (ICWSM), 2009, pp. 361–362. 54 | 55 | 3 J. Barnes and P. Hut, ["A hierarchical O(N log N) force-calculation algorithm"](https://www.nature.com/nature/journal/v324/n6096/abs/324446a0.html), Nature, vol. 324, pp. 446–449, 1986. 56 | 57 | 4 M. Burtscher and K. Pingali, ["An efficient CUDA implementation of the tree-based Barnes Hut n-body algorithm"](https://www.sciencedirect.com/science/article/pii/B9780123849885000061), in GPU Computing Gems Emerald Edition, W.-m. W. Hwu, Ed., 2011, ch. 6, pp. 75–92. 58 | 59 | #### License 60 | Most source files for this program are released under the GNU Affero General Public License. The license notice in each file provides more information. A copy of the GNU Affero General Public License can be found in the `LICENSE` file.
61 | 62 | #### Disclaimer 63 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 64 | -------------------------------------------------------------------------------- /builds/linux/Makefile: -------------------------------------------------------------------------------- 1 | CC:=gcc 2 | 3 | # Directories 4 | L_SRC_DIR := ../../lib 5 | SRC_DIR := ../../src 6 | OBJ_DIR := obj 7 | 8 | # Compile with CUDA support by default 9 | CUDA_SUPPORT ?= 1 10 | ifeq ($(CUDA_SUPPORT), 1) 11 | CC:=nvcc 12 | endif 13 | 14 | # Debug compilation, disable by default 15 | DEBUG ?= 0 16 | ifeq ($(DEBUG), 1) 17 | CXXFLAGS:=-g 18 | NVCCFLAGS:=-G 19 | O_LVL:=-O0 20 | else 21 | NVCCFLAGS:=-D NDEBUG 22 | O_LVL:=-O3 23 | endif 24 | 25 | # C preprocessor flags 26 | CPPFLAGS :=-D NO_FREETYPE 27 | 28 | # C/C++ compiler flags 29 | CFLAGS :=$(O_LVL) 30 | CXXFLAGS :=$(CXXFLAGS) $(O_LVL) -std=c++11 31 | 32 | # Linker flags 33 | LDFLAGS :=-lc -lm -lstdc++ -lpng 34 | 35 | # src./obj.
files 36 | GRAPH_VIEWER_SRCS := $(wildcard $(SRC_DIR)/*.cpp) 37 | GRAPH_VIEWER_OBJS := $(GRAPH_VIEWER_SRCS:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.o) 38 | 39 | PNGWRITER_SRCS := $(L_SRC_DIR)/pngwriter/src/pngwriter.cc 40 | PNGWRITER_OBJS := $(PNGWRITER_SRCS:$(L_SRC_DIR)/%.cc=$(OBJ_DIR)/%.o) 41 | 42 | CUDA_SRCS := $(wildcard $(SRC_DIR)/*.cu) 43 | CUDA_OBJS := $(CUDA_SRCS:$(SRC_DIR)/%.cu=$(OBJ_DIR)/%.o) 44 | CUDA_DEPS := $(wildcard $(SRC_DIR)/*.cuh) 45 | 46 | CPP_SRC := $(GRAPH_VIEWER_SRCS) $(PNGWRITER_SRCS) 47 | CUDA_SRC := $(CUDA_SRCS) 48 | SOURCES := $(C_SRC) $(CPP_SRC) 49 | OBJECTS := $(GRAPH_VIEWER_OBJS) $(PNGWRITER_OBJS) 50 | 51 | ifeq ($(CUDA_SUPPORT), 1) 52 | OBJECTS := $(OBJECTS) $(CUDA_OBJS) 53 | SOURCES := $(SOURCES) $(CUDA_SRCS) 54 | endif 55 | 56 | # Generate dependency (.h, .hpp) 57 | # ala http://stackoverflow.com/questions/2394609/makefile-header-dependencies 58 | depend: .depend 59 | .depend: $(SRCS) 60 | rm -f .depend 61 | $(CC) $(CXXFLAGS) $(CPPFLAGS) -M $(CPP_SRC) >> .depend 62 | 63 | include ./.depend 64 | 65 | graph_viewer: $(OBJECTS) 66 | $(CC) $(OBJECTS) $(LDFLAGS) -o graph_viewer 67 | 68 | $(GRAPH_VIEWER_OBJS): $(GRAPH_VIEWER_SRCS) 69 | mkdir -p $(@D) 70 | $(CC) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.cpp) 71 | 72 | $(CUDA_OBJS): $(CUDA_SRCS) $(CUDA_DEPS) 73 | mkdir -p $(@D) 74 | nvcc --device-c $(CXXFLAGS) $(NVCCFLAGS) $(CPPFLAGS) -o $@ $(@:$(OBJ_DIR)/%.o=$(SRC_DIR)/%.cu) 75 | 76 | $(PNGWRITER_OBJS): $(PNGWRITER_SRCS) 77 | mkdir -p $(@D) 78 | $(CC) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $(@:$(OBJ_DIR)/%.o=$(L_SRC_DIR)/%.cc) 79 | 80 | clear: clean 81 | 82 | clean: 83 | rm -r graph_viewer $(OBJ_DIR)/* ./.depend 84 | 85 | .PHONY: all clear clean depend 86 | -------------------------------------------------------------------------------- /src/RPBHFA2LaunchParameters.cuh: -------------------------------------------------------------------------------- 1 | /// Per kernel launch configuration parameters 2 | // 1: BoundingBoxKernel, 
SpeedKernel (reductions of size |V|) 3 | // 2: TreeBuild 4 | // 3: Summarization 5 | // 4: SortKernel 6 | // 5: ForceKernel 7 | // 6: DisplacementKernel, GravityKernel, AttractiveForce (all 'streaming' kernels) 8 | // InitializationKernel, ClearKernel1, ClearKernel2 don't use macros for launch configuration. 9 | 10 | #if __CUDA_ARCH__ >= 500 // Maxwell (5.x) or Pascal (6.x) 11 | 12 | #define THREADS1 512 /* must be a power of 2 */ 13 | #define THREADS2 512 14 | #define THREADS3 128 15 | #define THREADS4 64 16 | #define THREADS5 256 17 | #define THREADS6 1024 18 | 19 | #define FACTOR1 3 20 | #define FACTOR2 3 21 | #define FACTOR3 6 /* must all be resident at the same time */ 22 | #define FACTOR4 6 /* must all be resident at the same time */ 23 | #define FACTOR5 5 24 | #define FACTOR6 1 25 | 26 | 27 | #elif __CUDA_ARCH__ >= 300 // Kepler (3.x) 28 | 29 | #define THREADS1 512 /* must be a power of 2 */ 30 | #define THREADS2 512 31 | #define THREADS3 128 32 | #define THREADS4 64 33 | #define THREADS5 256 34 | #define THREADS6 1024 35 | 36 | #define FACTOR1 3 37 | #define FACTOR2 3 38 | #define FACTOR3 6 /* must all be resident at the same time */ 39 | #define FACTOR4 6 /* must all be resident at the same time */ 40 | #define FACTOR5 5 41 | #define FACTOR6 1 42 | 43 | #elif __CUDA_ARCH__ < 300 // Fermi (2.x) or Tesla (1.x) 44 | 45 | #define THREADS1 512 /* must be a power of 2 */ 46 | #define THREADS2 512 47 | #define THREADS3 128 48 | #define THREADS4 64 49 | #define THREADS5 256 50 | #define THREADS6 1024 51 | 52 | #define FACTOR1 3 53 | #define FACTOR2 3 54 | #define FACTOR3 6 /* must all be resident at the same time */ 55 | #define FACTOR4 6 /* must all be resident at the same time */ 56 | #define FACTOR5 5 57 | #define FACTOR6 1 58 | 59 | #endif 60 | 61 | #define WARPSIZE 32 62 | #define MAXDEPTH 32 63 | -------------------------------------------------------------------------------- /src/RPBHKernels.cu: 
-------------------------------------------------------------------------------- 1 | /* 2 | The following code is a modified version of the CUDA BarnesHut v3.1 code 3 | by Martin Burtscher. Modifications were made to transform the code from a 4 | three-dimensional Barnes-Hut implementation to a two-dimensional implementation, 5 | since our application (graph layout) only needs two dimensions. 6 | 7 | What follows is the copyright notice associated with that 8 | original code, as it is provided by the copyright holder: 9 | Texas State University-San Marcos. 10 | */ 11 | 12 | 13 | /* 14 | CUDA BarnesHut v3.1: Simulation of the gravitational forces 15 | in a galactic cluster using the Barnes-Hut n-body algorithm 16 | 17 | Copyright (c) 2013, Texas State University-San Marcos. All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without modification, 20 | are permitted for academic, research, experimental, or personal use provided that 21 | the following conditions are met: 22 | 23 | * Redistributions of source code must retain the above copyright notice, 24 | this list of conditions and the following disclaimer. 25 | * Redistributions in binary form must reproduce the above copyright notice, 26 | this list of conditions and the following disclaimer in the documentation 27 | and/or other materials provided with the distribution. 28 | * Neither the name of Texas State University-San Marcos nor the names of its 29 | contributors may be used to endorse or promote products derived from this 30 | software without specific prior written permission. 31 | 32 | For all other uses, please contact the Office for Commercialization and Industry 33 | Relations at Texas State University-San Marcos .
34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 36 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 37 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED 38 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 39 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 40 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 42 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 43 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 44 | OF THE POSSIBILITY OF SUCH DAMAGE. 45 | 46 | Author: Martin Burtscher 47 | */ 48 | 49 | #include 50 | #include 51 | #include "RPBHKernels.cuh" 52 | 53 | // Variables marked extern in header. 54 | __device__ float minxdg, minydg, maxxdg, maxydg; 55 | 56 | 57 | // Variables for use in this file only. 
58 | static __device__ volatile int stepd = -1; 59 | static __device__ volatile int maxdepthd = 1; 60 | static __device__ volatile int bottomd; // initialized by BoundingBoxKernel 61 | static __device__ unsigned int blkcntd = 0; 62 | static __device__ volatile float radiusd; 63 | 64 | 65 | /*** The Kernel Definitions ***/ 66 | /******************************************************************************/ 67 | /*** compute center and radius ************************************************/ 68 | /******************************************************************************/ 69 | 70 | __global__ 71 | __launch_bounds__(THREADS1, FACTOR1) 72 | void BoundingBoxKernel(int nnodesd, int nbodiesd, volatile int * __restrict startd, 73 | volatile int * __restrict childd, volatile float * __restrict node_massd, 74 | volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd, 75 | volatile float * __restrict maxxd, volatile float * __restrict maxyd, 76 | volatile float * __restrict minxd, volatile float * __restrict minyd) 77 | { 78 | register int i, j, k, inc; 79 | register float val, minx, maxx, miny, maxy; 80 | __shared__ volatile float sminx[THREADS1], smaxx[THREADS1], sminy[THREADS1], smaxy[THREADS1]; 81 | 82 | // initialize with valid data (in case #bodies < #threads) 83 | minx = maxx = body_posd[0].x; 84 | miny = maxy = body_posd[0].y; 85 | 86 | // scan all bodies 87 | i = threadIdx.x; 88 | inc = THREADS1 * gridDim.x; 89 | for (j = i + blockIdx.x * THREADS1; j < nbodiesd; j += inc) 90 | { 91 | val = body_posd[j].x; 92 | minx = fminf(minx, val); 93 | maxx = fmaxf(maxx, val); 94 | val = body_posd[j].y; 95 | miny = fminf(miny, val); 96 | maxy = fmaxf(maxy, val); 97 | } 98 | 99 | // reduction in shared memory 100 | sminx[i] = minx; 101 | smaxx[i] = maxx; 102 | sminy[i] = miny; 103 | smaxy[i] = maxy; 104 | 105 | for (j = THREADS1 / 2; j > 0; j /= 2) 106 | { 107 | __syncthreads(); 108 | if (i < j) 109 | { 110 | k = i + j; 111 | sminx[i] = minx = 
fminf(minx, sminx[k]); 112 | smaxx[i] = maxx = fmaxf(maxx, smaxx[k]); 113 | sminy[i] = miny = fminf(miny, sminy[k]); 114 | smaxy[i] = maxy = fmaxf(maxy, smaxy[k]); 115 | } 116 | } 117 | 118 | // write block result to global memory 119 | if (i == 0) 120 | { 121 | k = blockIdx.x; 122 | minxd[k] = minx; 123 | maxxd[k] = maxx; 124 | minyd[k] = miny; 125 | maxyd[k] = maxy; 126 | __threadfence(); 127 | 128 | inc = gridDim.x - 1; 129 | if (inc == atomicInc(&blkcntd, inc)) 130 | { 131 | // I'm the last block, so combine all block results 132 | for (j = 0; j <= inc; j++) 133 | { 134 | minx = fminf(minx, minxd[j]); 135 | maxx = fmaxf(maxx, maxxd[j]); 136 | miny = fminf(miny, minyd[j]); 137 | maxy = fmaxf(maxy, maxyd[j]); 138 | } 139 | // compute 'radius' 140 | radiusd = fmaxf(maxx - minx, maxy - miny) * 0.5f; 141 | 142 | // insert the root node into the BH tree. 143 | k = nnodesd; 144 | bottomd = k; 145 | 146 | node_massd[k] = -1.0f; 147 | node_posd[k].x = (minx + maxx) * 0.5f; 148 | node_posd[k].y = (miny + maxy) * 0.5f; 149 | startd[k] = 0; 150 | 151 | k *= 4; // skip over the children of all nodes 152 | for (i = 0; i < 4; i++) childd[k + i] = -1; 153 | 154 | stepd++; 155 | } 156 | } 157 | } 158 | 159 | /******************************************************************************/ 160 | /*** build tree ***************************************************************/ 161 | /******************************************************************************/ 162 | 163 | // Sets all child pointers of internal nodes in BH tree to null (-1) in childd 164 | __global__ 165 | __launch_bounds__(1024, 1)void ClearKernel1(int nnodesd, int nbodiesd, volatile int * __restrict childd) 166 | { 167 | register int k, inc, top, bottom; 168 | 169 | top = 4 * nnodesd; // children of root node initialized before. 
170 | bottom = 4 * nbodiesd; 171 | inc = blockDim.x * gridDim.x; 172 | k = (bottom & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x; 173 | if (k < bottom) k += inc; 174 | 175 | // iterate over all cells assigned to thread 176 | while (k < top) 177 | { 178 | childd[k] = -1; 179 | k += inc; 180 | } 181 | } 182 | 183 |
// Inserts every body into the quadtree described by `childd`.
//
// Layout used by this kernel:
//   * `childd` holds four child slots per node; slot j of node n is childd[n*4+j].
//   * A slot value of -1 means empty, -2 means locked by another thread, a value
//     below `nbodiesd` is a body index and a value >= `nbodiesd` is a cell index.
//   * The root cell has index `nnodesd`; its centre is read from node_posd[nnodesd]
//     and its half-width from the device global `radiusd`.
//   * New cells are allocated by atomically decrementing the device global `bottomd`.
//
// Each thread walks its bodies with a stride of blockDim.x * gridDim.x. For every
// body it descends from the root to a leaf slot and then either claims the empty
// slot, or locks the slot and splits the leaf until the old and the new body end
// up in different quadrants. The deepest level reached is folded into the device
// global `maxdepthd`.
//
// `skip` drives the retry logic: 0 = retry from the current node, 1 = (re)start
// at the root and re-read the body position, 2 = a subtree was built and still
// has to be published after the barrier.
__global__
__launch_bounds__(THREADS2, FACTOR2)
void TreeBuildingKernel(int nnodesd, int nbodiesd, volatile int * __restrict childd,
                        volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd)
{
    register int i, j, depth, localmaxdepth, skip, inc;
    register float x, y, r;
    register float px, py;
    register float dx, dy;
    register int ch, n, cell, locked, patch;
    register float rootr, rootx, rooty;

    // cache root data
    rootx = node_posd[nnodesd].x;
    rooty = node_posd[nnodesd].y;
    rootr = radiusd;

    localmaxdepth = 1;
    skip = 1;
    inc = blockDim.x * gridDim.x;
    i = threadIdx.x + blockIdx.x * blockDim.x;

    // iterate over all bodies assigned to thread
    while (i < nbodiesd)
    {
        if (skip != 0)
        {
            // new body (or restart), so start traversing at root
            skip = 0;
            px = body_posd[i].x;
            py = body_posd[i].y;
            n = nnodesd;
            depth = 1;
            r = rootr * 0.5f;
            dx = dy = -r;
            j = 0;
            // determine which child to follow:
            // bit 0 of j is set when the body lies right of the centre,
            // bit 1 when it lies above it.
            if (rootx < px) {j = 1; dx = r;}
            if (rooty < py) {j |= 2; dy = r;}
            x = rootx + dx;
            y = rooty + dy;
        }

        // follow path to leaf cell
        ch = childd[n*4+j];

        while (ch >= nbodiesd)
        {
            n = ch;
            depth++;
            r *= 0.5f;
            dx = dy = -r;
            j = 0;
            // determine which child to follow
            if (x < px) {j = 1; dx = r;}
            if (y < py) {j |= 2; dy = r;}
            x += dx;
            y += dy;
            ch = childd[n*4+j];
        }

        // here ch is either leaf (< nbodiesd), null (-1), locked (-2)

        if (ch != -2)
        {
            // here we insert body into either empty cell, or split leafcell.
            // skip if child pointer is locked and try again later
            locked = n*4+j;
            if (ch == -1)
            {
                if (-1 == atomicCAS((int *)&childd[locked], -1, i))
                { // if null, just insert the new body
                    localmaxdepth = max(depth, localmaxdepth);
                    i += inc; // move on to next body
                    skip = 1;
                }
                // else: failed to claim cell, re-traverse next iteration.
            }
            else
            { // there already is a body in this position
                if (ch == atomicCAS((int *)&childd[locked], ch, -2))
                {
                    // lock is now acquired on childd[locked].
                    // ch is old BH node id living at childd[locked]

                    if (body_posd[ch].x == px && body_posd[ch].y == py)
                    {
                        // Both bodies have the same position, so splitting would
                        // never separate them. Offset the body being inserted and
                        // redo its traversal from the root.
                        //
                        // The previous code used `break` here, which left the
                        // outer `while (i < nbodiesd)` loop: the thread dropped
                        // this body and all of its remaining bodies.
                        body_posd[i].x *= .99;
                        body_posd[i].y *= .99;
                        if (body_posd[i].x == px && body_posd[i].y == py)
                        {
                            // Scaling did not move the body (e.g. it sits on the
                            // origin); shift it by a fraction of the current
                            // cell's half-width so the retry can make progress.
                            body_posd[i].x = px + r * 0.01f;
                            body_posd[i].y = py + r * 0.01f;
                        }
                        skip = 1;            // re-read position, restart at root
                        childd[locked] = ch; // release lock
                    }
                    else
                    {
                        patch = -1;
                        // create new cell(s) and insert the new and old body
                        do
                        {
                            // 1.) Create new cell
                            cell = atomicSub((int *)&bottomd, 1) - 1;
                            assert(cell > nbodiesd);

                            // The first new cell is only published after the
                            // barrier below (through `patch`); deeper cells are
                            // linked into their parent right away.
                            if (patch != -1) childd[n*4+j] = cell;
                            patch = max(patch, cell);

                            // 2.) Make newly created cell current
                            depth++;
                            n = cell;
                            r *= 0.5f;

                            // 3.) Insert old body into correct quadrant
                            j = 0;
                            if (x < body_posd[ch].x) j = 1;
                            if (y < body_posd[ch].y) j |= 2;
                            childd[cell*4+j] = ch;

                            // 4.) Determine center + quadrant for cell of new body
                            j = 0;
                            dx = dy = -r;
                            if (x < px) {j = 1; dx = r;}
                            if (y < py) {j |= 2; dy = r;}
                            x += dx;
                            y += dy;

                            // 5.) Visit this cell/check if in use (possibly by old body)
                            ch = childd[n*4+j];
                            // repeat until the two bodies are different children
                        } while (ch >= 0);
                        childd[n*4+j] = i; // insert new body

                        localmaxdepth = max(depth, localmaxdepth);
                        i += inc; // move on to next body
                        skip = 2;
                    }
                }
                // else: failed to acquire lock, re-traverse next iteration.
            }
        }
        __syncthreads(); // __threadfence();

        if (skip == 2) childd[locked] = patch; // unlock: publish the new subtree
    }
    // record maximum tree depth
    atomicMax((int *)&maxdepthd, localmaxdepth);
}
330 | 331 | // Sets mass of cells to -1.0, and all startd entries to null (-1). 332 | __global__ 333 | __launch_bounds__(1024, 1) 334 | void ClearKernel2(int nnodesd, volatile int * __restrict startd, volatile float * __restrict node_massd) 335 | { 336 | register int k, inc, bottom; 337 | 338 | bottom = bottomd; 339 | inc = blockDim.x * gridDim.x; 340 | k = (bottom & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x; 341 | if (k < bottom) k += inc; 342 | 343 | // iterate over all cells assigned to thread, skip root cell.
344 | while (k < nnodesd) 345 | { 346 | node_massd[k] = -1.0f; 347 | startd[k] = -1; 348 | k += inc; 349 | } 350 | } 351 | 352 |

/******************************************************************************/
/*** compute center of mass ***************************************************/
/******************************************************************************/

// Computes, for every cell from `bottomd` up to and including the root
// (`nnodesd`), the total mass (node_massd), the mass-weighted mean position
// (node_posd) and the number of bodies in its subtree (countd).
//
// A cell whose node_massd entry is negative has not been summarised yet. A cell
// can only be summarised once all of its child cells are; child values below
// `nbodiesd` are bodies (or -1 for an empty slot) and are always ready.
//
// The kernel first runs five passes that simply skip cells whose children are
// not ready, then a final loop in which each thread keeps polling its current
// cell until the children become ready.
//
// `child` and `mass` are per-thread caches in shared memory, indexed as
// [slot * THREADS3 + threadIdx.x].
__global__
__launch_bounds__(THREADS3, FACTOR3)
void SummarizationKernel(const int nnodesd, const int nbodiesd, volatile int * __restrict countd, const int * __restrict childd,
                         volatile float * __restrict body_massd, volatile float * __restrict node_massd, volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd)
{
    register int i, j, k, ch, inc, cnt, bottom, flag;
    register float m, cm, px, py;
    __shared__ int child[THREADS3 * 4];
    __shared__ float mass[THREADS3 * 4];

    bottom = bottomd;
    inc = blockDim.x * gridDim.x;
    // start at the warp-aligned index at or below `bottom`, then step past
    // anything that is still below `bottom`
    k = (bottom & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x;
    if (k < bottom) k += inc;

    register int restart = k;
    for (j = 0; j < 5; j++)
    { // wait-free pre-passes
        // iterate over all cells assigned to thread
        while (k <= nnodesd)
        {
            if (node_massd[k] < 0.0f)
            {
                // cell not summarised yet: load its children and stop at the
                // first child cell that is itself not ready
                for (i = 0; i < 4; i++)
                {
                    ch = childd[k*4+i];
                    child[i*THREADS3+threadIdx.x] = ch; // cache children
                    if ((ch >= nbodiesd) && ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) < 0.0f)) break;
                }
                if (i == 4)
                {
                    // all children are ready
                    cm = 0.0f;
                    px = 0.0f;
                    py = 0.0f;
                    cnt = 0;
                    for (i = 0; i < 4; i++)
                    {
                        ch = child[i*THREADS3+threadIdx.x];
                        if (ch >= 0)
                        {
                            if (ch >= nbodiesd)
                            { // count bodies (needed later)
                                m = mass[i*THREADS3+threadIdx.x];
                                cnt += countd[ch];
                                px += node_posd[ch].x * m;
                                py += node_posd[ch].y * m;
                            }
                            else
                            {
                                m = body_massd[ch];
                                cnt++;
                                px += body_posd[ch].x * m;
                                py += body_posd[ch].y * m;
                            }
                            // add child's contribution
                            cm += m;
                        }
                    }
                    countd[k] = cnt;
                    m = 1.0f / cm;
                    node_posd[k].x = px * m;
                    node_posd[k].y = py * m;
                    __threadfence(); // make sure data are visible before setting mass
                    node_massd[k] = cm; // a non-negative mass marks the cell as ready
                }
            }
            k += inc; // move on to next cell
        }
        k = restart;
    }

    // Final pass: `j` counts the children of the current cell that are still
    // pending. j == 0 on entry to an iteration means "children not loaded yet".
    flag = 0;
    j = 0;
    // iterate over all cells assigned to thread
    while (k <= nnodesd)
    {
        // skip entries that already carry a non-negative mass
        if (k < nbodiesd and body_massd[k] >= 0.0f)
            k += inc;
        else if(k >= nbodiesd and node_massd[k] >= 0.0f)
            k += inc;

        else
        {
            if (j == 0)
            {
                // first visit of this cell: load and cache all four children
                j = 4;
                for (i = 0; i < 4; i++)
                {
                    ch = childd[k*4+i];
                    child[i*THREADS3+threadIdx.x] = ch; // cache children
                    if ((ch < nbodiesd) || ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) >= 0.0f)) j--;
                }
            }
            else
            {
                // repeated visit: only re-read the masses that were not ready
                j = 4;
                for (i = 0; i < 4; i++)
                {
                    ch = child[i*THREADS3+threadIdx.x];
                    if ((ch < nbodiesd) || (mass[i*THREADS3+threadIdx.x] >= 0.0f) || ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) >= 0.0f)) j--;
                }
            }

            if (j == 0)
            {
                // all children are ready
                cm = 0.0f;
                px = 0.0f;
                py = 0.0f;
                cnt = 0;
                for (i = 0; i < 4; i++)
                {
                    ch = child[i*THREADS3+threadIdx.x];
                    if (ch >= 0)
                    {
                        if (ch >= nbodiesd)
                        { // count bodies (needed later)
                            m = mass[i*THREADS3+threadIdx.x];
                            cnt += countd[ch];
                            px += node_posd[ch].x * m;
                            py += node_posd[ch].y * m;
                        }
                        else
                        {
                            m = body_massd[ch];
                            cnt++;
                            px += body_posd[ch].x * m;
                            py += body_posd[ch].y * m;
                        }
                        // add child's contribution
                        cm += m;
                    }
                }
                countd[k] = cnt;
                m = 1.0f / cm;
                node_posd[k].x = px * m;
                node_posd[k].y = py * m;
                flag = 1; // mass is written after the barrier below
            }
        }
        __syncthreads(); // __threadfence();
        if (flag != 0)
        {
            // publish the mass last, which marks the cell as ready
            k < nbodiesd ? body_massd[k] = cm : node_massd[k] = cm;
            k += inc;
            flag = 0;
        }
    }
}


/******************************************************************************/
/*** sort bodies **************************************************************/
/******************************************************************************/

// Writes the body indices into `sortd` in the order in which they appear in
// the tree, working top-down from the root towards `bottomd`.
//
// startd[k] is the first position in `sortd` reserved for the subtree of cell
// k; a negative value means the parent has not assigned it yet, in which case
// the thread keeps polling the same cell. While visiting a cell its non-empty
// children are also compacted to the front of its four child slots.
__global__
__launch_bounds__(THREADS4, FACTOR4)
void SortKernel(int nnodesd, int nbodiesd, int * __restrict sortd, int * __restrict countd, volatile int * __restrict startd, int * __restrict childd)
{
    register int i, j, k, ch, dec, start, bottom;

    bottom = bottomd;
    dec = blockDim.x * gridDim.x;
    k = nnodesd + 1 - dec + threadIdx.x + blockIdx.x * blockDim.x;

    // iterate over all cells assigned to thread
    while (k >= bottom)
    {
        start = startd[k];
        if (start >= 0)
        {
            j = 0; // next free slot at the front of this cell's children
            for (i = 0; i < 4; i++)
            {
                ch = childd[k*4+i];
                if (ch >= 0)
                {
                    if (i != j)
                    {
                        // move children to front (needed later for speed)
                        childd[k*4+i] = -1;
                        childd[k*4+j] = ch;
                    }
                    j++;
                    if (ch >= nbodiesd)
                    {
                        // child is a cell
                        startd[ch] = start; // set start ID of child
                        start += countd[ch]; // add #bodies in subtree
                    }
                    else
                    {
                        // child is a body
                        sortd[start] = ch; // record body in 'sorted' array
                        start++;
                    }
                }
            }
            k -= dec; // move on to next cell
        }
        // else: start offset not assigned yet, poll the same cell again
    }
}
560 | 561 | 562 | /******************************************************************************/ 563 | /*** compute force ************************************************************/ 564 |
/******************************************************************************/ 565 |

// Accumulates the repulsive force on every body into fxd / fyd by traversing
// the tree built by the preceding kernels.
//
// Bodies are visited in the order given by `sortd`. The traversal is iterative
// and uses a stack in shared memory with one segment of MAXDEPTH entries per
// warp (`pos` = next child slot to visit, `node` = base index of the cell's
// children in `childd`); only the first thread of each warp writes the stack.
//
// dq[level] is the squared-distance threshold for that tree level, derived from
// the device global `radiusd`, `itolsqd` and `epssqd`. A cell is used as a
// single pseudo-body only if every active thread of the warp finds it at least
// that far away; otherwise the warp descends into the cell.
//
// The contribution of a body or cell n to body i is
//   k_rd * (pos_i - pos_n) * mass_i * mass_n / (|pos_i - pos_n|^2 + epssqd).
__global__
__launch_bounds__(THREADS5, FACTOR5)
void ForceCalculationKernel(int nnodesd, int nbodiesd, float itolsqd, float epssqd,
                            volatile int * __restrict sortd, volatile int * __restrict childd,
                            volatile float * __restrict body_massd, volatile float * __restrict node_massd,
                            volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
                            volatile float * __restrict fxd, volatile float * __restrict fyd, const float k_rd)
{
    register int i, j, k, n, depth, base, sbase, diff, pd, nd;
    register float px, py, ax, ay, dx, dy, tmp;
    __shared__ volatile int pos[MAXDEPTH * THREADS5/WARPSIZE], node[MAXDEPTH * THREADS5/WARPSIZE];
    __shared__ float dq[MAXDEPTH * THREADS5/WARPSIZE];

    if (0 == threadIdx.x)
    {
        tmp = radiusd * 2;
        // precompute values that depend only on tree level
        dq[0] = tmp * tmp * itolsqd;
        for (i = 1; i < maxdepthd; i++)
        {
            dq[i] = dq[i - 1] * 0.25f; // cell width halves per level, so its square quarters
            dq[i - 1] += epssqd;
        }
        dq[i - 1] += epssqd;

        assert(maxdepthd <= MAXDEPTH);
    }
    __syncthreads();

    // the per-warp stack only has room for MAXDEPTH levels
    if (maxdepthd <= MAXDEPTH)
    {
        // figure out first thread in each warp (lane 0)
        base = threadIdx.x / WARPSIZE;
        sbase = base * WARPSIZE;
        j = base * MAXDEPTH; // start of this warp's stack segment

        diff = threadIdx.x - sbase;
        // make multiple copies to avoid index calculations later
        if (diff < MAXDEPTH) dq[diff+j] = dq[diff];

        __syncthreads();
        __threadfence_block();

        // iterate over all bodies assigned to thread
        for (k = threadIdx.x + blockIdx.x * blockDim.x; k < nbodiesd; k += blockDim.x * gridDim.x)
        {
            i = sortd[k]; // get permuted/sorted
            // cache position info
            px = body_posd[i].x;
            py = body_posd[i].y;

            ax = 0.0f;
            ay = 0.0f;

            // initialize iteration stack, i.e., push root node onto stack
            depth = j;
            if (sbase == threadIdx.x)
            {
                pos[j] = 0;
                node[j] = nnodesd * 4;
            }

            do
            {
                // stack is not empty
                pd = pos[depth];
                nd = node[depth];
                while (pd < 4)
                {
                    // node on top of stack has more children to process
                    n = childd[nd + pd]; // load child pointer
                    pd++;

                    if (n >= 0)
                    {
                        if(n < nbodiesd)
                        {
                            dx = px - body_posd[n].x;
                            dy = py - body_posd[n].y;
                        }
                        else
                        {
                            dx = px - node_posd[n].x;
                            dy = py - node_posd[n].y;
                        }
                        tmp = dx*dx + dy*dy + epssqd; // compute distance squared (plus softening)

                        // check body-body interaction
                        if (n < nbodiesd)
                        {
                            ax += k_rd * dx * body_massd[i] * body_massd[n] / tmp;
                            ay += k_rd * dy * body_massd[i] * body_massd[n] / tmp;
                        }

                        // or, if n is cell, ensure all threads agree that cell is far enough away
                        else if(__all_sync(__activemask(), tmp >= dq[depth]))
                        {
                            ax += k_rd * dx * body_massd[i] * node_massd[n] / tmp;
                            ay += k_rd * dy * body_massd[i] * node_massd[n] / tmp;
                        }
                        else
                        {
                            // push cell onto stack
                            if (sbase == threadIdx.x)
                            { // maybe don't push and inc if last child
                                pos[depth] = pd;
                                node[depth] = nd;
                            }
                            depth++;
                            pd = 0;
                            nd = n * 4;
                        }
                    }
                    else
                    {
                        // SortKernel moved all non-empty children to the front,
                        // so the first empty slot ends this cell.
                        pd = 4; // early out because all remaining children are also empty
                    }
                }
                depth--; // done with this level
            } while (depth >= j);


            // save computed acceleration
            fxd[i] += ax;
            fyd[i] += ay;
        }
    }
}
694 | -------------------------------------------------------------------------------- /src/RPBHKernels.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | The following code is a modified version of the CUDA
BarnesHut v3.1 code 3 | by Martin Burtscher. Modifications were made to transform the code from a 4 | three-dimensional Barnes-Hut implementation to a two-dimensional implementation, 5 | since our application (graph layout) only needs two dimensions. 6 | 7 | What follows is the copyright notice associated with that 8 | original code, as it is provided by the copyright holder: 9 | Texas State University-San Marcos. 10 | */ 11 | 12 | 13 | /* 14 | CUDA BarnesHut v3.1: Simulation of the gravitational forces 15 | in a galactic cluster using the Barnes-Hut n-body algorithm 16 | 17 | Copyright (c) 2013, Texas State University-San Marcos. All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without modification, 20 | are permitted for academic, research, experimental, or personal use provided that 21 | the following conditions are met: 22 | 23 | * Redistributions of source code must retain the above copyright notice, 24 | this list of conditions and the following disclaimer. 25 | * Redistributions in binary form must reproduce the above copyright notice, 26 | this list of conditions and the following disclaimer in the documentation 27 | and/or other materials provided with the distribution. 28 | * Neither the name of Texas State University-San Marcos nor the names of its 29 | contributors may be used to endorse or promote products derived from this 30 | software without specific prior written permission. 31 | 32 | For all other uses, please contact the Office for Commercialization and Industry 33 | Relations at Texas State University-San Marcos . 
34 | 35 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 36 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 37 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED 38 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 39 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 40 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 42 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 43 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 44 | OF THE POSSIBILITY OF SUCH DAMAGE. 45 | 46 | Author: Martin Burtscher 47 | */ 48 |

#ifndef RPBHKernels_cuh
#define RPBHKernels_cuh

#include "RPBHFA2LaunchParameters.cuh"

// Device globals defined in RPBHKernels.cu.
// NOTE(review): presumably an error flag and the bounding box of the layout;
// their definitions are not part of this header - confirm in RPBHKernels.cu.
extern __device__ volatile int errd;
extern __device__ float minxdg, minydg, maxxdg, maxydg;

// Declarations of the Barnes-Hut kernels implemented in RPBHKernels.cu. In all
// of them indices below `nbodiesd` refer to bodies, indices from `nbodiesd` up
// to and including `nnodesd` refer to tree cells, and `nnodesd` is the root.

// NOTE(review): by its name and parameters this computes the bounding box of
// all bodies; the implementation is not shown here - confirm in RPBHKernels.cu.
__global__
__launch_bounds__(THREADS1, FACTOR1)
void BoundingBoxKernel(int nnodesd, int nbodiesd, volatile int * __restrict startd,
                       volatile int * __restrict childd, volatile float * __restrict node_massd,
                       volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
                       volatile float * __restrict maxxd, volatile float * __restrict maxyd,
                       volatile float * __restrict minxd, volatile float * __restrict minyd);

// Resets child slots in `childd` to null (-1).
__global__
__launch_bounds__(1024, 1)
void ClearKernel1(int nnodesd, int nbodiesd, volatile int * __restrict childd);

// Inserts all bodies into the quadtree stored in `childd`.
__global__
__launch_bounds__(THREADS2, FACTOR2)
void TreeBuildingKernel(int nnodesd, int nbodiesd, volatile int * __restrict childd,
                        volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd);

// Sets mass of cells to -1.0, and all startd entries to null (-1).
__global__
__launch_bounds__(1024, 1)
void ClearKernel2(int nnodesd, volatile int * __restrict startd, volatile float * __restrict node_massd);

// Computes total mass, centre of mass and body count of every cell.
__global__
__launch_bounds__(THREADS3, FACTOR3)
void SummarizationKernel(const int nnodesd, const int nbodiesd, volatile int * __restrict countd, const int * __restrict childd,
                         volatile float * __restrict body_massd, volatile float * __restrict node_massd, volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd);

// Writes the body indices into `sortd` in tree order.
__global__
__launch_bounds__(THREADS4, FACTOR4)
void SortKernel(int nnodesd, int nbodiesd, int * __restrict sortd, int * __restrict countd, volatile int * __restrict startd, int * __restrict childd);

// Adds the approximated repulsive force on every body to fxd / fyd.
__global__
__launch_bounds__(THREADS5, FACTOR5)
void ForceCalculationKernel(int nnodesd, int nbodiesd, float itolsqd, float epssqd,
                            volatile int * __restrict sortd, volatile int * __restrict childd,
                            volatile float * __restrict body_massd, volatile float * __restrict node_massd,
                            volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
                            volatile float * __restrict fxd, volatile float * __restrict fyd, const float k_rd);

#endif
96 | -------------------------------------------------------------------------------- /src/RPBarnesHutApproximator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPBarnesHutApproximator.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include "RPBarnesHutApproximator.hpp" 25 | #include 26 | #include 27 | #include 28 | 29 | namespace RPGraph 30 | { 31 | BarnesHutCell::BarnesHutCell(Coordinate position, float length, Coordinate particle_position, float particle_mass) 32 | : cell_center{position}, length{length}, mass_center(particle_position), total_mass{particle_mass} 33 | { 34 | lb = position.x - length/2.0; 35 | rb = position.x + length/2.0; 36 | bb = position.y - length/2.0; 37 | ub = position.y + length/2.0; 38 | } 39 | 40 | BarnesHutCell::~BarnesHutCell() 41 | { 42 | for (nid_t n = 0; n < 4; ++n) delete sub_cells[n]; 43 | } 44 | 45 | void BarnesHutCell::add_leafcell(int quadrant, float mass, Coordinate pos) 46 | { 47 | Coordinate leafcell_center_coordinate = Coordinate(0,0); 48 | if (quadrant == 0) 49 | leafcell_center_coordinate = Coordinate(this->cell_center.x-length/4.0,this->cell_center.y+length/4); 50 | else if (quadrant == 1) 51 | leafcell_center_coordinate = Coordinate(this->cell_center.x+length/4.0,this->cell_center.y+length/4); 52 | else if (quadrant == 2) 53 | leafcell_center_coordinate = Coordinate(this->cell_center.x+length/4.0,this->cell_center.y-length/4); 54 | else if (quadrant == 3) 55 | leafcell_center_coordinate = Coordinate(this->cell_center.x-length/4.0,this->cell_center.y-length/4); 56 | 57 | sub_cells[quadrant] = new BarnesHutCell(leafcell_center_coordinate, this->length/2.0, pos, mass); 58 | num_subparticles += 1; 59 | 60 | } 61 | 62 | BarnesHutApproximator::BarnesHutApproximator(Coordinate root_center, float root_length, float theta) 63 | : root_center{root_center}, root_length{root_length}, theta{theta} 64 | { 65 | this->reset(root_center, root_length); 66 | } 67 | 68 | 
void BarnesHutApproximator::reset(Coordinate root_center, float root_length) 69 | { 70 | delete root_cell; // this recursively deletes the entire tree 71 | root_cell = nullptr; 72 | 73 | this->root_center = root_center; 74 | this->root_length = root_length; 75 | } 76 | 77 | 78 | Real2DVector BarnesHutApproximator::approximateForce(Coordinate particle_pos, float particle_mass, float theta) 79 | { 80 | Real2DVector force = Real2DVector(0.0, 0.0); 81 | std::queue cells_to_check; 82 | cells_to_check.push(root_cell); 83 | 84 | BarnesHutCell *cur_cell; 85 | while (!cells_to_check.empty()) 86 | { 87 | cur_cell = cells_to_check.front(); 88 | cells_to_check.pop(); 89 | 90 | const float D2 = distance2(particle_pos, cur_cell->mass_center); 91 | if (D2 == 0) 92 | { 93 | // If we approximate the force of a particle on itself... 94 | if (cur_cell->num_subparticles == 0) continue; 95 | else return Real2DVector(rand(), rand()); 96 | 97 | } 98 | 99 | // length / D >= theta is the criterion to divide into subcells. 
100 | if (cur_cell->length*cur_cell->length / D2 < theta*theta || cur_cell->num_subparticles == 0) 101 | force += direction(particle_pos, cur_cell->mass_center) * 102 | (particle_mass * cur_cell->total_mass / D2); 103 | 104 | else 105 | for (int i = 0; i < 4; ++i) 106 | if (cur_cell->sub_cells[i] != nullptr) cells_to_check.push(cur_cell->sub_cells[i]); 107 | } 108 | return force; 109 | } 110 | 111 | void BarnesHutApproximator::insertParticle(RPGraph::Coordinate particle_position, float particle_mass) 112 | { 113 | if(not root_cell) 114 | { 115 | root_cell = new BarnesHutCell(this->root_center, this->root_length, 116 | particle_position, particle_mass); 117 | } 118 | 119 | else 120 | { 121 | BarnesHutCell *cur_cell = root_cell; 122 | while (true) 123 | { 124 | const int quadrant_new_particle = (particle_position-cur_cell->cell_center).quadrant(); 125 | 126 | if (particle_position.y > cur_cell->ub or particle_position.x > cur_cell->rb or 127 | particle_position.x < cur_cell->lb or particle_position.y < cur_cell->bb) 128 | { 129 | //fprintf(stderr, "error: Barnes-Hut: Can't insert particle out of bounds of this cell.\n"); 130 | return; 131 | } 132 | 133 | // N.B. a BarnesHutCell is never empty, but can lack subparticles/cells. 134 | // If so, we need to create, and insert, a subcell for the single particle that 135 | // is stored in this cell. 136 | if (cur_cell->num_subparticles == 0) 137 | { 138 | if (particle_position == cur_cell->mass_center) 139 | { 140 | // We want two particles in the same place... 141 | // Thats equivalent to a single particle with summed masses. 142 | // mass_center won't change. 143 | cur_cell->total_mass += particle_mass; 144 | return; 145 | } 146 | 147 | // We move the single particle to a subcell. 
148 | int quadrant_existing_particle = (cur_cell->mass_center - cur_cell->cell_center).quadrant(); 149 | cur_cell->add_leafcell(quadrant_existing_particle, cur_cell->total_mass, cur_cell->mass_center); 150 | } 151 | 152 | // We assume inserting will succeed, and update total_mass and mass_center accordingly 153 | cur_cell->total_mass += particle_mass; 154 | cur_cell->mass_center = cur_cell->mass_center * (float) (cur_cell->num_subparticles); 155 | cur_cell->mass_center += particle_position; 156 | cur_cell->mass_center /= (cur_cell->num_subparticles+1); 157 | 158 | // If we can add a leaf-cell in an empty slot, we do so. 159 | if (cur_cell->sub_cells[quadrant_new_particle] == nullptr) 160 | { 161 | cur_cell->add_leafcell(quadrant_new_particle, particle_mass, particle_position); 162 | return; 163 | } 164 | 165 | // Else we recurse to the occupied cell. 166 | else 167 | { 168 | cur_cell->num_subparticles += 1; 169 | cur_cell = cur_cell->sub_cells[quadrant_new_particle]; 170 | } 171 | } 172 | } 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/RPBarnesHutApproximator.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPBarnesHutApproximator.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 
17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #ifndef RPBarnesHutApproximator_hpp 25 | #define RPBarnesHutApproximator_hpp 26 | 27 | #include "RPGraph.hpp" 28 | #include "RPCommon.hpp" 29 | 30 | namespace RPGraph 31 | { 32 | class BarnesHutCell 33 | { 34 | public: 35 | void add_leafcell(int quadrant, float mass, Coordinate pos); 36 | float lb, rb, ub, bb; 37 | 38 | // BarnesHutCell always contain either a single particle, or subcells (at most 4). 39 | BarnesHutCell(Coordinate position, float length, Coordinate particle_position, float particle_mass); 40 | ~BarnesHutCell(); 41 | 42 | Coordinate cell_center, mass_center; 43 | nid_t num_subparticles = 0; 44 | float total_mass; 45 | const float length; // length of a cell = width = height 46 | BarnesHutCell *sub_cells[4] = {nullptr, nullptr, nullptr, nullptr}; // per quadrant. 
47 | 48 | void insertParticle(Coordinate particle_position, float particle_mass); 49 | }; 50 | 51 | class BarnesHutApproximator 52 | { 53 | public: 54 | BarnesHutApproximator(Coordinate root_center, float root_length, float theta); 55 | Real2DVector approximateForce(Coordinate particle_pos, float particle_mass, float theta); 56 | void insertParticle(Coordinate particle_position, float particle_mass); 57 | 58 | void reset(Coordinate root_center, float root_length); 59 | void setTheta(float theta); 60 | 61 | private: 62 | BarnesHutCell *root_cell = nullptr; 63 | const float theta; 64 | Coordinate root_center; 65 | float root_length; 66 | 67 | }; 68 | } 69 | 70 | #endif /* RPBarnesHutApproximator_hpp */ 71 | -------------------------------------------------------------------------------- /src/RPCPUForceAtlas2.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPCPUForceAtlas2.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include "RPCPUForceAtlas2.hpp" 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace RPGraph 32 | { 33 | // CPUForceAtlas2 definitions. 
34 | CPUForceAtlas2::CPUForceAtlas2(GraphLayout &layout, bool use_barneshut, 35 | bool strong_gravity, float gravity, 36 | float scale) 37 | : ForceAtlas2(layout, use_barneshut, strong_gravity, gravity, scale), 38 | BH_Approximator{layout.getCenter(), layout.getSpan()+10, theta} 39 | { 40 | forces = (Real2DVector *)malloc(sizeof(Real2DVector) * layout.graph.num_nodes()); 41 | prev_forces = (Real2DVector *)malloc(sizeof(Real2DVector) * layout.graph.num_nodes()); 42 | for (nid_t n = 0; n < layout.graph.num_nodes(); ++n) 43 | { 44 | forces[n] = Real2DVector(0.0f, 0.0f); 45 | prev_forces[n] = Real2DVector(0.0f, 0.0f); 46 | } 47 | } 48 | 49 | CPUForceAtlas2::~CPUForceAtlas2() 50 | { 51 | free(forces); 52 | free(prev_forces); 53 | } 54 | 55 | void CPUForceAtlas2::apply_attract(nid_t n) 56 | { 57 | Real2DVector f = Real2DVector(0.0, 0.0); 58 | for (nid_t t : layout.graph.neighbors_with_geq_id(n)) 59 | { 60 | // Here we define the magnitude of the attractive force `f_a' 61 | // *divided* by the length distance between `n' and `t', i.e. `f_a_over_d' 62 | float f_a_over_d; 63 | if (use_linlog) 64 | { 65 | float dist = layout.getDistance(n, t); 66 | f_a_over_d = dist == 0.0 ? 
std::numeric_limits::max() : logf(1+dist) / dist; 67 | } 68 | 69 | else 70 | { 71 | f_a_over_d = 1.0; 72 | } 73 | 74 | f += layout.getDistanceVector(n, t) * f_a_over_d; 75 | 76 | //TODO: this is temporary, but required due to 77 | // iteration over neighbors_with_geq_id 78 | forces[t] += layout.getDistanceVector(n, t) * (-f_a_over_d); 79 | 80 | // forces[n] += getNormalizedDistanceVector(n, t) * f_a(n, t); 81 | } 82 | forces[n] += f; 83 | } 84 | 85 | void CPUForceAtlas2::apply_repulsion(nid_t n) 86 | { 87 | if (use_barneshut) 88 | { 89 | forces[n] += (BH_Approximator.approximateForce(layout.getCoordinate(n), mass(n), theta) * k_r); 90 | } 91 | 92 | else 93 | { 94 | for (nid_t t = 0; t < layout.graph.num_nodes(); ++t) 95 | { 96 | if (n == t) continue; 97 | float distance = layout.getDistance(n, t); 98 | float f_r = distance == 0.0 ? std::numeric_limits::max() : k_r * mass(n) * mass(t) / distance / distance; 99 | forces[n] += layout.getDistanceVector(n, t) * f_r; 100 | } 101 | } 102 | } 103 | 104 | void CPUForceAtlas2::apply_gravity(nid_t n) 105 | { 106 | float f_g, d; 107 | 108 | // `d' is the distance from `n' to the center (0.0, 0.0) 109 | d = std::sqrt(layout.getX(n)*layout.getX(n) + layout.getY(n)*layout.getY(n)); 110 | if(d == 0.0) return; 111 | 112 | // Here we define the magnitude of the gravitational force `f_g'. 113 | if (strong_gravity) 114 | { 115 | f_g = k_g*mass(n); 116 | } 117 | 118 | else 119 | { 120 | f_g = k_g*mass(n) / d; 121 | } 122 | 123 | forces[n] += (Real2DVector(-layout.getX(n), -layout.getY(n)) * f_g); 124 | } 125 | 126 | // Eq. (8) 127 | float CPUForceAtlas2::swg(nid_t n) 128 | { 129 | return (forces[n] - prev_forces[n]).magnitude(); 130 | } 131 | 132 | // Eq. (9) 133 | float CPUForceAtlas2::s(nid_t n) 134 | { 135 | return (k_s * global_speed)/(1.0f+global_speed*std::sqrt(swg(n))); 136 | } 137 | 138 | // Eq. 
(12) 139 | float CPUForceAtlas2::tra(nid_t n) 140 | { 141 | return (forces[n] + prev_forces[n]).magnitude() / 2.0; 142 | } 143 | 144 | void CPUForceAtlas2::updateSpeeds() 145 | { 146 | // The following speed-update procedure for ForceAtlas2 follows 147 | // the one by Gephi: 148 | // https://github.com/gephi/gephi/blob/6efb108718fa67d1055160f3a18b63edb4ca7be2/modules/LayoutPlugin/src/main/java/org/gephi/layout/plugin/forceAtlas2/ForceAtlas2.java 149 | 150 | // `Auto adjust speeds' 151 | float total_swinging = 0.0; 152 | float total_effective_traction = 0.0; 153 | for (nid_t nid = 0; nid < layout.graph.num_nodes(); ++nid) 154 | { 155 | total_swinging += mass(nid) * swg(nid); // Eq. (11) 156 | total_effective_traction += mass(nid) * tra(nid); // Eq. (13) 157 | } 158 | 159 | // We want to find the right jitter tollerance for this graph, 160 | // such that totalSwinging < tolerance * totalEffectiveTraction 161 | 162 | float estimated_optimal_jitter_tollerance = 0.05 * std::sqrt(layout.graph.num_nodes()); 163 | float minJT = std::sqrt(estimated_optimal_jitter_tollerance); 164 | float jt = jitter_tolerance * fmaxf(minJT, 165 | fminf(k_s_max, 166 | estimated_optimal_jitter_tollerance * total_effective_traction / powf(layout.graph.num_nodes(), 2.0) 167 | ) 168 | ); 169 | float min_speed_efficiency = 0.05; 170 | 171 | // `Protect against erratic behavior' 172 | if (total_swinging / total_effective_traction > 2.0) 173 | { 174 | if (speed_efficiency > min_speed_efficiency) speed_efficiency *= 0.5; 175 | jt = fmaxf(jt, jitter_tolerance); 176 | } 177 | 178 | // `Speed efficiency is how the speed really corrosponds to the swinging vs. convergence tradeoff.' 
179 | // `We adjust it slowly and carefully' 180 | float targetSpeed = jt * speed_efficiency * total_effective_traction / total_swinging; 181 | 182 | if (total_swinging > jt * total_effective_traction) 183 | { 184 | if (speed_efficiency > min_speed_efficiency) 185 | { 186 | speed_efficiency *= 0.7; 187 | } 188 | } 189 | else if (global_speed < 1000) 190 | { 191 | speed_efficiency *= 1.3; 192 | } 193 | 194 | // `But the speed shouldn't rise much too quickly, ... would make convergence drop dramatically'. 195 | float max_rise = 0.5; 196 | global_speed += fminf(targetSpeed - global_speed, max_rise * global_speed); 197 | } 198 | 199 | void CPUForceAtlas2::apply_displacement(nid_t n) 200 | { 201 | if (prevent_overlap) 202 | { 203 | // Not yet implemented 204 | exit(EXIT_FAILURE); 205 | } 206 | 207 | else 208 | { 209 | 210 | float factor = global_speed / (1.0 + std::sqrt(global_speed * swg(n))); 211 | layout.moveNode(n, forces[n] * factor); 212 | } 213 | } 214 | 215 | void CPUForceAtlas2::rebuild_bh() 216 | { 217 | BH_Approximator.reset(layout.getCenter(), layout.getSpan()+10); 218 | 219 | for (nid_t n = 0; n < layout.graph.num_nodes(); ++n) 220 | { 221 | BH_Approximator.insertParticle(layout.getCoordinate(n), 222 | layout.graph.degree(n)+1); 223 | } 224 | } 225 | 226 | void CPUForceAtlas2::doStep() 227 | { 228 | if (use_barneshut) rebuild_bh(); 229 | 230 | for (nid_t n = 0; n < layout.graph.num_nodes(); ++n) 231 | { 232 | apply_gravity(n); 233 | apply_attract(n); 234 | apply_repulsion(n); 235 | } 236 | 237 | updateSpeeds(); 238 | 239 | for (nid_t n = 0; n < layout.graph.num_nodes(); ++n) 240 | { 241 | apply_displacement(n); 242 | prev_forces[n] = forces[n]; 243 | forces[n] = Real2DVector(0.0f, 0.0f); 244 | } 245 | iteration++; 246 | } 247 | 248 | void CPUForceAtlas2::sync_layout() {} 249 | 250 | } 251 | -------------------------------------------------------------------------------- /src/RPCPUForceAtlas2.hpp: 
--------------------------------------------------------------------------------
/*
 ==============================================================================

 RPCPUForceAtlas2.hpp
 Copyright © 2016, 2017, 2018 G. Brinkmann

 This file is part of graph_viewer.

 graph_viewer is free software: you can redistribute it and/or modify
 it under the terms of version 3 of the GNU Affero General Public License as
 published by the Free Software Foundation.

 graph_viewer is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with graph_viewer. If not, see <https://www.gnu.org/licenses/>.

 ==============================================================================
*/

#ifndef RPCPUForceAtlas2_hpp
#define RPCPUForceAtlas2_hpp

#include "RPForceAtlas2.hpp"

namespace RPGraph
{
    // ForceAtlas2 layout algorithm executed on the CPU.
    class CPUForceAtlas2 : public ForceAtlas2
    {
    public:
        // Takes the same arguments as the ForceAtlas2 base-class constructor.
        // NOTE(review): presumably they are forwarded unchanged; the
        // definition is not part of this header.
        CPUForceAtlas2(GraphLayout &layout, bool use_barneshut,
                       bool strong_gravity, float gravity, float scale);
        ~CPUForceAtlas2();

        // One layout iteration: accumulate the forces on every node, update
        // the speeds, then displace every node.
        void doStep() override;

        // Empty in the CPU implementation.
        void sync_layout() override;

    private:
        // Per-node force arrays, indexed by node id: the force accumulated
        // in the current step and the one kept from the previous step.
        // NOTE(review): raw pointers; allocation and release are not visible
        // in this header -- presumably done by the constructor/destructor.
        Real2DVector *forces, *prev_forces;
        BarnesHutApproximator BH_Approximator;

        float swg(nid_t n); // swinging ..
        float s(nid_t n); // swinging as well ..
        float tra(nid_t n); // traction ..

        // Substeps of one step in layout process.
        void rebuild_bh();
        void apply_repulsion(nid_t n);
        void apply_gravity(nid_t n);
        void apply_attract(nid_t n);
        void updateSpeeds();
        void apply_displacement(nid_t n);
    };
}
#endif

--------------------------------------------------------------------------------
/src/RPCommon.cpp:
--------------------------------------------------------------------------------
/*
 ==============================================================================

 RPCommon.cpp
 Copyright © 2016, 2017, 2018 G. Brinkmann

 This file is part of graph_viewer.

 graph_viewer is free software: you can redistribute it and/or modify
 it under the terms of version 3 of the GNU Affero General Public License as
 published by the Free Software Foundation.

 graph_viewer is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with graph_viewer. If not, see <https://www.gnu.org/licenses/>.
20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include "RPCommon.hpp" 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | // by http://stackoverflow.com/a/19841704 32 | bool is_file_exists(std::string filepath) 33 | { 34 | std::ifstream infile(filepath); 35 | return infile.good(); 36 | } 37 | 38 | // wrap libgen basename until C++17 39 | std::string basename(std::string filepath) 40 | { 41 | char *result_p = new char[filepath.size() + 1]; 42 | strcpy(result_p, filepath.c_str()); 43 | std::string result = basename(result_p); 44 | delete[] result_p; 45 | return result; 46 | } 47 | 48 | namespace RPGraph 49 | { 50 | float get_random(float lowerbound, float upperbound) 51 | { 52 | return lowerbound + (upperbound-lowerbound) * static_cast (random()) / static_cast (RAND_MAX); 53 | } 54 | 55 | 56 | /* Definitions for Real2DVector */ 57 | Real2DVector::Real2DVector(float x, float y): x(x), y(y) {}; 58 | 59 | float Real2DVector::magnitude() 60 | { 61 | return std::sqrt(x*x + y*y); 62 | } 63 | 64 | float Real2DVector::distance(RPGraph::Real2DVector to) 65 | { 66 | const float dx = (x - to.x)*(x - to.x); 67 | const float dy = (y - to.y)*(y - to.y); 68 | return std::sqrt(dx*dx + dy*dy); 69 | } 70 | 71 | // Various operators on Real2DVector 72 | Real2DVector Real2DVector::operator*(float b) 73 | { 74 | return Real2DVector(this->x * b, this->y * b); 75 | } 76 | 77 | Real2DVector Real2DVector::operator/(float b) 78 | { 79 | return Real2DVector(this->x / b, this->y / b); 80 | } 81 | 82 | 83 | Real2DVector Real2DVector::operator+(Real2DVector b) 84 | { 85 | return Real2DVector(this->x + b.x, this->y + b.y); 86 | } 87 | 88 | 89 | Real2DVector Real2DVector::operator-(Real2DVector b) 90 | { 91 | return Real2DVector(this->x - b.x, this->y - b.y); 92 | } 93 | 94 | void Real2DVector::operator+=(Real2DVector b) 95 | { 96 | this->x += b.x; 97 | this->y += b.y; 98 | } 99 | 100 | Real2DVector 
Real2DVector::getNormalized() 101 | { 102 | return Real2DVector(this->x / magnitude(), this->y / magnitude()); 103 | } 104 | 105 | Real2DVector Real2DVector::normalize() 106 | { 107 | const float m = magnitude(); 108 | this->x /= m; 109 | this->y /= m; 110 | return *this; 111 | } 112 | 113 | /* Definitions for Coordinate */ 114 | Coordinate::Coordinate(float x, float y) : x(x), y(y) {}; 115 | 116 | // Various operators on Coordinate 117 | Coordinate Coordinate::operator+(float b) 118 | { 119 | return Coordinate(x + b, y + b); 120 | } 121 | 122 | Coordinate Coordinate::operator*(float b) 123 | { 124 | return Coordinate(this->x*b, this->y*b); 125 | } 126 | 127 | Coordinate Coordinate::operator/(float b) 128 | { 129 | return Coordinate(this->x/b, this->y/b); 130 | } 131 | 132 | Coordinate Coordinate::operator+(Real2DVector b) 133 | { 134 | return Coordinate(this->x + b.x, this->y + b.y); 135 | } 136 | 137 | Coordinate Coordinate::operator-(Coordinate b) 138 | { 139 | return Coordinate(this->x - b.x, this->y - b.y); 140 | } 141 | 142 | bool Coordinate::operator==(Coordinate b) 143 | { 144 | return (this->x == b.x && this->y == b.y); 145 | } 146 | 147 | float Coordinate::distance(RPGraph::Coordinate to) 148 | { 149 | return std::sqrt((x - to.x)*(x - to.x) + (y - to.y)*(y - to.y)); 150 | } 151 | 152 | float Coordinate::distance2(RPGraph::Coordinate to) 153 | { 154 | return (x - to.x)*(x - to.x) + (y - to.y)*(y - to.y); 155 | } 156 | 157 | void Coordinate::operator/=(float b) 158 | { 159 | this->x /= b; 160 | this->y /= b; 161 | } 162 | 163 | void Coordinate::operator+=(RPGraph::Coordinate b) 164 | { 165 | this->x += b.x; 166 | this->y += b.y; 167 | } 168 | 169 | void Coordinate::operator+=(RPGraph::Real2DVector b) 170 | { 171 | this->x += b.x; 172 | this->y += b.y; 173 | } 174 | 175 | int Coordinate::quadrant() 176 | { 177 | if (x <= 0) 178 | { 179 | if (y >= 0) return 0; 180 | else return 3; 181 | 182 | } 183 | else 184 | { 185 | if (y >= 0) return 1; 186 | else return 
2; 187 | } 188 | } 189 | 190 | float distance(Coordinate from, Coordinate to) 191 | { 192 | const float dx = from.x - to.x; 193 | const float dy = from.y - to.y; 194 | return std::sqrt(dx*dx + dy*dy); 195 | } 196 | 197 | float distance2(Coordinate from, Coordinate to) 198 | { 199 | const float dx = from.x - to.x; 200 | const float dy = from.y - to.y; 201 | return dx*dx + dy*dy; 202 | } 203 | 204 | Real2DVector normalizedDirection(Coordinate from, Coordinate to) 205 | { 206 | const float dx = from.x - to.x; 207 | const float dy = from.y - to.y; 208 | const float len = std::sqrt(dx*dx + dy*dy); 209 | return Real2DVector(dx/len, dy/len); 210 | } 211 | 212 | Real2DVector direction(Coordinate from, Coordinate to) 213 | { 214 | const float dx = from.x - to.x; 215 | const float dy = from.y - to.y; 216 | return Real2DVector(dx, dy); 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/RPCommon.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPCommon.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 
20 | 21 | ============================================================================== 22 | */ 23 | 24 | #ifndef RPCommonUtils_hpp 25 | #define RPCommonUtils_hpp 26 | #include 27 | 28 | #ifdef __NVCC__ 29 | #include 30 | #include 31 | #include 32 | 33 | #define cudaCatchError(ans) { assert_d((ans), __FILE__, __LINE__); } 34 | inline void assert_d(cudaError_t code, const char *file, int line, bool abort=true) 35 | { 36 | if (code != cudaSuccess) 37 | { 38 | fprintf(stderr,"error: (GPUassert) %s (error %d). %s:%d\n", cudaGetErrorString(code), code, file, line); 39 | if (abort) exit(code); 40 | } 41 | } 42 | #endif 43 | bool is_file_exists(std::string filepath); 44 | std::string basename(std::string filepath); 45 | 46 | namespace RPGraph 47 | { 48 | float get_random(float lowerbound, float upperbound); 49 | 50 | class Real2DVector 51 | { 52 | public: 53 | Real2DVector(float x, float y); 54 | float x, y; 55 | float magnitude(); 56 | float distance(Real2DVector to); // to some other Real2DVector `to' 57 | 58 | // Varous operators on Real2DVector 59 | Real2DVector operator*(float b); 60 | Real2DVector operator/(float b); 61 | Real2DVector operator+(Real2DVector b); 62 | Real2DVector operator-(Real2DVector b); 63 | void operator+=(Real2DVector b); 64 | 65 | Real2DVector getNormalized(); 66 | Real2DVector normalize(); 67 | }; 68 | 69 | class Coordinate 70 | { 71 | public: 72 | float x, y; 73 | Coordinate(float x, float y); 74 | 75 | // Various operators on Coordinate 76 | Coordinate operator+(float b); 77 | Coordinate operator*(float b); 78 | Coordinate operator/(float b); 79 | Coordinate operator+(Real2DVector b); 80 | Coordinate operator-(Coordinate b); 81 | bool operator==(Coordinate b); 82 | void operator/=(float b); 83 | void operator+=(Coordinate b); 84 | void operator+=(RPGraph::Real2DVector b); 85 | 86 | int quadrant(); // Of `this' wrt. (0,0). 
87 | float distance(Coordinate to); 88 | float distance2(Coordinate to); 89 | 90 | }; 91 | 92 | float distance(Coordinate from, Coordinate to); 93 | float distance2(Coordinate from, Coordinate to); 94 | 95 | Real2DVector normalizedDirection(Coordinate from, Coordinate to); 96 | Real2DVector direction(Coordinate from, Coordinate to); 97 | 98 | } 99 | 100 | #endif /* RPCommonUtils_hpp */ 101 | -------------------------------------------------------------------------------- /src/RPFA2Kernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPFA2Kernels.cu 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 

 ==============================================================================
*/

#include // NOTE(review): the header name is missing in this copy of the file -- restore it from upstream.
#include "RPFA2Kernels.cuh"
#include "RPBHFA2LaunchParameters.cuh"

/// Some variables for FA2 related to `speed'.
/// They live in device memory and persist across kernel launches; SpeedKernel
/// updates them and DisplacementKernel reads `global_speedd`.
static __device__ float k_s_maxd = 10.0;
static __device__ float global_speedd = 1.0;
static __device__ float speed_efficiencyd = 1.0;
static __device__ float jitter_toleranced = 1.0;
/// Counts the blocks that have finished their partial reduction in SpeedKernel.
static __device__ unsigned int blkcntd_speed_kernel = 0;

/// Adds, for every body, a force pulling it towards the origin to (fxd, fyd).
/// Strong gravity: force = -position * k_g * mass (grows with the distance).
/// Weak gravity:   force = -position / |position| * k_g * mass
///                 (zero for a body exactly at the origin).
__global__
__launch_bounds__(THREADS6, FACTOR6)
void GravityKernel(int nbodiesd, const float k_g, const bool strong_gravity,
                   volatile float * __restrict body_massd,
                   volatile float2 * __restrict body_posd,
                   volatile float * __restrict fxd, volatile float * __restrict fyd)
{
    register int i, inc;

    // iterate over all bodies assigned to thread:
    // start at the global thread index and advance by the total thread count
    inc = blockDim.x * gridDim.x;
    for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nbodiesd; i += inc)
    {
        const float px = body_posd[i].x;
        const float py = body_posd[i].y;

        // `f_g' is the scalar by which the (negated) position is multiplied
        float f_g;
        if(strong_gravity)
        {
            f_g = k_g * body_massd[i];
        }
        else // weak gravity
        {
            // guard against 1/sqrt(0) for a body at the origin
            if (px != 0.0 || py != 0.0)
            {
                f_g = k_g * body_massd[i] * rsqrtf(px*px + py*py);
            }

            else
            {
                f_g = 0.0;
            }
        }

        fxd[i] += (-px * f_g);
        fyd[i] += (-py * f_g);
    }
}

/// Adds, for every edge (sourcesd[i], targetsd[i]), the attractive force
/// between its endpoints: the source is pulled by (target - source) and the
/// target by the opposite vector.
__global__
__launch_bounds__(THREADS6, FACTOR6)
void AttractiveForceKernel(int nedgesd,
                           volatile float2 * __restrict body_posd,
                           volatile float * __restrict fxd, volatile float * __restrict fyd,
                           volatile int * __restrict sourcesd, volatile int * __restrict targetsd)
{
    register int i, inc, source, target;
    // iterate over all edges assigned to thread
    inc = blockDim.x * gridDim.x;
    for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nedgesd; i += inc)
    {
        source = sourcesd[i];
        target = targetsd[i];

        // dx and dy are the components of the vector from source to target.
        const float dx = body_posd[target].x-body_posd[source].x;
        const float dy = body_posd[target].y-body_posd[source].y;

        // Force just depends linearly on distance.
        const float fsx = dx;
        const float fsy = dy;

        const float ftx = -dx;
        const float fty = -dy;


        // Several edges can share an endpoint, so the accumulation has to be
        // atomic. These memory accesses aren't coalesced...
        atomicAdd((float*)fxd+source, fsx);
        atomicAdd((float*)fyd+source, fsy);

        atomicAdd((float*)fxd+target, ftx);
        atomicAdd((float*)fyd+target, fty);
    }
}

/// Computes the mass-weighted total swinging and total effective traction of
/// all bodies and derives the new global speed from them (same procedure as
/// the CPU implementation / Gephi).
/// `swgd` and `etrad` are scratch arrays that receive one partial sum per
/// block, so they need at least gridDim.x elements.
/// NOTE(review): the shared arrays have THREADS1 elements and are indexed by
/// threadIdx.x, so the kernel presumably has to be launched with exactly
/// THREADS1 threads per block -- confirm at the launch site.
__global__
__launch_bounds__(THREADS1, FACTOR1)
void SpeedKernel(int nbodiesd,
                 volatile float * __restrict fxd , volatile float * __restrict fyd,
                 volatile float * __restrict fx_prevd , volatile float * __restrict fy_prevd,
                 volatile float * __restrict body_massd, volatile float * __restrict swgd, volatile float * __restrict etrad)
{
    register int i, j, k, inc;
    register float swg_thread, swg_body, etra_thread, etra_body, dx, dy, mass;
    // setra: effective_traction (in shared mem.)
    // sswg: swing per node (in shared mem.)
    __shared__ volatile float sswg[THREADS1], setra[THREADS1];

    // initialize with valid data (in case #bodies < #threads)
    swg_thread = 0;
    etra_thread = 0;

    // scan all bodies
    i = threadIdx.x;
    inc = THREADS1 * gridDim.x;

    for (j = i + blockIdx.x * THREADS1; j < nbodiesd; j += inc)
    {
        mass = body_massd[j];

        // swinging: |F - F_prev|
        dx = fxd[j] - fx_prevd[j];
        dy = fyd[j] - fy_prevd[j];
        swg_body = sqrtf(dx*dx + dy*dy);
        swg_thread += mass * swg_body;

        // effective traction: |F + F_prev| / 2
        dx = fxd[j] + fx_prevd[j];
        dy = fyd[j] + fy_prevd[j];
        etra_body = sqrtf(dx*dx + dy*dy) / 2.0;
        etra_thread += mass * etra_body;
    }

    // reduction in shared memory
    sswg[i] = swg_thread;
    setra[i] = etra_thread;

    // tree reduction: the number of active threads is halved in every round
    for (j = THREADS1 / 2; j > 0; j /= 2)
    {
        __syncthreads();
        if (i < j)
        {
            k = i + j;
            sswg[i] = swg_thread = sswg[i] + sswg[k];
            setra[i] = etra_thread = setra[i] + setra[k];
        }
    }

    // In thread 0, swg_thread and etra_thread are now the total swinging
    // and the total effective traction of this block (across all its threads)

    // write block result to global memory
    if (i == 0)
    {
        k = blockIdx.x;
        swgd[k] = swg_thread;
        etrad[k] = etra_thread;
        // make the partial sums visible to other blocks before signalling
        __threadfence();

        // atomicInc returns the previous counter value and wraps to 0 once
        // that value reaches `inc'; the block that reads gridDim.x - 1 is
        // therefore the last one to finish, and the counter is left at 0 for
        // the next launch.
        inc = gridDim.x - 1;
        if (inc == atomicInc(&blkcntd_speed_kernel, inc))
        {
            swg_thread = 0;
            etra_thread = 0;

            // the last block adds up the partial sums of all blocks
            for (j = 0; j <= inc; j++)
            {
                swg_thread += swgd[j];
                etra_thread += etrad[j];
            }
            // we need to do some calculations to derive
            // from this the new global speed
            float estimated_optimal_jitter_tollerance = 0.05 * sqrtf(nbodiesd);
            float minJT = sqrtf(estimated_optimal_jitter_tollerance);
            float jt = jitter_toleranced * fmaxf(minJT,
                                                 fminf(k_s_maxd, estimated_optimal_jitter_tollerance * etra_thread / powf(nbodiesd, 2.0)
                                                       ));
            float min_speed_efficiency = 0.05;

            // `Protect against erratic behavior'
            if (swg_thread / etra_thread > 2.0)
            {
                if (speed_efficiencyd > min_speed_efficiency) speed_efficiencyd *= 0.5;
                jt = fmaxf(jt, jitter_toleranced);
            }

            // `Speed efficiency is how the speed really corresponds to the swinging vs. convergence tradeoff.'
            // `We adjust it slowly and carefully'
            float targetSpeed = jt * speed_efficiencyd * etra_thread / swg_thread;

            if (swg_thread > jt * etra_thread)
            {
                if (speed_efficiencyd > min_speed_efficiency)
                {
                    speed_efficiencyd *= 0.7;
                }
            }
            else if (global_speedd < 1000)
            {
                speed_efficiencyd *= 1.3;
            }

            // `But the speed shouldn't rise much too quickly, ... would make convergence drop dramatically'.
            float max_rise = 0.5;
            global_speedd += fminf(targetSpeed - global_speedd, max_rise * global_speedd);
        }
    }
}

/// Moves every body along its accumulated force, scaled by
/// global_speed / (1 + sqrt(global_speed * swinging)), then stores the force
/// as the previous force and clears the accumulator for the next iteration.
__global__
__launch_bounds__(THREADS6, FACTOR6)
void DisplacementKernel(int nbodiesd,
                        volatile float2 * __restrict body_posd,
                        volatile float * __restrict fxd, volatile float * __restrict fyd,
                        volatile float * __restrict fx_prevd, volatile float * __restrict fy_prevd)
{
    register int i, inc;
    register float factor, swg, dx, dy, fx, fy;
    // read the global speed once per thread
    register float global_speed = global_speedd;
    // iterate over all bodies assigned to thread
    inc = blockDim.x * gridDim.x;
    for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nbodiesd; i += inc)
    {
        fx = fxd[i];
        fy = fyd[i];
        // swinging of this body: |F - F_prev|
        dx = fx - fx_prevd[i];
        dy = fy - fy_prevd[i];
        swg = sqrtf(dx*dx + dy*dy);
        factor = global_speed / (1.0 + sqrtf(global_speed * swg));

        body_posd[i].x += fx * factor;
        body_posd[i].y += fy * factor;
        fx_prevd[i] = fx;
        fy_prevd[i] = fy;
        fxd[i] = 0.0;
        fyd[i] = 0.0;
    }
}

--------------------------------------------------------------------------------
/src/RPFA2Kernels.cuh:
--------------------------------------------------------------------------------
/*
 ==============================================================================

 RPFA2Kernels.cuh
 Copyright © 2016, 2017, 2018 G. Brinkmann

 This file is part of graph_viewer.

 graph_viewer is free software: you can redistribute it and/or modify
 it under the terms of version 3 of the GNU Affero General Public License as
 published by the Free Software Foundation.

 graph_viewer is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with graph_viewer. If not, see <https://www.gnu.org/licenses/>.

 ==============================================================================
*/

#ifndef RPFA2Kernels_cuh
#define RPFA2Kernels_cuh

#include "RPBHFA2LaunchParameters.cuh"

// Prototypes of the ForceAtlas2-specific kernels. All pointer arguments refer
// to device memory; the force arrays (fxd, fyd) are accumulated into by the
// first two kernels and consumed by the last two.

// Adds the gravitational force (towards the origin) on each of the
// `nbodiesd` bodies to (fxd, fyd).
__global__
__launch_bounds__(THREADS6, FACTOR6)
void GravityKernel(int nbodiesd, const float k_g, const bool strong_gravity,
                   volatile float * __restrict body_massd,
                   volatile float2 * __restrict body_posd,
                   volatile float * __restrict fxd, volatile float * __restrict fyd);

// Adds the attractive force along each of the `nedgesd` edges
// (sourcesd[i], targetsd[i]) to both endpoints.
__global__
__launch_bounds__(THREADS6, FACTOR6)
void AttractiveForceKernel(int nedgesd,
                           volatile float2 * __restrict body_posd,
                           volatile float * __restrict fxd, volatile float * __restrict fyd,
                           volatile int * __restrict sourcesd, volatile int * __restrict targetsd);

// Reduces swinging and effective traction over all bodies and updates the
// global speed; `swgd` and `etrad` hold one partial sum per block.
__global__
__launch_bounds__(THREADS1, FACTOR1)
void SpeedKernel(int nbodiesd,
                 volatile float * __restrict fxd , volatile float * __restrict fyd,
                 volatile float * __restrict fx_prevd , volatile float * __restrict fy_prevd,
                 volatile float * __restrict body_massd, volatile float * __restrict swgd, volatile float * __restrict etrad);

// Moves each body along its force, saves the force into (fx_prevd, fy_prevd)
// and clears (fxd, fyd).
__global__
__launch_bounds__(THREADS6, FACTOR6)
void DisplacementKernel(int nbodiesd,
                        volatile float2 * __restrict body_posd,
                        volatile float * __restrict fxd, volatile float * __restrict fyd,
                        volatile float * __restrict fx_prevd, volatile float * __restrict fy_prevd);

#endif

--------------------------------------------------------------------------------
/src/RPForceAtlas2.cpp:
--------------------------------------------------------------------------------
/*
 ==============================================================================

 RPForceAtlas2.cpp
 Copyright © 2016, 2017, 2018 G. Brinkmann

 This file is part of graph_viewer.

 graph_viewer is free software: you can redistribute it and/or modify
 it under the terms of version 3 of the GNU Affero General Public License as
 published by the Free Software Foundation.

 graph_viewer is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with graph_viewer. If not, see <https://www.gnu.org/licenses/>.
20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include "RPForceAtlas2.hpp" 25 | 26 | namespace RPGraph 27 | { 28 | ForceAtlas2::ForceAtlas2(GraphLayout &layout, bool use_barneshut, 29 | bool strong_gravity, float gravity, float scale) 30 | : LayoutAlgorithm(layout), use_barneshut{use_barneshut}, 31 | strong_gravity{strong_gravity} 32 | { 33 | iteration = 0; 34 | 35 | setGravity(gravity); 36 | setScale(scale); 37 | 38 | global_speed = 1.0; 39 | speed_efficiency = 1.0; 40 | jitter_tolerance = 1.0; 41 | 42 | k_s = 0.1; 43 | k_s_max = 10.0; 44 | 45 | theta = 1.0; 46 | epssq = 0.05 * 0.05; 47 | itolsq = 1.0f / (theta * theta); 48 | 49 | delta = 0.0; 50 | 51 | prevent_overlap = false; 52 | use_linlog = false; 53 | 54 | layout.randomizePositions(); 55 | } 56 | 57 | ForceAtlas2::~ForceAtlas2(){}; 58 | 59 | void ForceAtlas2::doSteps(int n) 60 | { 61 | for (int i = 0; i < n; ++i) doStep(); 62 | } 63 | 64 | void ForceAtlas2::setScale(float s) 65 | { 66 | k_r = s; 67 | } 68 | 69 | void ForceAtlas2::setGravity(float g) 70 | { 71 | k_g = g; 72 | } 73 | 74 | float ForceAtlas2::mass(nid_t n) 75 | { 76 | return layout.graph.degree(n) + 1.0; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/RPForceAtlas2.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPForceAtlas2.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 

 graph_viewer is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU Affero General Public License for more details.

 You should have received a copy of the GNU Affero General Public License
 along with graph_viewer. If not, see <https://www.gnu.org/licenses/>.

 ==============================================================================
*/

#ifndef RPForceAtlas2_hpp
#define RPForceAtlas2_hpp

#include "RPLayoutAlgorithm.hpp"
#include "RPBarnesHutApproximator.hpp"

namespace RPGraph
{
    // Abstract base class for the ForceAtlas2 implementations: holds the
    // algorithm parameters; the iteration itself (doStep) is supplied by the
    // derived classes.
    class ForceAtlas2 : public LayoutAlgorithm
    {
    public:
        // Stores the switches and force scalars, initializes the remaining
        // parameters to their defaults and randomizes the node positions of
        // `layout`.
        ForceAtlas2(GraphLayout &layout, bool use_barneshut,
                    bool strong_gravity, float gravity, float scale);
        // NOTE(review): not declared virtual here; whether deleting through a
        // base pointer is safe depends on LayoutAlgorithm's destructor --
        // confirm in RPLayoutAlgorithm.hpp.
        ~ForceAtlas2();

        virtual void doStep() = 0; // one layout iteration
        void doSteps(int n);       // calls doStep() n times
        void setScale(float s);    // sets k_r
        void setGravity(float s);  // sets k_g
        float mass(nid_t n);       // degree of node n, plus one
        // The constructor sets prevent_overlap and use_linlog to false; the
        // other two are taken from its arguments.
        bool prevent_overlap, use_barneshut, use_linlog, strong_gravity;

    protected:
        int iteration;
        float k_r, k_g; // scalars for repulsive and gravitational force.
        float delta; // edgeweight influence.
        float global_speed;

        // Parameters used in adaptive temperature
        float speed_efficiency, jitter_tolerance;
        float k_s, k_s_max; // magic constants related to swinging.

        // Barnes-Hut parameters
        float theta;   // Accuracy
        float epssq;   // Softening (Epsilon, squared)
        float itolsq;  // Inverse tolerance, squared
    };
}
#endif

--------------------------------------------------------------------------------
/src/RPGPUForceAtlas2.cu:
--------------------------------------------------------------------------------
/*
 ==============================================================================

 RPGPUForceAtlas2.cu
 Copyright © 2016, 2017, 2018 G.
Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "time.h" 29 | 30 | #include "RPGPUForceAtlas2.hpp" 31 | #include "RPBHFA2LaunchParameters.cuh" 32 | #include "RPBHKernels.cuh" 33 | #include "RPFA2Kernels.cuh" 34 | 35 | namespace RPGraph 36 | { 37 | CUDAForceAtlas2::CUDAForceAtlas2(GraphLayout &layout, bool use_barneshut, 38 | bool strong_gravity, float gravity, 39 | float scale) 40 | : ForceAtlas2(layout, use_barneshut, strong_gravity, gravity, scale) 41 | { 42 | int deviceCount; 43 | cudaGetDeviceCount(&deviceCount); 44 | if (deviceCount == 0) 45 | { 46 | fprintf(stderr, "error: No CUDA devices found.\n"); 47 | exit(EXIT_FAILURE); 48 | } 49 | 50 | // Host initialization and setup // 51 | nbodies = layout.graph.num_nodes(); 52 | nedges = layout.graph.num_edges(); 53 | 54 | body_pos = (float2 *)malloc(sizeof(float2) * layout.graph.num_nodes()); 55 | body_mass = (float *)malloc(sizeof(float) * layout.graph.num_nodes()); 56 | sources = (int *) malloc(sizeof(int) * layout.graph.num_edges()); 57 | targets = (int *) malloc(sizeof(int) * layout.graph.num_edges()); 58 | fx = (float *)malloc(sizeof(float) * layout.graph.num_nodes()); 59 | fy = (float *)malloc(sizeof(float) * layout.graph.num_nodes()); 60 | fx_prev 
= (float *)malloc(sizeof(float) * layout.graph.num_nodes()); 61 | fy_prev = (float *)malloc(sizeof(float) * layout.graph.num_nodes()); 62 | 63 | for (nid_t n = 0; n < layout.graph.num_nodes(); ++n) 64 | { 65 | body_pos[n] = {layout.getX(n), layout.getY(n)}; 66 | body_mass[n] = ForceAtlas2::mass(n); 67 | fx[n] = 0.0; 68 | fy[n] = 0.0; 69 | fx_prev[n] = 0.0; 70 | fy_prev[n] = 0.0; 71 | } 72 | 73 | int cur_sources_idx = 0; 74 | int cur_targets_idx = 0; 75 | 76 | // Initialize the sources and targets arrays with edge-data. 77 | for (nid_t source_id = 0; source_id < layout.graph.num_nodes(); ++source_id) 78 | { 79 | for (nid_t target_id : layout.graph.neighbors_with_geq_id(source_id)) 80 | { 81 | sources[cur_sources_idx++] = source_id; 82 | targets[cur_targets_idx++] = target_id; 83 | } 84 | } 85 | 86 | // GPU initialization and setup // 87 | cudaDeviceProp deviceProp; 88 | cudaGetDeviceProperties(&deviceProp, 0); 89 | 90 | if (deviceProp.warpSize != WARPSIZE) 91 | { 92 | printf("Warpsize of device is %d, but we anticipated %d\n", deviceProp.warpSize, WARPSIZE); 93 | exit(EXIT_FAILURE); 94 | 95 | } 96 | cudaFuncSetCacheConfig(BoundingBoxKernel, cudaFuncCachePreferShared); 97 | cudaFuncSetCacheConfig(TreeBuildingKernel, cudaFuncCachePreferL1); 98 | cudaFuncSetCacheConfig(ClearKernel1, cudaFuncCachePreferL1); 99 | cudaFuncSetCacheConfig(ClearKernel2, cudaFuncCachePreferL1); 100 | cudaFuncSetCacheConfig(SummarizationKernel, cudaFuncCachePreferShared); 101 | cudaFuncSetCacheConfig(SortKernel, cudaFuncCachePreferL1); 102 | #if __CUDA_ARCH__ < 300 103 | cudaFuncSetCacheConfig(ForceCalculationKernel, cudaFuncCachePreferL1); 104 | #endif 105 | cudaFuncSetCacheConfig(DisplacementKernel, cudaFuncCachePreferL1); 106 | 107 | cudaGetLastError(); // reset error value 108 | 109 | // Allocate space on device. 
110 | mp_count = deviceProp.multiProcessorCount; 111 | max_threads_per_block = deviceProp.maxThreadsPerBlock; 112 | 113 | nnodes = std::max(2 * nbodies, mp_count * max_threads_per_block); 114 | 115 | // Round up to next multiple of WARPSIZE 116 | while ((nnodes & (WARPSIZE-1)) != 0) nnodes++; 117 | nnodes--; 118 | 119 | // child stores structure of the quadtree. values point to IDs. 120 | cudaCatchError(cudaMalloc((void **)&childl, sizeof(int) * (nnodes+1) * 4)); 121 | 122 | // the following properties, for each node in the quadtree (both internal and leaf) 123 | cudaCatchError(cudaMalloc((void **)&body_massl, sizeof(float) * nbodies)); 124 | cudaCatchError(cudaMalloc((void **)&node_massl, sizeof(float) * (nnodes+1))); 125 | cudaCatchError(cudaMalloc((void **)&body_posl,sizeof(float2) * nbodies)); 126 | cudaCatchError(cudaMalloc((void **)&node_posl, sizeof(float2) * (nnodes+1))); 127 | // count contains the number of nested nodes for each node in quadtree 128 | cudaCatchError(cudaMalloc((void **)&countl, sizeof(int) * (nnodes+1))); 129 | // start contains ... 
130 | cudaCatchError(cudaMalloc((void **)&startl, sizeof(int) * (nnodes+1))); 131 | cudaCatchError(cudaMalloc((void **)&sortl, sizeof(int) * (nnodes+1))); 132 | 133 | 134 | cudaCatchError(cudaMalloc((void **)&sourcesl,sizeof(int) * (nedges))); 135 | cudaCatchError(cudaMalloc((void **)&targetsl,sizeof(int) * (nedges))); 136 | cudaCatchError(cudaMalloc((void **)&fxl, sizeof(float) * (nbodies))); 137 | cudaCatchError(cudaMalloc((void **)&fyl, sizeof(float) * (nbodies))); 138 | cudaCatchError(cudaMalloc((void **)&fx_prevl,sizeof(float) * (nbodies))); 139 | cudaCatchError(cudaMalloc((void **)&fy_prevl,sizeof(float) * (nbodies))); 140 | 141 | // Used for reduction in BoundingBoxKernel 142 | cudaCatchError(cudaMalloc((void **)&maxxl, sizeof(float) * mp_count * FACTOR1)); 143 | cudaCatchError(cudaMalloc((void **)&maxyl, sizeof(float) * mp_count * FACTOR1)); 144 | cudaCatchError(cudaMalloc((void **)&minxl, sizeof(float) * mp_count * FACTOR1)); 145 | cudaCatchError(cudaMalloc((void **)&minyl, sizeof(float) * mp_count * FACTOR1)); 146 | 147 | // Used for reduction in SpeedKernel 148 | cudaCatchError(cudaMalloc((void **)&swgl, sizeof(float) * mp_count * FACTOR1)); 149 | cudaCatchError(cudaMalloc((void **)&etral, sizeof(float) * mp_count * FACTOR1)); 150 | 151 | // Copy host data to device. 152 | cudaCatchError(cudaMemcpy(body_massl, body_mass, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 153 | cudaCatchError(cudaMemcpy(body_posl, body_pos, sizeof(float2) * nbodies, cudaMemcpyHostToDevice)); 154 | cudaCatchError(cudaMemcpy(sourcesl, sources, sizeof(int) * nedges, cudaMemcpyHostToDevice)); 155 | cudaCatchError(cudaMemcpy(targetsl, targets, sizeof(int) * nedges, cudaMemcpyHostToDevice)); 156 | 157 | // cpy fx, fy , fx_prevl, fy_prevl so they are all initialized to 0 in device memory. 
158 | cudaCatchError(cudaMemcpy(fxl, fx, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 159 | cudaCatchError(cudaMemcpy(fyl, fy, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 160 | cudaCatchError(cudaMemcpy(fx_prevl, fx_prev, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 161 | cudaCatchError(cudaMemcpy(fy_prevl, fy_prev, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 162 | } 163 | 164 | void CUDAForceAtlas2::freeGPUMemory() 165 | { 166 | cudaFree(childl); 167 | 168 | cudaFree(body_massl); 169 | cudaFree(node_massl); 170 | cudaFree(body_posl); 171 | cudaFree(node_posl); 172 | cudaFree(sourcesl); 173 | cudaFree(targetsl); 174 | cudaFree(countl); 175 | cudaFree(startl); 176 | cudaFree(sortl); 177 | 178 | cudaFree(fxl); 179 | cudaFree(fx_prevl); 180 | cudaFree(fyl); 181 | cudaFree(fy_prevl); 182 | 183 | cudaFree(maxxl); 184 | cudaFree(maxyl); 185 | cudaFree(minxl); 186 | cudaFree(minyl); 187 | 188 | cudaFree(swgl); 189 | cudaFree(etral); 190 | } 191 | 192 | CUDAForceAtlas2::~CUDAForceAtlas2() 193 | { 194 | free(body_mass); 195 | free(body_pos); 196 | free(sources); 197 | free(targets); 198 | free(fx); 199 | free(fy); 200 | free(fx_prev); 201 | free(fy_prev); 202 | 203 | freeGPUMemory(); 204 | } 205 | 206 | void CUDAForceAtlas2::doStep() 207 | { 208 | cudaGetLastError(); // clear any errors 209 | GravityKernel<<>>(nbodies, k_g, strong_gravity, body_massl, body_posl, fxl, fyl); 210 | cudaCatchError(cudaGetLastError()); 211 | 212 | AttractiveForceKernel<<>>(nedges, body_posl, fxl, fyl, sourcesl, targetsl); 213 | cudaCatchError(cudaGetLastError()); 214 | 215 | BoundingBoxKernel<<>>(nnodes, nbodies, startl, childl, node_massl, body_posl, node_posl, maxxl, maxyl, minxl, minyl); 216 | cudaCatchError(cudaGetLastError()); 217 | 218 | // Build Barnes-Hut Tree 219 | // 1.) Set all child pointers of internal nodes (in childl) to null (-1) 220 | ClearKernel1<<>>(nnodes, nbodies, childl); 221 | cudaCatchError(cudaGetLastError()); 222 | // 2.) 
Build the tree 223 | TreeBuildingKernel<<>>(nnodes, nbodies, childl, body_posl, node_posl); 224 | cudaCatchError(cudaGetLastError()); 225 | // 3.) Set all cell mass values to -1.0, set all startd to null (-1) 226 | ClearKernel2<<>>(nnodes, startl, node_massl); 227 | cudaCatchError(cudaGetLastError()); 228 | 229 | // Recursively compute mass for each BH. cell. 230 | SummarizationKernel<<>>(nnodes, nbodies, countl, childl, body_massl, node_massl, body_posl, node_posl); 231 | cudaCatchError(cudaGetLastError()); 232 | 233 | SortKernel<<>>(nnodes, nbodies, sortl, countl, startl, childl); 234 | cudaCatchError(cudaGetLastError()); 235 | 236 | // Compute repulsive forces between nodes using BH. tree. 237 | ForceCalculationKernel<<>>(nnodes, nbodies, itolsq, epssq, sortl, childl, body_massl, node_massl, body_posl, node_posl, fxl, fyl, k_r); 238 | cudaCatchError(cudaGetLastError()); 239 | 240 | SpeedKernel<<>>(nbodies, fxl, fyl, fx_prevl, fy_prevl, body_massl, swgl, etral); 241 | cudaCatchError(cudaGetLastError()); 242 | 243 | DisplacementKernel<<>>(nbodies, body_posl, fxl, fyl, fx_prevl, fy_prevl); 244 | cudaCatchError(cudaGetLastError()); 245 | 246 | cudaCatchError(cudaDeviceSynchronize()); 247 | iteration++; 248 | } 249 | 250 | void CUDAForceAtlas2::retrieveLayoutFromGPU() 251 | { 252 | cudaCatchError(cudaMemcpy(body_pos, body_posl, sizeof(float2) * nbodies, cudaMemcpyDeviceToHost)); 253 | cudaDeviceSynchronize(); 254 | } 255 | 256 | void CUDAForceAtlas2::sendLayoutToGPU() 257 | { 258 | cudaCatchError(cudaMemcpy(body_posl, body_pos, sizeof(float2) * nbodies, cudaMemcpyHostToDevice)); 259 | cudaDeviceSynchronize(); 260 | } 261 | 262 | void CUDAForceAtlas2::sendGraphToGPU() 263 | { 264 | cudaCatchError(cudaMemcpy(body_massl, body_mass, sizeof(float) * nbodies, cudaMemcpyHostToDevice)); 265 | cudaCatchError(cudaMemcpy(sourcesl, sources, sizeof(int) * nedges, cudaMemcpyHostToDevice)); 266 | cudaCatchError(cudaMemcpy(targetsl, targets, sizeof(int) * nedges, 
cudaMemcpyHostToDevice)); 267 | cudaDeviceSynchronize(); 268 | } 269 | 270 | void CUDAForceAtlas2::sync_layout() 271 | { 272 | retrieveLayoutFromGPU(); 273 | for(nid_t n = 0; n < layout.graph.num_nodes(); ++n) 274 | { 275 | layout.setX(n, body_pos[n].x); 276 | layout.setY(n, body_pos[n].y); 277 | } 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /src/RPGPUForceAtlas2.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPGPUForceAtlas2.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #ifndef RPGPUForceAtlas2_hpp 25 | #define RPGPUForceAtlas2_hpp 26 | #include "RPForceAtlas2.hpp" 27 | 28 | namespace RPGraph 29 | { 30 | class CUDAForceAtlas2: public ForceAtlas2 31 | { 32 | public: 33 | CUDAForceAtlas2(GraphLayout &layout, bool use_barneshut, 34 | bool strong_gravity, float gravity, float scale); 35 | ~CUDAForceAtlas2(); 36 | void doStep() override; 37 | void sync_layout() override; 38 | 39 | private: 40 | /// CUDA Specific stuff. 41 | // Host storage. 
42 | float *body_mass; 43 | float2 *body_pos; 44 | float *fx, *fy, *fx_prev, *fy_prev; 45 | 46 | // Quick way to represent a graph on the GPU 47 | int *sources, *targets; 48 | 49 | // Pointers to device memory (all suffixed with 'l'). 50 | int *errl, *sortl, *childl, *countl, *startl; 51 | int *sourcesl, *targetsl; 52 | float *body_massl, *node_massl; 53 | float2 *body_posl, *node_posl; 54 | float *minxl, *minyl, *maxxl, *maxyl; 55 | float *fxl, *fyl, *fx_prevl, *fy_prevl; 56 | float *swgl, *etral; 57 | 58 | int mp_count; // Number of multiprocessors on GPU. 59 | int max_threads_per_block; 60 | int nnodes; 61 | int nbodies; 62 | int nedges; 63 | 64 | void sendGraphToGPU(); 65 | void sendLayoutToGPU(); 66 | void retrieveLayoutFromGPU(); 67 | void freeGPUMemory(); 68 | }; 69 | }; 70 | 71 | 72 | #endif /* RPGPUForceAtlas2_hpp */ 73 | -------------------------------------------------------------------------------- /src/RPGraph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPGraph.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 
20 | 21 | ============================================================================== 22 | */ 23 | 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "RPGraph.hpp" 31 | 32 | namespace RPGraph 33 | { 34 | /* Definitions for UGraph */ 35 | UGraph::UGraph(std::string edgelist_path) 36 | { 37 | node_count = 0; 38 | edge_count = 0; 39 | 40 | std::fstream edgelist_file(edgelist_path, std::ifstream::in); 41 | 42 | std::string line; 43 | while(std::getline(edgelist_file, line)) 44 | { 45 | // Skip any comments 46 | if(line[0] == '#') continue; 47 | 48 | // Read source and target from file 49 | nid_t s, t; 50 | std::istringstream(line) >> s >> t; 51 | 52 | if(s != t and !has_edge(s, t)) add_edge(s, t); 53 | } 54 | 55 | edgelist_file.close(); 56 | } 57 | 58 | bool UGraph::has_node(nid_t nid) 59 | { 60 | return node_map.count(nid) > 0; 61 | } 62 | 63 | bool UGraph::has_edge(nid_t s, nid_t t) 64 | { 65 | if(!has_node(s) or !has_node(t)) return false; 66 | 67 | nid_t s_mapped = node_map[s]; 68 | nid_t t_mapped = node_map[t]; 69 | 70 | if(adjacency_list.count(std::min(s_mapped, t_mapped)) == 0) return false; 71 | 72 | std::vector neighbors = adjacency_list[std::min(s_mapped, t_mapped)]; 73 | if(std::find(neighbors.begin(), neighbors.end(), std::max(s_mapped, t_mapped)) == neighbors.end()) 74 | return false; 75 | else 76 | return true; 77 | } 78 | 79 | void UGraph::add_node(nid_t nid) 80 | { 81 | if(!has_node(nid)) 82 | { 83 | node_map[nid] = node_count; 84 | node_map_r[node_count] = nid; 85 | node_count++; 86 | } 87 | } 88 | 89 | void UGraph::add_edge(nid_t s, nid_t t) 90 | { 91 | if(has_edge(s, t)) return; 92 | if(!has_node(s)) add_node(s); 93 | if(!has_node(t)) add_node(t); 94 | nid_t s_mapped = node_map[s]; 95 | nid_t t_mapped = node_map[t]; 96 | 97 | // Insert edge into adjacency_list 98 | adjacency_list[std::min(s_mapped, t_mapped)].push_back(std::max(s_mapped, t_mapped)); 99 | degrees[s_mapped] += 1; 100 | degrees[t_mapped] += 1; 
101 | edge_count++; 102 | } 103 | 104 | nid_t UGraph::num_nodes() 105 | { 106 | return node_count; 107 | } 108 | 109 | nid_t UGraph::num_edges() 110 | { 111 | return edge_count; 112 | } 113 | 114 | nid_t UGraph::degree(nid_t nid) 115 | { 116 | return degrees[nid]; 117 | } 118 | 119 | nid_t UGraph::in_degree(nid_t nid) 120 | { 121 | return degree(nid); 122 | } 123 | 124 | nid_t UGraph::out_degree(nid_t nid) 125 | { 126 | return degree(nid); 127 | } 128 | std::vector UGraph::neighbors_with_geq_id(nid_t nid) 129 | { 130 | return adjacency_list[nid]; 131 | } 132 | 133 | /* Definitions for CSRUGraph */ 134 | 135 | // CSRUGraph represents an undirected graph using a 136 | // compressed sparse row (CSR) datastructure. 137 | CSRUGraph::CSRUGraph(nid_t num_nodes, nid_t num_edges) 138 | { 139 | // `edges' is a concatenation of all edgelists 140 | // `offsets' contains offset (in `edges`) for each nodes' edgelist. 141 | // `nid_to_offset` maps nid to index to be used in `offset' 142 | 143 | // e.g. the edgelist of node with id `nid' starts at 144 | // edges[offsets[nid_to_offset[nid]]] and ends at edges[offsets[nid_to_offset[nid]] + 1] 145 | // (left bound inclusive right bound exclusive) 146 | 147 | edge_count = num_edges; // num_edges counts each bi-directional edge once. 
148 | node_count = num_nodes; 149 | edges = (nid_t *) malloc(sizeof(nid_t) * 2 * edge_count); 150 | offsets = (nid_t *) malloc(sizeof(nid_t) * node_count); 151 | offset_to_nid = (nid_t *) malloc(sizeof(nid_t) * node_count); 152 | 153 | // Create a map from original ids to ids used throughout CSRUGraph 154 | nid_to_offset.reserve(node_count); 155 | 156 | first_free_id = 0; 157 | edges_seen = 0; 158 | } 159 | 160 | CSRUGraph::~CSRUGraph() 161 | { 162 | free(edges); 163 | free(offsets); 164 | free(offset_to_nid); 165 | } 166 | 167 | void CSRUGraph::insert_node(nid_t node_id, std::vector nbr_ids) 168 | { 169 | nid_t source_id_old = node_id; 170 | nid_t source_id_new = first_free_id; 171 | nid_to_offset[source_id_old] = first_free_id; 172 | offset_to_nid[first_free_id] = source_id_old; 173 | first_free_id++; 174 | 175 | offsets[source_id_new] = edges_seen; 176 | for (auto nbr_id : nbr_ids) 177 | { 178 | nid_t dest_id_old = nbr_id; 179 | edges[edges_seen] = dest_id_old; 180 | edges_seen++; 181 | } 182 | } 183 | 184 | void CSRUGraph::fix_edge_ids() 185 | { 186 | for (eid_t ei = 0; ei < 2*edge_count; ei++) 187 | { 188 | edges[ei] = nid_to_offset[edges[ei]]; 189 | } 190 | } 191 | 192 | nid_t CSRUGraph::degree(nid_t nid) 193 | { 194 | // If nid is last element of `offsets'... we prevent out of bounds. 
195 | nid_t r_bound; 196 | if (nid < node_count - 1) r_bound = offsets[nid+1]; 197 | else r_bound = edge_count * 2; 198 | nid_t l_bound = offsets[nid]; 199 | return (r_bound - l_bound); 200 | } 201 | 202 | nid_t CSRUGraph::out_degree(nid_t nid) 203 | { 204 | return degree(nid); 205 | } 206 | 207 | nid_t CSRUGraph::in_degree(nid_t nid) 208 | { 209 | return degree(nid); 210 | } 211 | 212 | nid_t CSRUGraph::nbr_id_for_node(nid_t nid, nid_t edge_no) 213 | { 214 | return edges[offsets[nid] + edge_no]; 215 | } 216 | nid_t CSRUGraph::num_nodes() 217 | { 218 | return node_count; 219 | } 220 | 221 | nid_t CSRUGraph::num_edges() 222 | { 223 | return edge_count; 224 | } 225 | }; 226 | -------------------------------------------------------------------------------- /src/RPGraph.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPGraph.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | 25 | #ifndef RPGraph_hpp 26 | #define RPGraph_hpp 27 | #include 28 | #include 29 | #include 30 | 31 | namespace RPGraph 32 | { 33 | // Type to represent node IDs. 34 | // NOTE: we limit to 4,294,967,296 nodes through uint32_t. 
35 | typedef uint32_t nid_t; 36 | 37 | // Type to represent edge IDs. 38 | // NOTE: uint32_t limits density to 50% for directed graphs. 39 | typedef uint32_t eid_t; 40 | 41 | // Virtual base class to derive different Graph types from. 42 | class Graph 43 | { 44 | public: 45 | virtual nid_t num_nodes() = 0; 46 | virtual nid_t num_edges() = 0; 47 | virtual nid_t degree(nid_t nid) = 0; 48 | virtual nid_t in_degree(nid_t nid) = 0; 49 | virtual nid_t out_degree(nid_t nid) = 0; 50 | virtual std::vector neighbors_with_geq_id(nid_t nid) = 0; 51 | 52 | }; 53 | 54 | // Very basic (adjacency list) representation of an undirected graph. 55 | class UGraph : public Graph 56 | { 57 | private: 58 | nid_t node_count, edge_count; 59 | std::unordered_map degrees; 60 | std::unordered_map> adjacency_list; 61 | 62 | bool has_node(nid_t nid); 63 | bool has_edge(nid_t s, nid_t t); 64 | void add_node(nid_t nid); 65 | void add_edge(nid_t s, nid_t t); 66 | 67 | public: 68 | // Construct UGraph from edgelist. IDs in edgelist are mapped to 69 | // [0, 1, ..., num_nodes-1]. Removes any self-edges. 70 | UGraph(std::string edgelist_path); 71 | std::unordered_map node_map; // el id -> UGraph id 72 | std::unordered_map node_map_r; // UGraph id -> el id 73 | 74 | virtual nid_t num_nodes() override; 75 | virtual nid_t num_edges() override; 76 | virtual nid_t degree(nid_t nid) override; 77 | virtual nid_t in_degree(nid_t nid) override; 78 | virtual nid_t out_degree(nid_t nid) override; 79 | 80 | std::vector neighbors_with_geq_id(nid_t nid) override; 81 | }; 82 | 83 | // Compressed sparserow (CSR) for undirected graphs. 84 | class CSRUGraph : public Graph 85 | { 86 | private: 87 | nid_t *edges; // All edgelists, concatenated. 88 | nid_t *offsets; // For each node, into edges. 
89 | nid_t node_count, edge_count; 90 | nid_t first_free_id, edges_seen; 91 | 92 | public: 93 | std::unordered_map nid_to_offset; 94 | nid_t *offset_to_nid; 95 | 96 | CSRUGraph(nid_t num_nodes, nid_t num_edges); 97 | ~CSRUGraph(); 98 | 99 | /// Inserts node_id and its edges. Once inserted, edges 100 | /// can't be altered for this node. 101 | void insert_node(nid_t node_id, std::vector nbr_ids); 102 | void fix_edge_ids(); // this should go... 103 | 104 | virtual nid_t num_nodes() override; 105 | virtual nid_t num_edges() override; 106 | virtual nid_t degree(nid_t nid) override; 107 | virtual nid_t in_degree(nid_t nid) override; 108 | virtual nid_t out_degree(nid_t nid) override; 109 | 110 | nid_t nbr_id_for_node(nid_t nid, nid_t nbr_no); 111 | }; 112 | } 113 | 114 | #endif /* Graph_h */ 115 | -------------------------------------------------------------------------------- /src/RPGraphLayout.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPGraphLayout.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 
20 | 21 | ============================================================================== 22 | */ 23 | 24 | 25 | #include "RPGraphLayout.hpp" 26 | #include "../lib/pngwriter/src/pngwriter.h" 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | namespace RPGraph 33 | { 34 | GraphLayout::GraphLayout(UGraph &graph, float width, float height) 35 | : graph(graph), width(width), height(height) 36 | { 37 | coordinates = (Coordinate *) malloc(graph.num_nodes() * sizeof(Coordinate)); 38 | } 39 | 40 | GraphLayout::~GraphLayout() 41 | { 42 | free(coordinates); 43 | } 44 | 45 | void GraphLayout::randomizePositions() 46 | { 47 | for (nid_t i = 0; i < graph.num_nodes(); ++i) 48 | { 49 | setX(i, get_random(-width/2.0, width/2.0)); 50 | setY(i, get_random(-height/2.0, height/2.0)); 51 | } 52 | } 53 | 54 | float GraphLayout::getX(nid_t node_id) 55 | { 56 | return coordinates[node_id].x; 57 | } 58 | 59 | float GraphLayout::getY(nid_t node_id) 60 | { 61 | return coordinates[node_id].y; 62 | } 63 | 64 | float GraphLayout::minX() 65 | { 66 | float minX = std::numeric_limits::max(); 67 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 68 | if (getX(n) < minX) minX = getX(n); 69 | return minX; 70 | } 71 | 72 | float GraphLayout::maxX() 73 | { 74 | float maxX = std::numeric_limits::min(); 75 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 76 | if (getX(n) > maxX) maxX = getX(n); 77 | return maxX; 78 | } 79 | 80 | float GraphLayout::minY() 81 | { 82 | float minY = std::numeric_limits::max(); 83 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 84 | if (getY(n) < minY) minY = getY(n); 85 | return minY; 86 | } 87 | 88 | float GraphLayout::maxY() 89 | { 90 | float maxY = std::numeric_limits::min(); 91 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 92 | if (getY(n) > maxY) maxY = getY(n); 93 | return maxY; 94 | } 95 | 96 | float GraphLayout::getXRange() 97 | { 98 | return maxX()- minX(); 99 | } 100 | 101 | float GraphLayout::getYRange() 102 | { 103 | return maxY() - minY(); 104 | } 105 | 106 | 
float GraphLayout::getSpan() 107 | { 108 | return ceil(fmaxf(getXRange(), getYRange())); 109 | } 110 | 111 | float GraphLayout::getDistance(nid_t n1, nid_t n2) 112 | { 113 | const float dx = getX(n1)-getX(n2); 114 | const float dy = getY(n1)-getY(n2); 115 | return std::sqrt(dx*dx + dy*dy); 116 | } 117 | 118 | Real2DVector GraphLayout::getDistanceVector(nid_t n1, nid_t n2) 119 | { 120 | return Real2DVector(getX(n2) - getX(n1), getY(n2) - getY(n1)); 121 | } 122 | 123 | Real2DVector GraphLayout::getNormalizedDistanceVector(nid_t n1, nid_t n2) 124 | { 125 | const float x1 = getX(n1); 126 | const float x2 = getX(n2); 127 | const float y1 = getY(n1); 128 | const float y2 = getY(n2); 129 | const float dx = x2 - x1; 130 | const float dy = y2 - y1; 131 | const float len = std::sqrt(dx*dx + dy*dy); 132 | 133 | return Real2DVector(dx / len, dy / len); 134 | } 135 | 136 | Coordinate GraphLayout::getCoordinate(nid_t node_id) 137 | { 138 | return coordinates[node_id]; 139 | } 140 | 141 | Coordinate GraphLayout::getCenter() 142 | { 143 | float x = minX() + getXRange()/2.0; 144 | float y = minY() + getYRange()/2.0; 145 | return Coordinate(x, y); 146 | } 147 | 148 | void GraphLayout::setX(nid_t node_id, float x_value) 149 | { 150 | coordinates[node_id].x = x_value; 151 | } 152 | 153 | void GraphLayout::setY(nid_t node_id, float y_value) 154 | { 155 | coordinates[node_id].y = y_value; 156 | } 157 | 158 | void GraphLayout::moveNode(nid_t n, RPGraph::Real2DVector v) 159 | { 160 | setX(n, getX(n) + v.x); 161 | setY(n, getY(n) + v.y); 162 | } 163 | 164 | void GraphLayout::setCoordinates(nid_t node_id, Coordinate c) 165 | { 166 | setX(node_id, c.x); 167 | setY(node_id, c.y); 168 | } 169 | 170 | void GraphLayout::writeToPNG(const int image_w, const int image_h, 171 | std::string path) 172 | { 173 | const float xRange = getXRange(); 174 | const float yRange = getYRange(); 175 | const RPGraph::Coordinate center = getCenter(); 176 | const float xCenter = center.x; 177 | const float yCenter = 
center.y; 178 | const float minX = xCenter - xRange/2.0; 179 | const float minY = yCenter - yRange/2.0; 180 | const float xScale = image_w/xRange; 181 | const float yScale = image_h/yRange; 182 | 183 | // Here we need to do some guessing as to what the optimal 184 | // opacity of nodes and edges might be, given network size. 185 | const float node_opacity = 10000.0 / graph.num_nodes(); 186 | const float edge_opacity = 100000.0 / graph.num_edges(); 187 | 188 | // Write to file. 189 | pngwriter layout_png(image_w, image_h, 0, path.c_str()); 190 | layout_png.invert(); // set bg. to white 191 | 192 | for (nid_t n1 = 0; n1 < graph.num_nodes(); ++n1) 193 | { 194 | // Plot node, 195 | layout_png.filledcircle_blend((getX(n1) - minX)*xScale, 196 | (getY(n1) - minY)*yScale, 197 | 3, node_opacity, 0, 0, 0); 198 | for (nid_t n2 : graph.neighbors_with_geq_id(n1)) { 199 | // ... and edge. 200 | layout_png.line_blend((getX(n1) - minX)*xScale, (getY(n1) - minY)*yScale, 201 | (getX(n2) - minX)*xScale, (getY(n2) - minY)*yScale, 202 | edge_opacity, 0, 0, 0); 203 | } 204 | } 205 | // Write it to disk. 
206 | layout_png.write_png(); 207 | } 208 | 209 | void GraphLayout::writeToCSV(std::string path) 210 | { 211 | if (is_file_exists(path.c_str())) 212 | { 213 | printf("Error: File exists at %s\n", path.c_str()); 214 | exit(EXIT_FAILURE); 215 | } 216 | 217 | std::ofstream out_file(path); 218 | 219 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 220 | { 221 | nid_t id = graph.node_map_r[n]; // id as found in edgelist 222 | out_file << id << "," << getX(n) << "," << getY(n) << "\n"; 223 | } 224 | 225 | out_file.close(); 226 | } 227 | 228 | void GraphLayout::writeToBin(std::string path) 229 | { 230 | if (is_file_exists(path.c_str())) 231 | { 232 | printf("Error: File exists at %s\n", path.c_str()); 233 | exit(EXIT_FAILURE); 234 | } 235 | 236 | std::ofstream out_file(path, std::ofstream::binary); 237 | 238 | for (nid_t n = 0; n < graph.num_nodes(); ++n) 239 | { 240 | nid_t id = graph.node_map_r[n]; // id as found in edgelist 241 | float x = getX(n); 242 | float y = getY(n); 243 | 244 | out_file.write(reinterpret_cast(&id), sizeof(id)); 245 | out_file.write(reinterpret_cast(&x), sizeof(x)); 246 | out_file.write(reinterpret_cast(&y), sizeof(y)); 247 | } 248 | 249 | out_file.close(); 250 | } 251 | 252 | } 253 | -------------------------------------------------------------------------------- /src/RPGraphLayout.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPGraphLayout.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 
12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #ifndef RPGraphLayout_hpp 25 | #define RPGraphLayout_hpp 26 | 27 | #include "RPGraph.hpp" 28 | #include "RPCommon.hpp" 29 | #include 30 | 31 | namespace RPGraph 32 | { 33 | class GraphLayout 34 | { 35 | private: 36 | Coordinate *coordinates; 37 | 38 | protected: 39 | float width, height; 40 | float minX(), minY(), maxX(), maxY(); 41 | 42 | public: 43 | GraphLayout(RPGraph::UGraph &graph, 44 | float width = 10000, float height = 10000); 45 | ~GraphLayout(); 46 | 47 | UGraph &graph; // to lay-out 48 | 49 | // randomize the layout position of all nodes. 
50 | void randomizePositions(); 51 | 52 | float getX(nid_t node_id), getY(nid_t node_id); 53 | float getXRange(), getYRange(), getSpan(); 54 | float getDistance(nid_t n1, nid_t n2); 55 | Real2DVector getDistanceVector(nid_t n1, nid_t n2); 56 | Real2DVector getNormalizedDistanceVector(nid_t n1, nid_t n2); 57 | Coordinate getCoordinate(nid_t node_id); 58 | Coordinate getCenter(); 59 | 60 | 61 | void setX(nid_t node_id, float x_value), setY(nid_t node_id, float y_value); 62 | void moveNode(nid_t, Real2DVector v); 63 | void setCoordinates(nid_t node_id, Coordinate c); 64 | void writeToPNG(const int image_w, const int image_h, std::string path); 65 | void writeToCSV(std::string path); 66 | void writeToBin(std::string path); 67 | }; 68 | } 69 | 70 | #endif /* RPGraphLayout_hpp */ 71 | -------------------------------------------------------------------------------- /src/RPLayoutAlgorithm.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPLayoutAlgorithm.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 
20 | 21 | ============================================================================== 22 | */ 23 | 24 | #include "RPLayoutAlgorithm.hpp" 25 | 26 | namespace RPGraph 27 | { 28 | LayoutAlgorithm::LayoutAlgorithm(GraphLayout &layout): layout(layout){} 29 | LayoutAlgorithm::~LayoutAlgorithm(){} 30 | } 31 | -------------------------------------------------------------------------------- /src/RPLayoutAlgorithm.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | RPLayoutAlgorithm.hpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ============================================================================== 22 | */ 23 | 24 | #ifndef RPLayoutAlgorithm_hpp 25 | #define RPLayoutAlgorithm_hpp 26 | 27 | #include "RPGraphLayout.hpp" 28 | 29 | namespace RPGraph 30 | { 31 | class LayoutAlgorithm 32 | { 33 | public: 34 | LayoutAlgorithm(GraphLayout &layout); 35 | ~LayoutAlgorithm(); 36 | GraphLayout &layout; 37 | 38 | virtual void sync_layout() = 0; // write current layout to `layout'. 
39 | }; 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/graph_viewer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | graph_viewer.cpp 5 | Copyright © 2016, 2017, 2018 G. Brinkmann 6 | 7 | This file is part of graph_viewer. 8 | 9 | graph_viewer is free software: you can redistribute it and/or modify 10 | it under the terms of version 3 of the GNU Affero General Public License as 11 | published by the Free Software Foundation. 12 | 13 | graph_viewer is distributed in the hope that it will be useful, 14 | but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | GNU Affero General Public License for more details. 17 | 18 | You should have received a copy of the GNU Affero General Public License 19 | along with graph_viewer. If not, see . 20 | 21 | ------------------------------------------------------------------------------- 22 | 23 | This code was written as part of a research project at the Leiden Institute of 24 | Advanced Computer Science (www.liacs.nl). For other resources related to this 25 | project, see https://liacs.leidenuniv.nl/~takesfw/GPUNetworkVis/. 26 | 27 | ============================================================================== 28 | */ 29 | 30 | 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #include "RPCommon.hpp" 37 | #include "RPGraph.hpp" 38 | #include "RPGraphLayout.hpp" 39 | #include "RPCPUForceAtlas2.hpp" 40 | 41 | #ifdef __NVCC__ 42 | #include 43 | #include "RPGPUForceAtlas2.hpp" 44 | #endif 45 | 46 | int main(int argc, const char **argv) 47 | { 48 | // For reproducibility. 
49 | srandom(1234); 50 | 51 | // Parse commandline arguments 52 | if (argc < 10 or (std::string(argv[10]) == "png" and argc < 12)) 53 | { 54 | fprintf(stderr, "Usage: graph_viewer gpu|cpu max_iterations num_snaps sg|wg scale gravity exact|approximate edgelist_path out_path [png image_w image_h|csv|bin]\n"); 55 | exit(EXIT_FAILURE); 56 | } 57 | 58 | const bool cuda_requested = std::string(argv[1]) == "gpu" or std::string(argv[1]) == "cuda"; 59 | const int max_iterations = std::stoi(argv[2]); 60 | const int num_screenshots = std::stoi(argv[3]); 61 | const bool strong_gravity = std::string(argv[4]) == "sg"; 62 | const float scale = std::stof(argv[5]); 63 | const float gravity = std::stof(argv[6]); 64 | const bool approximate = std::string(argv[7]) == "approximate"; 65 | std::string edgelist_path = argv[8]; 66 | std::string out_path = argv[9]; 67 | std::string out_format = "png"; 68 | int image_w = 1250; 69 | int image_h = 1250; 70 | 71 | for (int arg_no = 10; arg_no < argc; arg_no++) 72 | { 73 | if(std::string(argv[arg_no]) == "png") 74 | { 75 | out_format = "png"; 76 | image_w = std::stoi(argv[arg_no+1]); 77 | image_h = std::stoi(argv[arg_no+2]); 78 | arg_no += 2; 79 | } 80 | 81 | else if(std::string(argv[arg_no]) == "csv") 82 | { 83 | out_format = "csv"; 84 | } 85 | 86 | else if(std::string(argv[arg_no]) == "bin") 87 | { 88 | out_format = "bin"; 89 | } 90 | } 91 | 92 | 93 | if(cuda_requested and not approximate) 94 | { 95 | fprintf(stderr, "error: The CUDA implementation (currently) requires Barnes-Hut approximation.\n"); 96 | exit(EXIT_FAILURE); 97 | } 98 | 99 | // Check in_path and out_path 100 | if (!is_file_exists(edgelist_path)) 101 | { 102 | fprintf(stderr, "error: No edgelist at %s\n", edgelist_path.c_str()); 103 | exit(EXIT_FAILURE); 104 | } 105 | if (!is_file_exists(out_path)) 106 | { 107 | fprintf(stderr, "error: No output folder at %s\n", out_path.c_str()); 108 | exit(EXIT_FAILURE); 109 | } 110 | 111 | // If not compiled with cuda support, check if cuda 
is requested. 112 | #ifndef __NVCC__ 113 | if(cuda_requested) 114 | { 115 | fprintf(stderr, "error: CUDA was requested, but not compiled for.\n"); 116 | exit(EXIT_FAILURE); 117 | } 118 | #endif 119 | 120 | // Load graph. 121 | printf("Loading edgelist at '%s'...", edgelist_path.c_str()); 122 | fflush(stdout); 123 | RPGraph::UGraph graph = RPGraph::UGraph(edgelist_path); 124 | printf("done.\n"); 125 | printf(" fetched %d nodes and %d edges.\n", graph.num_nodes(), graph.num_edges()); 126 | 127 | // Create the GraphLayout and ForceAtlas2 objects. 128 | RPGraph::GraphLayout layout(graph); 129 | RPGraph::ForceAtlas2 *fa2; 130 | #ifdef __NVCC__ 131 | if(cuda_requested) 132 | fa2 = new RPGraph::CUDAForceAtlas2(layout, approximate, 133 | strong_gravity, gravity, scale); 134 | else 135 | #endif 136 | fa2 = new RPGraph::CPUForceAtlas2(layout, approximate, 137 | strong_gravity, gravity, scale); 138 | 139 | printf("Started Layout algorithm...\n"); 140 | const int snap_period = ceil((float)max_iterations/num_screenshots); 141 | const int print_period = ceil((float)max_iterations*0.05); 142 | 143 | for (int iteration = 1; iteration <= max_iterations; ++iteration) 144 | { 145 | fa2->doStep(); 146 | // If we need to, write the result to a png 147 | if (num_screenshots > 0 && (iteration % snap_period == 0 || iteration == max_iterations)) 148 | { 149 | // Determine output filename 150 | std::string edgelist_basename = basename(edgelist_path); 151 | std::string out_filename = edgelist_basename + "_" + std::to_string(iteration) + "." 
+ out_format; 152 | std::string out_filepath = out_path + "/" + out_filename; 153 | printf("Starting iteration %d (%.2f%%), writing %s...", iteration, 100*(float)iteration/max_iterations, out_format.c_str()); 154 | fflush(stdout); 155 | fa2->sync_layout(); 156 | 157 | if (out_format == "png") 158 | layout.writeToPNG(image_w, image_h, out_filepath); 159 | else if (out_format == "csv") 160 | layout.writeToCSV(out_filepath); 161 | else if (out_format == "bin") 162 | layout.writeToBin(out_filepath); 163 | 164 | printf("done.\n"); 165 | } 166 | 167 | // Else we print (if we need to) 168 | else if (iteration % print_period == 0) 169 | { 170 | printf("Starting iteration %d (%.2f%%).\n", iteration, 100*(float)iteration/max_iterations); 171 | } 172 | } 173 | 174 | delete fa2; 175 | exit(EXIT_SUCCESS); 176 | } 177 | --------------------------------------------------------------------------------