├── .github
    └── workflows
    │   ├── ccpp.yml
    │   └── validate-citation-cff.yml
├── .gitignore
├── .gitmodules
├── CITATION.cff
├── COPYING
├── LICENSE
├── README.md
├── builds
    └── linux
    │   └── Makefile
└── src
    ├── RPBHFA2LaunchParameters.cuh
    ├── RPBHKernels.cu
    ├── RPBHKernels.cuh
    ├── RPBarnesHutApproximator.cpp
    ├── RPBarnesHutApproximator.hpp
    ├── RPCPUForceAtlas2.cpp
    ├── RPCPUForceAtlas2.hpp
    ├── RPCommon.cpp
    ├── RPCommon.hpp
    ├── RPFA2Kernels.cu
    ├── RPFA2Kernels.cuh
    ├── RPForceAtlas2.cpp
    ├── RPForceAtlas2.hpp
    ├── RPGPUForceAtlas2.cu
    ├── RPGPUForceAtlas2.hpp
    ├── RPGraph.cpp
    ├── RPGraph.hpp
    ├── RPGraphLayout.cpp
    ├── RPGraphLayout.hpp
    ├── RPLayoutAlgorithm.cpp
    ├── RPLayoutAlgorithm.hpp
    └── graph_viewer.cpp


/.github/workflows/ccpp.yml:
--------------------------------------------------------------------------------
 1 | # CI: builds the CPU-only `graph_viewer` target on every push.
 2 | name: compile CPU code
 3 | 
 4 | on: [push]
 5 | 
 6 | jobs:
 7 |   build:
 8 |     strategy:
 9 |       matrix:
10 |         # Only list runner images GitHub still hosts: ubuntu-18.04 and
11 |         # ubuntu-20.04 have been retired, and jobs targeting them never start.
12 |         os: [ubuntu-22.04, ubuntu-24.04]
13 |     runs-on: ${{ matrix.os }}
14 |     steps:
15 |     # `submodules: true` also fetches the lib/pngwriter submodule declared
16 |     # in .gitmodules.
17 |     - uses: actions/checkout@v4
18 |       with:
19 |         submodules: true
20 |     # CUDA_SUPPORT=0 selects the build without CUDA support.
21 |     - name: make (without CUDA support)
22 |       run: make graph_viewer CUDA_SUPPORT=0
23 |       working-directory: ./builds/linux
24 | 


--------------------------------------------------------------------------------
/.github/workflows/validate-citation-cff.yml:
--------------------------------------------------------------------------------
 1 | # Validates CITATION.cff whenever a push modifies that file.
 2 | name: "Validate CITATION.cff"
 3 | 
 4 | on:
 5 |   push:
 6 |     # Path filter: pushes that do not touch CITATION.cff skip this workflow.
 7 |     paths:
 8 |       - "CITATION.cff"
 9 | jobs:
10 |   validate:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: "Checkout repository"
14 |         uses: actions/checkout@v4
15 | 
16 |       # Third-party action referenced by full commit SHA rather than by a
17 |       # movable tag or branch name.
18 |       - name: "Run validation"
19 |         uses: "citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084"
20 |         with:
21 |           args: "--validate"
22 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | 
 3 | *.swp
 4 | 
 5 | *.trace
 6 | *.o
 7 | *.out
 8 | # Xcode
 9 | #
10 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
11 | 
12 | ## Build generated
13 | build/
14 | DerivedData/
15 | 
16 | ## Various settings
17 | *.pbxuser
18 | !default.pbxuser
19 | *.mode1v3
20 | !default.mode1v3
21 | *.mode2v3
22 | !default.mode2v3
23 | *.perspectivev3
24 | !default.perspectivev3
25 | xcuserdata/
26 | 
27 | ## Other
28 | *.moved-aside
29 | *.xccheckout
30 | *.xcscmblueprint
31 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "lib/pngwriter"]
2 | 	path = lib/pngwriter
3 | 	url = https://github.com/pngwriter/pngwriter.git
4 | 


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | # Citation metadata for this repository in Citation File Format (CFF).
 2 | cff-version: 1.2.0
 3 | message: "Please use the conference paper under 'preferred-citation' to cite this project."
 4 | title: "GPUGraphLayout"
 5 | authors:
 6 |   - family-names: "Brinkmann"
 7 |     given-names: "Govert G."
 8 |     orcid: "https://orcid.org/0000-0002-2713-213X"
 9 | date-released: 2017-09-18
10 | # GitHub hosts the source code, so the CFF `repository-code` key applies here.
11 | repository-code: "https://github.com/govertb/GPUGraphLayout"
12 | identifiers:
13 |   - description: "Collection of archived snapshots of GPUGraphLayout"
14 |     type: doi
15 |     value: "10.5281/zenodo.6334692"
16 | keywords:
17 |   - "visualization"
18 |   - "graph-algorithms"
19 |   - "cuda"
20 |   - "gephi"
21 |   - "social-network-analysis"
22 |   - "forceatlas2"
23 |   - "graph-layout"
24 | license: "AGPL-3.0"
25 | # The conference paper that `message` asks users to cite.
26 | preferred-citation:
27 |   type: conference-paper
28 |   title: "Exploiting GPUs for Fast Force-Directed Visualization of Large-Scale Networks"
29 |   journal: "46th International Conference on Parallel Processing"
30 |   year: 2017
31 |   month: 9
32 |   start: 382  # first page
33 |   end: 391  # last page
34 |   date-released: 2017-09-07
35 |   languages:
36 |     - "en"
37 |   authors:
38 |     - family-names: "Brinkmann"
39 |       given-names: "Govert G."
40 |       affiliation: "Leiden Institute of Advanced Computer Science (LIACS)"
41 |       orcid: "https://orcid.org/0000-0002-2713-213X"
42 |     - family-names: "Rietveld"
43 |       given-names: "Kristian F. D."
44 |       affiliation: "Leiden Institute of Advanced Computer Science (LIACS)"
45 |       orcid: "https://orcid.org/0000-0003-0455-3430"
46 |     - family-names: "Takes"
47 |       given-names: "Frank W."
48 |       affiliation: "Leiden Institute of Advanced Computer Science (LIACS)"
49 |       orcid: "https://orcid.org/0000-0001-5468-1030"
50 |   institution:
51 |     name: "Leiden Institute of Advanced Computer Science"
52 |     alias: "LIACS"
53 |     address: "Niels Bohrweg 2"
54 |     post-code: "2333 CA"
55 |     city: "Leiden"
56 |     country: "NL"
57 |     website: "https://liacs.leidenuniv.nl"
58 |   conference:
59 |     name: "46th International Conference on Parallel Processing"
60 |     alias: "ICPP 2017"
61 |     country: "GB"
62 |     city: "Bristol"
63 |     date-start: 2017-08-14
64 |     date-end: 2017-08-17
65 |     website: "https://www.icpp-conf.org/2017/"
66 |   publisher:
67 |     name: "IEEE"
68 |     website: "https://ieee.org"
69 |   copyright: "© 2017 IEEE"
70 |   doi: "10.1109/ICPP.2017.47"
71 |   url: "https://doi.org/10.1109/ICPP.2017.47"
72 |   isbn: "9781538610428"
73 |   issn: "2332-5690"
74 |   keywords:
75 |     - "Network visualization"
76 |     - "force-directed graph layout"
77 |     - "large-scale networks"
78 |     - "parallel programming"
79 |     - "CUDA"
80 | 


--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
  1 |                     GNU AFFERO GENERAL PUBLIC LICENSE
  2 |                        Version 3, 19 November 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU Affero General Public License is a free, copyleft license for
 11 | software and other kinds of works, specifically designed to ensure
 12 | cooperation with the community in the case of network server software.
 13 | 
 14 |   The licenses for most software and other practical works are designed
 15 | to take away your freedom to share and change the works.  By contrast,
 16 | our General Public Licenses are intended to guarantee your freedom to
 17 | share and change all versions of a program--to make sure it remains free
 18 | software for all its users.
 19 | 
 20 |   When we speak of free software, we are referring to freedom, not
 21 | price.  Our General Public Licenses are designed to make sure that you
 22 | have the freedom to distribute copies of free software (and charge for
 23 | them if you wish), that you receive source code or can get it if you
 24 | want it, that you can change the software or use pieces of it in new
 25 | free programs, and that you know you can do these things.
 26 | 
 27 |   Developers that use our General Public Licenses protect your rights
 28 | with two steps: (1) assert copyright on the software, and (2) offer
 29 | you this License which gives you legal permission to copy, distribute
 30 | and/or modify the software.
 31 | 
 32 |   A secondary benefit of defending all users' freedom is that
 33 | improvements made in alternate versions of the program, if they
 34 | receive widespread use, become available for other developers to
 35 | incorporate.  Many developers of free software are heartened and
 36 | encouraged by the resulting cooperation.  However, in the case of
 37 | software used on network servers, this result may fail to come about.
 38 | The GNU General Public License permits making a modified version and
 39 | letting the public access it on a server without ever releasing its
 40 | source code to the public.
 41 | 
 42 |   The GNU Affero General Public License is designed specifically to
 43 | ensure that, in such cases, the modified source code becomes available
 44 | to the community.  It requires the operator of a network server to
 45 | provide the source code of the modified version running there to the
 46 | users of that server.  Therefore, public use of a modified version, on
 47 | a publicly accessible server, gives the public access to the source
 48 | code of the modified version.
 49 | 
 50 |   An older license, called the Affero General Public License and
 51 | published by Affero, was designed to accomplish similar goals.  This is
 52 | a different license, not a version of the Affero GPL, but Affero has
 53 | released a new version of the Affero GPL which permits relicensing under
 54 | this license.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                        TERMS AND CONDITIONS
 60 | 
 61 |   0. Definitions.
 62 | 
 63 |   "This License" refers to version 3 of the GNU Affero General Public License.
 64 | 
 65 |   "Copyright" also means copyright-like laws that apply to other kinds of
 66 | works, such as semiconductor masks.
 67 | 
 68 |   "The Program" refers to any copyrightable work licensed under this
 69 | License.  Each licensee is addressed as "you".  "Licensees" and
 70 | "recipients" may be individuals or organizations.
 71 | 
 72 |   To "modify" a work means to copy from or adapt all or part of the work
 73 | in a fashion requiring copyright permission, other than the making of an
 74 | exact copy.  The resulting work is called a "modified version" of the
 75 | earlier work or a work "based on" the earlier work.
 76 | 
 77 |   A "covered work" means either the unmodified Program or a work based
 78 | on the Program.
 79 | 
 80 |   To "propagate" a work means to do anything with it that, without
 81 | permission, would make you directly or secondarily liable for
 82 | infringement under applicable copyright law, except executing it on a
 83 | computer or modifying a private copy.  Propagation includes copying,
 84 | distribution (with or without modification), making available to the
 85 | public, and in some countries other activities as well.
 86 | 
 87 |   To "convey" a work means any kind of propagation that enables other
 88 | parties to make or receive copies.  Mere interaction with a user through
 89 | a computer network, with no transfer of a copy, is not conveying.
 90 | 
 91 |   An interactive user interface displays "Appropriate Legal Notices"
 92 | to the extent that it includes a convenient and prominently visible
 93 | feature that (1) displays an appropriate copyright notice, and (2)
 94 | tells the user that there is no warranty for the work (except to the
 95 | extent that warranties are provided), that licensees may convey the
 96 | work under this License, and how to view a copy of this License.  If
 97 | the interface presents a list of user commands or options, such as a
 98 | menu, a prominent item in the list meets this criterion.
 99 | 
100 |   1. Source Code.
101 | 
102 |   The "source code" for a work means the preferred form of the work
103 | for making modifications to it.  "Object code" means any non-source
104 | form of a work.
105 | 
106 |   A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 | 
111 |   The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form.  A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 | 
122 |   The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities.  However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work.  For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 | 
135 |   The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 | 
139 |   The Corresponding Source for a work in source code form is that
140 | same work.
141 | 
142 |   2. Basic Permissions.
143 | 
144 |   All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met.  This License explicitly affirms your unlimited
147 | permission to run the unmodified Program.  The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work.  This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 | 
152 |   You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force.  You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright.  Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 | 
163 |   Conveying under any other circumstances is permitted solely under
164 | the conditions stated below.  Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 | 
167 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 | 
169 |   No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 | 
175 |   When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 | 
183 |   4. Conveying Verbatim Copies.
184 | 
185 |   You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 | 
193 |   You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 | 
196 |   5. Conveying Modified Source Versions.
197 | 
198 |   You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 | 
202 |     a) The work must carry prominent notices stating that you modified
203 |     it, and giving a relevant date.
204 | 
205 |     b) The work must carry prominent notices stating that it is
206 |     released under this License and any conditions added under section
207 |     7.  This requirement modifies the requirement in section 4 to
208 |     "keep intact all notices".
209 | 
210 |     c) You must license the entire work, as a whole, under this
211 |     License to anyone who comes into possession of a copy.  This
212 |     License will therefore apply, along with any applicable section 7
213 |     additional terms, to the whole of the work, and all its parts,
214 |     regardless of how they are packaged.  This License gives no
215 |     permission to license the work in any other way, but it does not
216 |     invalidate such permission if you have separately received it.
217 | 
218 |     d) If the work has interactive user interfaces, each must display
219 |     Appropriate Legal Notices; however, if the Program has interactive
220 |     interfaces that do not display Appropriate Legal Notices, your
221 |     work need not make them do so.
222 | 
223 |   A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit.  Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 | 
233 |   6. Conveying Non-Source Forms.
234 | 
235 |   You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 | 
240 |     a) Convey the object code in, or embodied in, a physical product
241 |     (including a physical distribution medium), accompanied by the
242 |     Corresponding Source fixed on a durable physical medium
243 |     customarily used for software interchange.
244 | 
245 |     b) Convey the object code in, or embodied in, a physical product
246 |     (including a physical distribution medium), accompanied by a
247 |     written offer, valid for at least three years and valid for as
248 |     long as you offer spare parts or customer support for that product
249 |     model, to give anyone who possesses the object code either (1) a
250 |     copy of the Corresponding Source for all the software in the
251 |     product that is covered by this License, on a durable physical
252 |     medium customarily used for software interchange, for a price no
253 |     more than your reasonable cost of physically performing this
254 |     conveying of source, or (2) access to copy the
255 |     Corresponding Source from a network server at no charge.
256 | 
257 |     c) Convey individual copies of the object code with a copy of the
258 |     written offer to provide the Corresponding Source.  This
259 |     alternative is allowed only occasionally and noncommercially, and
260 |     only if you received the object code with such an offer, in accord
261 |     with subsection 6b.
262 | 
263 |     d) Convey the object code by offering access from a designated
264 |     place (gratis or for a charge), and offer equivalent access to the
265 |     Corresponding Source in the same way through the same place at no
266 |     further charge.  You need not require recipients to copy the
267 |     Corresponding Source along with the object code.  If the place to
268 |     copy the object code is a network server, the Corresponding Source
269 |     may be on a different server (operated by you or a third party)
270 |     that supports equivalent copying facilities, provided you maintain
271 |     clear directions next to the object code saying where to find the
272 |     Corresponding Source.  Regardless of what server hosts the
273 |     Corresponding Source, you remain obligated to ensure that it is
274 |     available for as long as needed to satisfy these requirements.
275 | 
276 |     e) Convey the object code using peer-to-peer transmission, provided
277 |     you inform other peers where the object code and Corresponding
278 |     Source of the work are being offered to the general public at no
279 |     charge under subsection 6d.
280 | 
281 |   A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 | 
285 |   A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling.  In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage.  For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product.  A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 | 
298 |   "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source.  The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 | 
306 |   If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information.  But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 | 
317 |   The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed.  Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 | 
325 |   Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 | 
331 |   7. Additional Terms.
332 | 
333 |   "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law.  If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 | 
342 |   When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it.  (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.)  You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 | 
349 |   Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 | 
353 |     a) Disclaiming warranty or limiting liability differently from the
354 |     terms of sections 15 and 16 of this License; or
355 | 
356 |     b) Requiring preservation of specified reasonable legal notices or
357 |     author attributions in that material or in the Appropriate Legal
358 |     Notices displayed by works containing it; or
359 | 
360 |     c) Prohibiting misrepresentation of the origin of that material, or
361 |     requiring that modified versions of such material be marked in
362 |     reasonable ways as different from the original version; or
363 | 
364 |     d) Limiting the use for publicity purposes of names of licensors or
365 |     authors of the material; or
366 | 
367 |     e) Declining to grant rights under trademark law for use of some
368 |     trade names, trademarks, or service marks; or
369 | 
370 |     f) Requiring indemnification of licensors and authors of that
371 |     material by anyone who conveys the material (or modified versions of
372 |     it) with contractual assumptions of liability to the recipient, for
373 |     any liability that these contractual assumptions directly impose on
374 |     those licensors and authors.
375 | 
376 |   All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10.  If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term.  If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 | 
386 |   If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 | 
391 |   Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 | 
395 |   8. Termination.
396 | 
397 |   You may not propagate or modify a covered work except as expressly
398 | provided under this License.  Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 | 
403 |   However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 | 
410 |   Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 | 
417 |   Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License.  If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 | 
423 |   9. Acceptance Not Required for Having Copies.
424 | 
425 |   You are not required to accept this License in order to receive or
426 | run a copy of the Program.  Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance.  However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work.  These actions infringe copyright if you do
431 | not accept this License.  Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 | 
434 |   10. Automatic Licensing of Downstream Recipients.
435 | 
436 |   Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License.  You are not responsible
439 | for enforcing compliance by third parties with this License.
440 | 
441 |   An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations.  If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 | 
451 |   You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License.  For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 | 
459 |   11. Patents.
460 | 
461 |   A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based.  The
463 | work thus licensed is called the contributor's "contributor version".
464 | 
465 |   A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version.  For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 | 
475 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 | 
480 |   In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement).  To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 | 
487 |   If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients.  "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 | 
501 |   If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 | 
509 |   A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License.  You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 | 
524 |   Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 | 
528 |   12. No Surrender of Others' Freedom.
529 | 
530 |   If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License.  If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all.  For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 | 
540 |   13. Remote Network Interaction; Use with the GNU General Public License.
541 | 
542 |   Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software.  This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 | 
553 |   Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work.  The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 | 
561 |   14. Revised Versions of this License.
562 | 
563 |   The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time.  Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 | 
568 |   Each version is given a distinguishing version number.  If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation.  If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 | 
577 |   If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 | 
582 |   Later license versions may give you additional or different
583 | permissions.  However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 | 
587 |   15. Disclaimer of Warranty.
588 | 
589 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 | 
598 |   16. Limitation of Liability.
599 | 
600 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 | 
610 |   17. Interpretation of Sections 15 and 16.
611 | 
612 |   If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 | 
619 |                      END OF TERMS AND CONDITIONS
620 | 
621 |             How to Apply These Terms to Your New Programs
622 | 
623 |   If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 | 
627 |   To do so, attach the following notices to the program.  It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 | 
632 |     <one line to give the program's name and a brief idea of what it does.>
633 |     Copyright (C) <year>  <name of author>
634 | 
635 |     This program is free software: you can redistribute it and/or modify
636 |     it under the terms of the GNU Affero General Public License as published by
637 |     the Free Software Foundation, either version 3 of the License, or
638 |     (at your option) any later version.
639 | 
640 |     This program is distributed in the hope that it will be useful,
641 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
642 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
643 |     GNU Affero General Public License for more details.
644 | 
645 |     You should have received a copy of the GNU Affero General Public License
646 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
647 | 
648 | Also add information on how to contact you by electronic and paper mail.
649 | 
650 |   If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source.  For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code.  There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 | 
658 |   You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <http://www.gnu.org/licenses/>.
662 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | COPYING


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | `graph_viewer` | GPU accelerated graph layout
 2 | =============================================
 3 | 
 4 | This repository contains experimental code for large scale graph layout using the GPU. Currently we only implement the basics of ForceAtlas2, a graph layout algorithm designed for social network visualization in Gephi<sup>[1](#jacomy14),[2](#bastian09)</sup>. Our implementation of ForceAtlas2 is based on [the open source implementation](https://github.com/gephi/gephi/tree/6efb108718fa67d1055160f3a18b63edb4ca7be2/modules/LayoutPlugin/src/main/java/org/gephi/layout/plugin/forceAtlas2) used in Gephi itself, and considers the graph to be undirected. For force approximation, we use a CUDA implementation of the Barnes-Hut approximation algorithm<sup>[3](#barnes86)</sup> by Martin Burtscher and Keshav Pingali<sup>[4](#burtscher11)</sup>. This implementation is available as part of [LonestarGPU](http://iss.ices.utexas.edu/?p=projects/galois/lonestargpu). The average speedup, compared to a *de facto* CPU implementation of ForceAtlas2, is over 40x. This makes it feasible to compute layouts for networks with millions of nodes and edges. More details and results can be found in:
 5 | 
 6 | * G.G. Brinkmann, [K.F.D. Rietveld](https://liacs.leidenuniv.nl/~rietveldkfd) and [F.W. Takes](https://liacs.leidenuniv.nl/~takesfw), [Exploiting GPUs for fast force-directed visualization of large-scale networks](https://dx.doi.org/10.1109/ICPP.2017.47), in Proceedings of the 46th International Conference on Parallel Processing (ICPP), pp. 382-391, 2017.
 7 | 
 8 | 
 9 | #### Citing
10 | To cite this software, please use the aforementioned reference, or the `preferred-citation` section in [CITATION.cff](./CITATION.cff). The latter can be converted to the desired format using [various tools](https://github.com/citation-file-format/citation-file-format/tree/52647a247e9b1a5b04154934f39615b5ee8c4d65#tools-to-work-with-citationcff-files-wrench), or using the _Cite this repository_ button in the _About_ section of [this project's GitHub page](https://github.com/govertb/GPUGraphLayout).
11 | 
12 | 
13 | #### System Requirements
14 | 
15 | A CUDA capable GPU. Currently only Linux is supported.
16 | 
17 | #### Obtaining all code
18 | This repository contains a submodule (`lib/pngwriter`). Be sure to run
19 | ```
20 | git submodule init && git submodule update
21 | ```
22 | from the root of this Git repository before compiling. The code also depends on the `libpng` library (including its development headers). It should be possible to obtain this using the package manager for your Linux distribution.
23 | 
24 | #### Compiling
25 | A `Makefile` is located in `builds/linux`. Running
26 | ```
27 | make graph_viewer
28 | ```
29 | from this directory compiles `graph_viewer` with CUDA support.
30 | To compile without CUDA support, run `make graph_viewer CUDA_SUPPORT=0`.
31 | 
32 | #### Usage
33 | `graph_viewer gpu|cpu max_iterations num_snaps sg|wg scale gravity exact|approximate edgelist_path out_path [png|csv|bin]`
34 | 
35 | 
36 | | Argument             | Description |
37 | | -------------------- | ----------- |
38 | | `gpu\|cpu`           | Choose between a parallel GPU implementation or a serial CPU implementation. |
39 | | `max_iterations`     | How many iterations of the layout algorithm to run. |
40 | | `num_snaps`          | Choose how many times during the layout process a visualization should be rendered. |
41 | | `sg\|wg`             | Choose between strong gravity or weak gravity (inversely proportional to distance). |
42 | | `scale`              | Scale repulsive force. |
43 | | `gravity`            | Scale gravitational force. |
44 | | `exact\|approximate` | Choose between the exact/pairwise $O(\|V\|^2)$ repulsive force calculation or the $O(\|V\| \log \|V\|)$ approximation using Barnes-Hut (GPU implementation only supports Barnes-Hut). |
45 | | `edgelist_path`      | Text file (ascii) containing node IDs for each edge on a separate line (whitespace separated). Lines starting with a `#`, the direction of edges, and self-loops are ignored. |
46 | | `out_path`           | Path to write resulting layout to. |
47 | 
48 | `[png|csv|bin]` is optional, defaulting to `png`, and determines the format of the layout written to `out_path`.
49 | 
50 | #### References
51 | <a name="jacomy14"><sup>1</sup></a> M. Jacomy, T. Venturini, S. Heymann, and M. Bastian, ["Forceatlas2, a continuous graph layout algorithm for handy network visualization designed for the Gephi software"](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0098679), PLoS ONE, vol. 9, no. 6, pp. 1–12, 2014.
52 | 
53 | <a name="bastian09"><sup>2</sup></a> M. Bastian, S. Heymann, and M. Jacomy, ["Gephi: an open source software for exploring and manipulating networks."](https://aaai.org/ocs/index.php/ICWSM/09/paper/view/154) in Proceedings of International Conference on Web and Social Media (ICWSM), 2009, pp. 361–362.
54 | 
55 | <a name="barnes86"><sup>3</sup></a> J. Barnes and P. Hut, ["A hierarchical O(N log N) force-calculation algorithm"](https://www.nature.com/nature/journal/v324/n6096/abs/324446a0.html), Nature, vol. 324, pp. 446–449, 1986.
56 | 
57 | <a name="burtscher11"><sup>4</sup></a> M. Burtscher and K. Pingali, ["An efficient CUDA implementation of the tree-based Barnes Hut n-body algorithm"](https://www.sciencedirect.com/science/article/pii/B9780123849885000061), in GPU Computing Gems Emerald Edition, W. mei W. Hwu, Ed., 2011, ch. 6, pp. 75–92.
58 | 
59 | #### License
60 | Most source files for this program are released under the GNU Affero General Public License. The license notice in each file provides more information. A copy of the GNU Affero General Public License can be found in the `LICENSE` file.
61 | 
62 | #### Disclaimer
63 | This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more details.
64 | 


--------------------------------------------------------------------------------
/builds/linux/Makefile:
--------------------------------------------------------------------------------
 1 | CC:=gcc
 2 | 
 3 | # Directories
 4 | L_SRC_DIR         := ../../lib
 5 | SRC_DIR           := ../../src
 6 | OBJ_DIR           := obj
 7 | 
 8 | # Compile with CUDA support by default
 9 | CUDA_SUPPORT ?= 1
10 | ifeq ($(CUDA_SUPPORT), 1)
11 | 	CC:=nvcc
12 | endif
13 | 
14 | # Debug compilation, disable by default
15 | DEBUG ?= 0
16 | ifeq ($(DEBUG), 1)
17 | 	CXXFLAGS:=-g
18 | 	NVCCFLAGS:=-G
19 | 	O_LVL:=-O0
20 | else
21 | 	NVCCFLAGS:=-D NDEBUG
22 | 	O_LVL:=-O3
23 | endif
24 | 
25 | # C preprocessor flags
26 | CPPFLAGS :=-D NO_FREETYPE
27 | 
28 | # C/C++ compiler flags
29 | CFLAGS   :=$(O_LVL)
30 | CXXFLAGS :=$(CXXFLAGS) $(O_LVL) -std=c++11
31 | 
32 | # Linker flags
33 | LDFLAGS  :=-lc -lm -lstdc++ -lpng
34 | 
35 | # src./obj. files
36 | GRAPH_VIEWER_SRCS := $(wildcard $(SRC_DIR)/*.cpp)
37 | GRAPH_VIEWER_OBJS := $(GRAPH_VIEWER_SRCS:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.o)
38 | 
39 | PNGWRITER_SRCS    := $(L_SRC_DIR)/pngwriter/src/pngwriter.cc
40 | PNGWRITER_OBJS    := $(PNGWRITER_SRCS:$(L_SRC_DIR)/%.cc=$(OBJ_DIR)/%.o)
41 | 
42 | CUDA_SRCS         := $(wildcard $(SRC_DIR)/*.cu)
43 | CUDA_OBJS         := $(CUDA_SRCS:$(SRC_DIR)/%.cu=$(OBJ_DIR)/%.o)
44 | CUDA_DEPS         := $(wildcard $(SRC_DIR)/*.cuh)
45 | 
46 | CPP_SRC           := $(GRAPH_VIEWER_SRCS) $(PNGWRITER_SRCS)
47 | CUDA_SRC          := $(CUDA_SRCS)
48 | SOURCES           := $(CPP_SRC)
49 | OBJECTS           := $(GRAPH_VIEWER_OBJS) $(PNGWRITER_OBJS)
50 | 
51 | ifeq ($(CUDA_SUPPORT), 1)
52 | OBJECTS           := $(OBJECTS) $(CUDA_OBJS)
53 | SOURCES           := $(SOURCES) $(CUDA_SRCS)
54 | endif
55 | 
56 | # Default goal: a bare `make` builds the executable (`all` was already
57 | # declared phony below but had no rule).
58 | all: graph_viewer
59 | 
60 | # Generate dependency (.h, .hpp)
61 | # ala http://stackoverflow.com/questions/2394609/makefile-header-dependencies
62 | # .depend is regenerated whenever one of the sources it scans changes.
63 | # NOTE(review): -M names its targets without the $(OBJ_DIR)/ prefix, so the
64 | # generated rules probably never attach to the objects built below -- confirm.
65 | depend: .depend
66 | .depend: $(CPP_SRC)
67 | 	rm -f .depend
68 | 	$(CC) $(CXXFLAGS) $(CPPFLAGS) -M $(CPP_SRC) >> .depend
69 | 
70 | include ./.depend
71 | 
72 | graph_viewer: $(OBJECTS)
73 | 	$(CC) $(OBJECTS) $(LDFLAGS) -o graph_viewer
74 | 
75 | # Static pattern rules: each object depends on its own source only, so editing
76 | # one file no longer forces every object to be recompiled.
77 | $(GRAPH_VIEWER_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
78 | 	mkdir -p $(@D)
79 | 	$(CC) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $<
80 | 
81 | # CUDA objects are additionally rebuilt when any .cuh header changes.
82 | $(CUDA_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cu $(CUDA_DEPS)
83 | 	mkdir -p $(@D)
84 | 	nvcc --device-c $(CXXFLAGS) $(NVCCFLAGS) $(CPPFLAGS) -o $@ $<
85 | 
86 | $(PNGWRITER_OBJS): $(OBJ_DIR)/%.o: $(L_SRC_DIR)/%.cc
87 | 	mkdir -p $(@D)
88 | 	$(CC) -c $(CPPFLAGS) $(CXXFLAGS) -o $@ $<
89 | 
90 | clear: clean
91 | 
92 | # -f keeps `make clean` from failing when there is nothing to remove.
93 | clean:
94 | 	rm -rf graph_viewer $(OBJ_DIR)/* ./.depend
95 | 
96 | .PHONY: all clear clean depend
97 | 


--------------------------------------------------------------------------------
/src/RPBHFA2LaunchParameters.cuh:
--------------------------------------------------------------------------------
 1 | /// Per kernel launch configuration parameters
 2 | // 1: BoundingBoxKernel, SpeedKernel (reductions of size |V|)
 3 | // 2: TreeBuild
 4 | // 3: Summarization
 5 | // 4: SortKernel
 6 | // 5: ForceKernel
 7 | // 6: DisplacementKernel, GravityKernel, AttractiveForce (all 'streaming' kernels)
 8 | // InitializationKernel, ClearKernel1, ClearKernel2 don't use macros for launch configuration.
 9 | // THREADSn/FACTORn are used as __launch_bounds__(THREADSn, FACTORn), e.g. group 1 on BoundingBoxKernel.
10 | #if __CUDA_ARCH__ >= 500 // Maxwell (5.x) and every newer architecture
11 | // NOTE: all three architecture branches currently define identical values.
12 | #define THREADS1 512  /* must be a power of 2 */
13 | #define THREADS2 512
14 | #define THREADS3 128
15 | #define THREADS4 64
16 | #define THREADS5 256
17 | #define THREADS6 1024
18 | 
19 | #define FACTOR1 3
20 | #define FACTOR2 3
21 | #define FACTOR3 6  /* must all be resident at the same time */
22 | #define FACTOR4 6  /* must all be resident at the same time */
23 | #define FACTOR5 5
24 | #define FACTOR6 1
25 | 
26 | 
27 | #elif __CUDA_ARCH__ >= 300 // Kepler (3.x)
28 | 
29 | #define THREADS1 512  /* must be a power of 2 */
30 | #define THREADS2 512
31 | #define THREADS3 128
32 | #define THREADS4 64
33 | #define THREADS5 256
34 | #define THREADS6 1024
35 | 
36 | #define FACTOR1 3
37 | #define FACTOR2 3
38 | #define FACTOR3 6  /* must all be resident at the same time */
39 | #define FACTOR4 6  /* must all be resident at the same time */
40 | #define FACTOR5 5
41 | #define FACTOR6 1
42 | 
43 | #elif __CUDA_ARCH__ < 300 // Fermi (2.x) or Tesla (1.x); also taken when __CUDA_ARCH__ is undefined (it then evaluates as 0)
44 | 
45 | #define THREADS1 512  /* must be a power of 2 */
46 | #define THREADS2 512
47 | #define THREADS3 128
48 | #define THREADS4 64
49 | #define THREADS5 256
50 | #define THREADS6 1024
51 | 
52 | #define FACTOR1 3
53 | #define FACTOR2 3
54 | #define FACTOR3 6  /* must all be resident at the same time */
55 | #define FACTOR4 6  /* must all be resident at the same time */
56 | #define FACTOR5 5
57 | #define FACTOR6 1
58 | 
59 | #endif
60 | // Architecture-independent constants.
61 | #define WARPSIZE 32
62 | #define MAXDEPTH 32
63 | 


--------------------------------------------------------------------------------
/src/RPBHKernels.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  The following code is a modified version of the CUDA BarnesHut v3.1 code
  3 |  by Martin Burtscher. Modifications were made to transform the code from a
  4 |  three-dimensional Barnes-Hut implementation to a two-dimensional implementation,
  5 |  since our application (graph layout) only needs two dimensions.
  6 | 
  7 |  What follows is the copyright notice associated with that
  8 |  original code, as it is provided by the copyright holder:
  9 |  Texas State University-San Marcos.
 10 | */
 11 | 
 12 | 
 13 | /*
 14 |  CUDA BarnesHut v3.1: Simulation of the gravitational forces
 15 |  in a galactic cluster using the Barnes-Hut n-body algorithm
 16 | 
 17 |  Copyright (c) 2013, Texas State University-San Marcos. All rights reserved.
 18 | 
 19 |  Redistribution and use in source and binary forms, with or without modification,
 20 |  are permitted for academic, research, experimental, or personal use provided that
 21 |  the following conditions are met:
 22 | 
 23 |  * Redistributions of source code must retain the above copyright notice,
 24 |  this list of conditions and the following disclaimer.
 25 |  * Redistributions in binary form must reproduce the above copyright notice,
 26 |  this list of conditions and the following disclaimer in the documentation
 27 |  and/or other materials provided with the distribution.
 28 |  * Neither the name of Texas State University-San Marcos nor the names of its
 29 |  contributors may be used to endorse or promote products derived from this
 30 |  software without specific prior written permission.
 31 | 
 32 |  For all other uses, please contact the Office for Commercialization and Industry
 33 |  Relations at Texas State University-San Marcos <http://www.txstate.edu/ocir/>.
 34 | 
 35 |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 36 |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 37 |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED
 38 |  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 39 |  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 40 |  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 41 |  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 42 |  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 43 |  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 44 |  OF THE POSSIBILITY OF SUCH DAMAGE.
 45 | 
 46 |  Author: Martin Burtscher <burtscher@txstate.edu>
 47 |  */
 48 | 
 49 | #include <stdio.h>
 50 | #include <assert.h>
 51 | #include "RPBHKernels.cuh"
 52 | 
 53 | // Variables marked extern in header.
 54 | __device__ float minxdg, minydg, maxxdg, maxydg;
 55 | 
 56 | 
 57 | // Variables for use in this file only.
 58 | static __device__ volatile int stepd = -1; // incremented once by the last block to finish BoundingBoxKernel
 59 | static __device__ volatile int maxdepthd = 1;
 60 | static __device__ volatile int bottomd; // initialized by BoundingBoxKernel (set to nnodesd, the root's index)
 61 | static __device__ unsigned int blkcntd = 0; // finished-block counter for BoundingBoxKernel; its atomicInc wraps it back to 0
 62 | static __device__ volatile float radiusd; // half the longer bounding-box side, written by BoundingBoxKernel
 63 | 
 64 | 
 65 | /*** The Kernel Definitions ***/
 66 | /******************************************************************************/
 67 | /*** compute center and radius ************************************************/
 68 | /******************************************************************************/
 69 | // Reduces all body positions to one bounding box; the last block to finish stores radiusd and sets up the root node.
 70 | __global__
 71 | __launch_bounds__(THREADS1, FACTOR1)
 72 | void BoundingBoxKernel(int nnodesd, int nbodiesd, volatile int * __restrict startd,
 73 |                        volatile int   * __restrict childd, volatile float * __restrict node_massd,
 74 |                        volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
 75 |                        volatile float * __restrict maxxd,  volatile float * __restrict maxyd,
 76 |                        volatile float * __restrict minxd,  volatile float * __restrict minyd)
 77 | {
 78 |     register int i, j, k, inc;
 79 |     register float val, minx, maxx, miny, maxy;
 80 |     __shared__ volatile float sminx[THREADS1], smaxx[THREADS1], sminy[THREADS1], smaxy[THREADS1];
 81 | 
 82 |     // initialize with valid data (in case #bodies < #threads)
 83 |     minx = maxx = body_posd[0].x;
 84 |     miny = maxy = body_posd[0].y;
 85 | 
 86 |     // scan all bodies: each thread starts at its global index and steps by the total thread count
 87 |     i = threadIdx.x;
 88 |     inc = THREADS1 * gridDim.x; // assumes the kernel is launched with THREADS1 threads per block
 89 |     for (j = i + blockIdx.x * THREADS1; j < nbodiesd; j += inc)
 90 |     {
 91 |         val = body_posd[j].x;
 92 |         minx = fminf(minx, val);
 93 |         maxx = fmaxf(maxx, val);
 94 |         val = body_posd[j].y;
 95 |         miny = fminf(miny, val);
 96 |         maxy = fmaxf(maxy, val);
 97 |     }
 98 | 
 99 |     // reduction in shared memory: the active range halves each round (hence THREADS1 must be a power of 2)
100 |     sminx[i] = minx;
101 |     smaxx[i] = maxx;
102 |     sminy[i] = miny;
103 |     smaxy[i] = maxy;
104 | 
105 |     for (j = THREADS1 / 2; j > 0; j /= 2)
106 |     {
107 |         __syncthreads(); // writes of the previous round must be complete before they are read
108 |         if (i < j)
109 |         {
110 |             k = i + j;
111 |             sminx[i] = minx = fminf(minx, sminx[k]);
112 |             smaxx[i] = maxx = fmaxf(maxx, smaxx[k]);
113 |             sminy[i] = miny = fminf(miny, sminy[k]);
114 |             smaxy[i] = maxy = fmaxf(maxy, smaxy[k]);
115 |         }
116 |     }
117 | 
118 |     // write block result to global memory (thread 0 only, into slot blockIdx.x of each per-block array)
119 |     if (i == 0)
120 |     {
121 |         k = blockIdx.x;
122 |         minxd[k] = minx;
123 |         maxxd[k] = maxx;
124 |         minyd[k] = miny;
125 |         maxyd[k] = maxy;
126 |         __threadfence(); // order the four writes above before this block is counted as finished
127 | 
128 |         inc = gridDim.x - 1; // counter value that only the last block to arrive will observe
129 |         if (inc == atomicInc(&blkcntd, inc)) // atomicInc returns the old count and wraps blkcntd to 0 at inc
130 |         {
131 |             // I'm the last block, so combine all block results
132 |             for (j = 0; j <= inc; j++)
133 |             {
134 |                 minx = fminf(minx, minxd[j]);
135 |                 maxx = fmaxf(maxx, maxxd[j]);
136 |                 miny = fminf(miny, minyd[j]);
137 |                 maxy = fmaxf(maxy, maxyd[j]);
138 |             }
139 |             // compute 'radius': half the longer side of the bounding box
140 |             radiusd = fmaxf(maxx - minx, maxy - miny) * 0.5f;
141 | 
142 |             // insert the root node into the BH tree, at index nnodesd and centred on the bounding box.
143 |             k = nnodesd;
144 |             bottomd = k;
145 | 
146 |             node_massd[k] = -1.0f;
147 |             node_posd[k].x = (minx + maxx) * 0.5f;
148 |             node_posd[k].y = (miny + maxy) * 0.5f;
149 |             startd[k] = 0;
150 | 
151 |             k *= 4; // childd holds 4 slots per node, so the root's children start at 4 * nnodesd
152 |             for (i = 0; i < 4; i++) childd[k + i] = -1;
153 | 
154 |             stepd++;
155 |         }
156 |     }
157 | }
158 | 
159 | /******************************************************************************/
160 | /*** build tree ***************************************************************/
161 | /******************************************************************************/
162 | 
163 | // ClearKernel1: writes -1 (the "no child" marker) into every childd slot in
164 | // the index range [4 * nbodiesd, 4 * nnodesd).
165 | __global__ __launch_bounds__(1024, 1)
166 | void ClearKernel1(int nnodesd, int nbodiesd, volatile int * __restrict childd)
167 | {
168 |     // Slots from 4 * nnodesd upward (the root's children) were set earlier.
169 |     const int slot_end = 4 * nnodesd;
170 |     const int slot_begin = 4 * nbodiesd;
171 |     const int stride = blockDim.x * gridDim.x;
172 | 
173 |     // Start from slot_begin rounded down to a multiple of WARPSIZE; a thread
174 |     // that lands below slot_begin skips ahead by one full stride.
175 |     int slot = (slot_begin & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x;
176 |     if (slot < slot_begin) slot += stride;
177 | 
178 |     // Each thread clears every stride-th slot until it reaches slot_end.
179 |     for (; slot < slot_end; slot += stride)
180 |         childd[slot] = -1;
181 | }
182 | // Inserts the bodies assigned to each thread into the quadtree whose root is cell nnodesd.
183 | // A childd slot holds a body (< nbodiesd), a cell (>= nbodiesd), -1 (null) or -2 (locked).
184 | __global__
185 | __launch_bounds__(THREADS2, FACTOR2)
186 | void TreeBuildingKernel(int nnodesd, int nbodiesd, volatile int * __restrict childd,
187 |                         volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd)
188 | {
189 |     register int i, j, depth, localmaxdepth, skip, inc;
190 |     register float x, y, r;
191 |     register float px, py;
192 |     register float dx, dy;
193 |     register int ch, n, cell, locked, patch;
194 |     register float rootr, rootx, rooty;
195 | 
196 |     // cache root data
197 |     rootx = node_posd[nnodesd].x;
198 |     rooty = node_posd[nnodesd].y;
199 |     rootr = radiusd;
200 |     // skip: nonzero = load body i and restart at the root; 0 = retry from the current cell; 2 = a locked slot awaits release
201 |     localmaxdepth = 1;
202 |     skip = 1;
203 |     inc = blockDim.x * gridDim.x;
204 |     i = threadIdx.x + blockIdx.x * blockDim.x;
205 | 
206 |     // iterate over all bodies assigned to thread
207 |     while (i < nbodiesd)
208 |     {
209 |         if (skip != 0)
210 |         {
211 |             // new body, so start traversing at root
212 |             skip = 0;
213 |             px = body_posd[i].x;
214 |             py = body_posd[i].y;
215 |             n = nnodesd;
216 |             depth = 1;
217 |             r = rootr * 0.5f;
218 |             dx = dy = -r;
219 |             j = 0;
220 |             // determine which child to follow: bit 0 of j set if body is right of center, bit 1 if above
221 |             if (rootx < px) {j  = 1; dx = r;}
222 |             if (rooty < py) {j |= 2; dy = r;}
223 |             x = rootx + dx;
224 |             y = rooty + dy;
225 |         }
226 | 
227 |         // follow path to leaf cell
228 |         ch = childd[n*4+j];
229 |         // descend while the slot refers to a cell; (x, y) tracks the center of the chosen quadrant
230 |         while (ch >= nbodiesd)
231 |         {
232 |             n = ch;
233 |             depth++;
234 |             r *= 0.5f;
235 |             dx = dy = -r;
236 |             j = 0;
237 |             // determine which child to follow
238 |             if (x < px) {j  = 1; dx = r;}
239 |             if (y < py) {j |= 2; dy = r;}
240 |             x += dx;
241 |             y += dy;
242 |             ch = childd[n*4+j];
243 |         }
244 | 
245 |         // here ch is either leaf (< nbodiesd), null (-1), locked (-2)
246 | 
247 |         if (ch != -2)
248 |         {
249 |         // here we insert body into either empty cell, or split leafcell.
250 |             // skip if child pointer is locked and try again later
251 |             locked = n*4+j;
252 |             if (ch == -1)
253 |             {
254 |                 if (-1 == atomicCAS((int *)&childd[locked], -1, i))
255 |                 {  // if null, just insert the new body
256 |                     localmaxdepth = max(depth, localmaxdepth);
257 |                     i += inc;  // move on to next body
258 |                     skip = 1;
259 |                 }
260 |                 // else: failed to claim cell, re-traverse next iteration.
261 |             }
262 |             else
263 |             {  // there already is a body in this position
264 |                 if (ch == atomicCAS((int *)&childd[locked], ch, -2))
265 |                 {
266 |                     // lock is now acquired on childd[locked].
267 |                     // ch is old BH node id living at childd[locked]
268 | 
269 |                     // if bodies have same position, offset the body to insert (scale it by .99);
270 |                     // the intent stated here originally was to then redo the traversal.
271 |                     if (body_posd[ch].x == px && body_posd[ch].y == py)
272 |                     {
273 |                         body_posd[i].x *= .99;
274 |                         body_posd[i].y *= .99;
275 |                         skip = 0; // start all over
276 |                         childd[locked] = ch; // release lock
277 |                         break;  // NOTE(review): leaves the while (i < nbodiesd) loop, so body i and all later bodies of this thread are never inserted -- confirm
278 |                     }
279 | 
280 |                     patch = -1;
281 |                     // create new cell(s) and insert the new and old body
282 |                     do
283 |                     {
284 |                         // 1.) Create new cell (indices are handed out downwards from bottomd)
285 |                         cell = atomicSub((int *)&bottomd, 1) - 1;
286 |                         assert(cell > nbodiesd);
287 |                         // the first new cell is linked only at unlock time (via patch); later ones are linked here
288 |                         if (patch != -1) childd[n*4+j] = cell;
289 |                         patch = max(patch, cell);
290 | 
291 |                         // 2.) Make newly created cell current
292 |                         depth++;
293 |                         n = cell;
294 |                         r *= 0.5f;
295 | 
296 |                         // 3.) Insert old body into correct quadrant
297 |                         j = 0;
298 |                         if (x < body_posd[ch].x) j  = 1;
299 |                         if (y < body_posd[ch].y) j |= 2;
300 |                         childd[cell*4+j] = ch;
301 | 
302 |                         // 4.) Determine center + quadrant for cell of new body
303 |                         j = 0;
304 |                         dx = dy = -r;
305 |                         if (x < px) {j  = 1; dx = r;}
306 |                         if (y < py) {j |= 2; dy = r;}
307 |                         x += dx;
308 |                         y += dy;
309 | 
310 |                         // 5.) Visit this cell/check if in use (possibly by old body)
311 |                         ch = childd[n*4+j];
312 |                         // repeat until the two bodies are different children
313 |                     } while (ch >= 0);
314 |                     childd[n*4+j] = i; // insert new body
315 | 
316 |                     localmaxdepth = max(depth, localmaxdepth);
317 |                     i += inc;  // move on to next body
318 |                     skip = 2;
319 |                 }
320 |                 // else: failed to acquire lock, re-traverse next iteration.
321 |             }
322 |         }
323 |         __syncthreads();  // __threadfence();
324 |         // storing patch (the first cell created above) replaces the -2 lock marker and links in the new subtree
325 |         if (skip == 2) childd[locked] = patch; // unlock
326 |     }
327 |     // record maximum tree depth
328 |     atomicMax((int *)&maxdepthd, localmaxdepth);
329 | }
330 | 
331 | // Marks cells as not yet summarized: mass -1.0f and start index null (-1).
332 | __global__
333 | __launch_bounds__(1024, 1)
334 | void ClearKernel2(int nnodesd, volatile int * __restrict startd, volatile float * __restrict node_massd)
335 | {
336 |     // bottomd holds the lowest cell index allocated during tree building.
337 |     const int first_cell = bottomd;
338 |     const int stride = blockDim.x * gridDim.x;
339 | 
340 |     // Start at first_cell masked with -WARPSIZE plus the global thread index.
341 |     int cell = (first_cell & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x;
342 |     if (cell < first_cell) cell += stride;
343 | 
344 |     // The loop stops before nnodesd, so the root cell is left untouched.
345 |     for (; cell < nnodesd; cell += stride)
346 |     {
347 |         node_massd[cell] = -1.0f;
348 |         startd[cell] = -1;
349 |     }
350 | }
351 | 
352 | 
353 | /******************************************************************************/
354 | /*** compute center of mass ***************************************************/
355 | /******************************************************************************/
356 | // Computes, for every cell from bottomd up to the root, its body count, center of mass and total mass.
357 | __global__
358 | __launch_bounds__(THREADS3, FACTOR3)
359 | void SummarizationKernel(const int nnodesd, const int nbodiesd, volatile int * __restrict countd, const int * __restrict childd,
360 |                          volatile float * __restrict body_massd, volatile float * __restrict node_massd, volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd)
361 | {
362 |     register int i, j, k, ch, inc, cnt, bottom, flag;
363 |     register float m, cm, px, py;
364 |     __shared__ int  child[THREADS3 * 4];
365 |     __shared__ float mass[THREADS3 * 4];
366 |     // child/mass cache the four children of the current cell; each thread uses entries i*THREADS3+threadIdx.x
367 |     bottom = bottomd;
368 |     inc = blockDim.x * gridDim.x;
369 |     k = (bottom & (-WARPSIZE)) + threadIdx.x + blockIdx.x * blockDim.x;
370 |     if (k < bottom) k += inc;
371 |     // phase 1: five passes over this thread's cells, finalizing only cells whose child cells are already finalized
372 |     register int restart = k;
373 |     for (j = 0; j < 5; j++)
374 |     {  // wait-free pre-passes
375 |         // iterate over all cells assigned to thread
376 |         while (k <= nnodesd)
377 |         {
378 |             if (node_massd[k] < 0.0f)  // negative mass marks a cell that has not been summarized yet
379 |             {
380 |                 for (i = 0; i < 4; i++)
381 |                 {
382 |                     ch = childd[k*4+i];
383 |                     child[i*THREADS3+threadIdx.x] = ch;  // cache children
384 |                     if ((ch >= nbodiesd) && ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) < 0.0f)) break;  // child cell not ready
385 |                 }
386 |                 if (i == 4)
387 |                 {
388 |                     // all children are ready
389 |                     cm = 0.0f;
390 |                     px = 0.0f;
391 |                     py = 0.0f;
392 |                     cnt = 0;
393 |                     for (i = 0; i < 4; i++)
394 |                     {
395 |                         ch = child[i*THREADS3+threadIdx.x];
396 |                         if (ch >= 0)
397 |                         {
398 |                             if (ch >= nbodiesd)
399 |                             {  // count bodies (needed later)
400 |                                 m = mass[i*THREADS3+threadIdx.x];
401 |                                 cnt += countd[ch];
402 |                                 px += node_posd[ch].x * m;
403 |                                 py += node_posd[ch].y * m;
404 |                             }
405 |                             else
406 |                             {
407 |                                 m = body_massd[ch];
408 |                                 cnt++;
409 |                                 px += body_posd[ch].x * m;
410 |                                 py += body_posd[ch].y * m;
411 |                             }
412 |                             // add child's contribution
413 |                             cm += m;
414 |                         }
415 |                     }
416 |                     countd[k] = cnt;
417 |                     m = 1.0f / cm;  // turn the mass-weighted position sums into the center of mass
418 |                     node_posd[k].x = px * m;
419 |                     node_posd[k].y = py * m;
420 |                     __threadfence();  // make sure data are visible before setting mass
421 |                     node_massd[k] = cm;
422 |                 }
423 |             }
424 |             k += inc;  // move on to next cell
425 |         }
426 |         k = restart;
427 |     }
428 |     // phase 2: revisit each remaining cell until all of its child cells report a non-negative mass
429 |     flag = 0;
430 |     j = 0;
431 |     // iterate over all cells assigned to thread
432 |     while (k <= nnodesd)
433 |     {
434 |         if (k < nbodiesd and body_massd[k] >= 0.0f)
435 |             k += inc;
436 |         else if(k >= nbodiesd and node_massd[k] >= 0.0f)
437 |             k += inc;
438 | 
439 |         else
440 |         {
441 |             if (j == 0)
442 |             {
443 |                 j = 4;  // j counts the children that are not ready yet
444 |                 for (i = 0; i < 4; i++)
445 |                 {
446 |                     ch = childd[k*4+i];
447 |                     child[i*THREADS3+threadIdx.x] = ch;  // cache children
448 |                     if ((ch < nbodiesd) || ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) >= 0.0f)) j--;
449 |                 }
450 |             }
451 |             else
452 |             {
453 |                 j = 4;  // retry of the same cell: reuse cached children, re-read only masses still negative
454 |                 for (i = 0; i < 4; i++)
455 |                 {
456 |                     ch = child[i*THREADS3+threadIdx.x];
457 |                     if ((ch < nbodiesd) || (mass[i*THREADS3+threadIdx.x] >= 0.0f) || ((mass[i*THREADS3+threadIdx.x] = node_massd[ch]) >= 0.0f)) j--;
458 |                 }
459 |             }
460 | 
461 |             if (j == 0)
462 |             {
463 |                 // all children are ready
464 |                 cm = 0.0f;
465 |                 px = 0.0f;
466 |                 py = 0.0f;
467 |                 cnt = 0;
468 |                 for (i = 0; i < 4; i++)
469 |                 {
470 |                     ch = child[i*THREADS3+threadIdx.x];
471 |                     if (ch >= 0)
472 |                     {
473 |                         if (ch >= nbodiesd)
474 |                         {  // count bodies (needed later)
475 |                             m = mass[i*THREADS3+threadIdx.x];
476 |                             cnt += countd[ch];
477 |                             px += node_posd[ch].x * m;
478 |                             py += node_posd[ch].y * m;
479 |                         }
480 |                         else
481 |                         {
482 |                             m = body_massd[ch];
483 |                             cnt++;
484 |                             px += body_posd[ch].x * m;
485 |                             py += body_posd[ch].y * m;
486 |                         }
487 |                         // add child's contribution
488 |                         cm += m;
489 |                     }
490 |                 }
491 |                 countd[k] = cnt;
492 |                 m = 1.0f / cm;
493 |                 node_posd[k].x = px * m;
494 |                 node_posd[k].y = py * m;
495 |                 flag = 1;  // the mass itself is stored only after the barrier below
496 |             }
497 |         }
498 |         __syncthreads();  // __threadfence();
499 |         if (flag != 0)
500 |         {
501 |             k < nbodiesd ? body_massd[k] = cm : node_massd[k] = cm;  // storing the mass marks cell k as ready
502 |             k += inc;
503 |             flag = 0;
504 |         }
505 |     }
506 | }
507 | 
508 | 
509 | /******************************************************************************/
510 | /*** sort bodies **************************************************************/
511 | /******************************************************************************/
512 | // Assigns each cell its first slot in sortd (via startd) and writes bodies to sortd in tree order.
513 | __global__
514 | __launch_bounds__(THREADS4, FACTOR4)
515 | void SortKernel(int nnodesd, int nbodiesd, int * __restrict sortd, int * __restrict countd, volatile int * __restrict startd, int * __restrict childd)
516 | {
517 |     register int i, j, k, ch, dec, start, bottom;
518 |     // cells are visited from high to low index; the thread with the highest global index starts at the root (nnodesd)
519 |     bottom = bottomd;
520 |     dec = blockDim.x * gridDim.x;
521 |     k = nnodesd + 1 - dec + threadIdx.x + blockIdx.x * blockDim.x;
522 | 
523 |     // iterate over all cells assigned to thread
524 |     while (k >= bottom)
525 |     {
526 |         start = startd[k];  // -1 until the parent cell has assigned this cell its start slot
527 |         if (start >= 0)  // otherwise k is left unchanged and the same cell is polled again
528 |         {
529 |             j = 0;
530 |             for (i = 0; i < 4; i++)
531 |             {
532 |                 ch = childd[k*4+i];
533 |                 if (ch >= 0)
534 |                 {
535 |                     if (i != j)
536 |                     {
537 |                         // move children to front (needed later for speed)
538 |                         childd[k*4+i] = -1;
539 |                         childd[k*4+j] = ch;
540 |                     }
541 |                     j++;
542 |                     if (ch >= nbodiesd)
543 |                     {
544 |                         // child is a cell
545 |                         startd[ch] = start;  // set start ID of child
546 |                         start += countd[ch];  // add #bodies in subtree
547 |                     }
548 |                     else
549 |                     {
550 |                         // child is a body
551 |                         sortd[start] = ch;  // record body in 'sorted' array
552 |                         start++;
553 |                     }
554 |                 }
555 |             }
556 |             k -= dec;  // move on to next cell
557 |         }
558 |     }
559 | }
560 | 
561 | 
562 | /******************************************************************************/
563 | /*** compute force ************************************************************/
564 | /******************************************************************************/
565 | // Adds to fxd/fyd the k_rd-scaled force on every body, approximating far-away cells by their center of mass.
566 | __global__
567 | __launch_bounds__(THREADS5, FACTOR5)
568 | void ForceCalculationKernel(int nnodesd, int nbodiesd, float itolsqd, float epssqd,
569 |                             volatile int * __restrict sortd, volatile int * __restrict childd,
570 |                             volatile float * __restrict body_massd, volatile float * __restrict node_massd,
571 |                             volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
572 |                             volatile float * __restrict fxd, volatile float * __restrict fyd, const float k_rd)
573 | {
574 |     register int i, j, k, n, depth, base, sbase, diff, pd, nd;
575 |     register float px, py, ax, ay, dx, dy, tmp;
576 |     __shared__ volatile int pos[MAXDEPTH * THREADS5/WARPSIZE], node[MAXDEPTH * THREADS5/WARPSIZE];
577 |     __shared__ float dq[MAXDEPTH * THREADS5/WARPSIZE];
578 |     // pos/node: one traversal stack of MAXDEPTH entries per warp; dq: per-depth squared-distance thresholds
579 |     if (0 == threadIdx.x)
580 |     {
581 |         tmp = radiusd * 2;
582 |         // precompute values that depend only on tree level
583 |         dq[0] = tmp * tmp * itolsqd;
584 |         for (i = 1; i < maxdepthd; i++)
585 |         {
586 |             dq[i] = dq[i - 1] * 0.25f;  // cell side halves per level, so the squared threshold quarters
587 |             dq[i - 1] += epssqd;
588 |         }
589 |         dq[i - 1] += epssqd;
590 |         // NOTE(review): this check runs only after the loop above has already written dq[0..maxdepthd-1]
591 |         assert(maxdepthd <= MAXDEPTH);
592 |     }
593 |     __syncthreads();
594 | 
595 |     if (maxdepthd <= MAXDEPTH)
596 |     {
597 |         // figure out first thread in each warp (lane 0)
598 |         base = threadIdx.x / WARPSIZE;
599 |         sbase = base * WARPSIZE;
600 |         j = base * MAXDEPTH;  // start of this warp's slice in pos/node/dq
601 | 
602 |         diff = threadIdx.x - sbase;
603 |         // make multiple copies to avoid index calculations later
604 |         if (diff < MAXDEPTH) dq[diff+j] = dq[diff];
605 | 
606 |         __syncthreads();
607 |         __threadfence_block();
608 | 
609 |         // iterate over all bodies assigned to thread
610 |         for (k = threadIdx.x + blockIdx.x * blockDim.x; k < nbodiesd; k += blockDim.x * gridDim.x)
611 |         {
612 |             i = sortd[k];  // get permuted/sorted
613 |             // cache position info
614 |             px = body_posd[i].x;
615 |             py = body_posd[i].y;
616 | 
617 |             ax = 0.0f;
618 |             ay = 0.0f;
619 | 
620 |             // initialize iteration stack, i.e., push root node onto stack
621 |             depth = j;
622 |             if (sbase == threadIdx.x)
623 |             {
624 |                 pos[j] = 0;
625 |                 node[j] = nnodesd * 4;
626 |             }
627 |             // NOTE(review): the other lanes read pos/node below with no warp barrier after lane 0's write -- presumably relies on warp-synchronous execution; confirm
628 |             do
629 |             {
630 |                 // stack is not empty
631 |                 pd = pos[depth];
632 |                 nd = node[depth];
633 |                 while (pd < 4)
634 |                 {
635 |                     // node on top of stack has more children to process
636 |                     n = childd[nd + pd];  // load child pointer
637 |                     pd++;
638 | 
639 |                     if (n >= 0)
640 |                     {
641 |                         if(n < nbodiesd)
642 |                         {
643 |                             dx = px - body_posd[n].x;
644 |                             dy = py - body_posd[n].y;
645 |                         }
646 |                         else
647 |                         {
648 |                             dx = px - node_posd[n].x;
649 |                             dy = py - node_posd[n].y;
650 |                         }
651 |                         tmp = dx*dx + dy*dy + epssqd;  // compute distance squared (plus softening)
652 | 
653 |                         // check body-body interaction
654 |                         if (n < nbodiesd)
655 |                         {
656 |                             ax += k_rd * dx * body_massd[i] * body_massd[n] / tmp;
657 |                             ay += k_rd * dy * body_massd[i] * body_massd[n] / tmp;
658 |                         }
659 | 
660 |                         // or, if n is cell, ensure all threads agree that cell is far enough away
661 |                         else if(__all_sync(__activemask(), tmp >= dq[depth]))
662 |                         {
663 |                             ax += k_rd * dx * body_massd[i] * node_massd[n] / tmp;
664 |                             ay += k_rd * dy * body_massd[i] * node_massd[n] / tmp;
665 |                         }
666 |                         else
667 |                         {
668 |                             // push cell onto stack
669 |                             if (sbase == threadIdx.x)
670 |                             {  // maybe don't push and inc if last child
671 |                                 pos[depth] = pd;
672 |                                 node[depth] = nd;
673 |                             }
674 |                             depth++;
675 |                             pd = 0;
676 |                             nd = n * 4;
677 |                         }
678 |                     }
679 |                     else
680 |                     {
681 |                         pd = 4;  // early out because all remaining children are also null (-1)
682 |                     }
683 |                 }
684 |                 depth--;  // done with this level
685 |             } while (depth >= j);
686 | 
687 | 
688 |             // save computed acceleration
689 |             fxd[i] += ax;  // added to the value already stored, not overwritten
690 |             fyd[i] += ay;
691 |         }
692 |     }
693 | }
694 | 


--------------------------------------------------------------------------------
/src/RPBHKernels.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  The following code is a modified version of the CUDA BarnesHut v3.1 code
 3 |  by Martin Burtscher. Modifications were made to transform the code from a
 4 |  three-dimensional Barnes-Hut implementation to a two-dimensional implementation,
 5 |  since our application (graph layout) only needs two dimensions.
 6 | 
 7 |  What follows is the copyright notice associated with that
 8 |  original code, as it is provided by the copyright holder:
 9 |  Texas State University-San Marcos.
10 | */
11 | 
12 | 
13 | /*
14 |  CUDA BarnesHut v3.1: Simulation of the gravitational forces
15 |  in a galactic cluster using the Barnes-Hut n-body algorithm
16 | 
17 |  Copyright (c) 2013, Texas State University-San Marcos. All rights reserved.
18 | 
19 |  Redistribution and use in source and binary forms, with or without modification,
20 |  are permitted for academic, research, experimental, or personal use provided that
21 |  the following conditions are met:
22 | 
23 |  * Redistributions of source code must retain the above copyright notice,
24 |  this list of conditions and the following disclaimer.
25 |  * Redistributions in binary form must reproduce the above copyright notice,
26 |  this list of conditions and the following disclaimer in the documentation
27 |  and/or other materials provided with the distribution.
28 |  * Neither the name of Texas State University-San Marcos nor the names of its
29 |  contributors may be used to endorse or promote products derived from this
30 |  software without specific prior written permission.
31 | 
32 |  For all other uses, please contact the Office for Commercialization and Industry
33 |  Relations at Texas State University-San Marcos <http://www.txstate.edu/ocir/>.
34 | 
35 |  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
36 |  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
37 |  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED
38 |  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
39 |  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
40 |  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 |  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
42 |  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
43 |  OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
44 |  OF THE POSSIBILITY OF SUCH DAMAGE.
45 | 
46 |  Author: Martin Burtscher <burtscher@txstate.edu>
47 |  */
48 | 
49 | #ifndef RPBHKernels_cuh
50 | #define RPBHKernels_cuh
51 | 
52 | #include "RPBHFA2LaunchParameters.cuh"
53 | 
54 | extern __device__ volatile int errd;
55 | extern __device__ float minxdg, minydg, maxxdg, maxydg;
56 | // The last block merges the per-block extrema, sets radiusd and creates the root cell at index nnodesd.
57 | __global__
58 | __launch_bounds__(THREADS1, FACTOR1)
59 | void BoundingBoxKernel(int nnodesd, int nbodiesd, volatile int * __restrict startd,
60 |                        volatile int   * __restrict childd, volatile float * __restrict node_massd,
61 |                        volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
62 |                        volatile float * __restrict maxxd,  volatile float * __restrict maxyd,
63 |                        volatile float * __restrict minxd,  volatile float * __restrict minyd);
64 | // Sets the childd entries with index in [4*nbodiesd, 4*nnodesd) to null (-1).
65 | __global__
66 | __launch_bounds__(1024, 1)
67 | void ClearKernel1(int nnodesd, int nbodiesd, volatile int * __restrict childd);
68 | // Inserts all bodies into the quadtree stored in childd, creating new cells where two bodies collide.
69 | __global__
70 | __launch_bounds__(THREADS2, FACTOR2)
71 | void TreeBuildingKernel(int nnodesd, int nbodiesd, volatile int * __restrict childd,
72 |                         volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd);
73 | // Sets node_massd to -1.0f and startd to -1 for the cells from bottomd up to (excluding) nnodesd.
74 | __global__
75 | __launch_bounds__(1024, 1)
76 | void ClearKernel2(int nnodesd, volatile int * __restrict startd, volatile float * __restrict node_massd);
77 | // Computes for every cell its body count (countd), center of mass (node_posd) and total mass (node_massd).
78 | __global__
79 | __launch_bounds__(THREADS3, FACTOR3)
80 | void SummarizationKernel(const int nnodesd, const int nbodiesd, volatile int * __restrict countd, const int * __restrict childd,
81 |                          volatile float * __restrict body_massd, volatile float * __restrict node_massd, volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd);
82 | // Writes body indices to sortd in tree order and packs each cell's non-null children to the front.
83 | __global__
84 | __launch_bounds__(THREADS4, FACTOR4)
85 | void SortKernel(int nnodesd, int nbodiesd, int * __restrict sortd, int * __restrict countd, volatile int * __restrict startd, int * __restrict childd);
86 | // Traverses the tree for every body and adds the resulting k_rd-scaled force to fxd/fyd.
87 | __global__
88 | __launch_bounds__(THREADS5, FACTOR5)
89 | void ForceCalculationKernel(int nnodesd, int nbodiesd, float itolsqd, float epssqd,
90 |                             volatile int * __restrict sortd, volatile int * __restrict childd,
91 |                             volatile float * __restrict body_massd, volatile float * __restrict node_massd,
92 |                             volatile float2 * __restrict body_posd, volatile float2 * __restrict node_posd,
93 |                             volatile float * __restrict fxd, volatile float * __restrict fyd, const float k_rd);
94 | 
95 | #endif
96 | 


--------------------------------------------------------------------------------
/src/RPBarnesHutApproximator.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPBarnesHutApproximator.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | #include "RPBarnesHutApproximator.hpp"
 25 | #include <math.h>
 26 | #include <stdlib.h>
 27 | #include <queue>
 28 | 
 29 | namespace RPGraph
 30 | {
 31 |     BarnesHutCell::BarnesHutCell(Coordinate position, float length, Coordinate particle_position, float particle_mass)
 32 |     : cell_center{position}, length{length}, mass_center(particle_position), total_mass{particle_mass}
 33 |     {   // Square cell of side `length` centered at `position`, initially holding one particle.
 34 |         lb = position.x - length/2.0;  // lowest x covered by the cell
 35 |         rb = position.x + length/2.0;  // highest x covered by the cell
 36 |         bb = position.y - length/2.0;  // lowest y covered by the cell
 37 |         ub = position.y + length/2.0;  // highest y covered by the cell
 38 |     }
 39 |     // Deletes the four sub-cells; each sub-cell's destructor does the same, freeing the whole subtree.
 40 |     BarnesHutCell::~BarnesHutCell()
 41 |     {
 42 |         for (nid_t n = 0; n < 4; ++n) delete sub_cells[n];  // deleting a null pointer is a no-op
 43 |     }
 44 |     // Creates the sub-cell for `quadrant`, holding one particle of the given mass at `pos`.
 45 |     void BarnesHutCell::add_leafcell(int quadrant, float mass, Coordinate pos)
 46 |     {
 47 |         // Sub-cell centers lie a quarter side length from this cell's center along each axis:
 48 |         // 0 = (-x,+y), 1 = (+x,+y), 2 = (+x,-y), 3 = (-x,-y); any other value keeps (0,0).
 49 |         Coordinate child_center = Coordinate(0,0);
 50 |         switch (quadrant)
 51 |         {
 52 |             case 0: child_center = Coordinate(this->cell_center.x-length/4.0,this->cell_center.y+length/4); break;
 53 |             case 1: child_center = Coordinate(this->cell_center.x+length/4.0,this->cell_center.y+length/4); break;
 54 |             case 2: child_center = Coordinate(this->cell_center.x+length/4.0,this->cell_center.y-length/4); break;
 55 |             case 3: child_center = Coordinate(this->cell_center.x-length/4.0,this->cell_center.y-length/4); break;
 56 |         }
 57 | 
 58 |         sub_cells[quadrant] = new BarnesHutCell(child_center, this->length/2.0, pos, mass);
 59 |         num_subparticles += 1;
 60 |     }
 61 |     // Stores the root geometry and theta; the tree starts empty because reset() leaves root_cell null.
 62 |     BarnesHutApproximator::BarnesHutApproximator(Coordinate root_center, float root_length, float theta)
 63 |     : root_center{root_center}, root_length{root_length}, theta{theta}
 64 |     {   // NOTE(review): reset() runs `delete root_cell`, so root_cell must already be initialized here (e.g. to nullptr in the header) -- confirm
 65 |         this->reset(root_center, root_length);
 66 |     }
 67 |     // Discards the current tree and stores the root geometry to use for the next one.
 68 |     void BarnesHutApproximator::reset(Coordinate root_center, float root_length)
 69 |     {
 70 |         delete root_cell; // this recursively deletes the entire tree
 71 |         root_cell = nullptr;
 72 |         // the parameters shadow the members, hence the explicit this->
 73 |         this->root_center = root_center;
 74 |         this->root_length = root_length;
 75 |     }
 76 | 
 77 |     // Approximates the force on a particle by a breadth-first Barnes-Hut traversal; zero for an empty tree.
 78 |     Real2DVector BarnesHutApproximator::approximateForce(Coordinate particle_pos, float particle_mass, float theta)
 79 |     {   // Note: the `theta` parameter shadows the member of the same name; only the parameter is used here.
 80 |         Real2DVector force = Real2DVector(0.0, 0.0);
 81 |         std::queue<BarnesHutCell*> cells_to_check;
 82 |         // Before any particle is inserted root_cell is null: enqueue nothing, so the zero force is returned.
 83 |         if (root_cell != nullptr) cells_to_check.push(root_cell);
 84 |         BarnesHutCell *cur_cell;
 85 |         while (!cells_to_check.empty())
 86 |         {
 87 |             cur_cell = cells_to_check.front();
 88 |             cells_to_check.pop();
 89 | 
 90 |             const float D2 = distance2(particle_pos, cur_cell->mass_center);
 91 |             if (D2 == 0)
 92 |             {
 93 |                 // Zero distance to a leaf: the particle itself (or a coincident one), so skip it.
 94 |                 if (cur_cell->num_subparticles == 0) continue;
 95 |                 else return Real2DVector(rand(), rand());  // coincides with a non-leaf's mass center: give up with a random vector
 96 | 
 97 |             }
 98 | 
 99 |             // length / D >= theta is the criterion to divide into subcells; leaves are always used directly.
100 |             if (cur_cell->length*cur_cell->length / D2 < theta*theta || cur_cell->num_subparticles == 0)
101 |                 force += direction(particle_pos, cur_cell->mass_center)  *
102 |                 (particle_mass * cur_cell->total_mass / D2);
103 | 
104 |             else
105 |                 for (int i = 0; i < 4; ++i)
106 |                     if (cur_cell->sub_cells[i] != nullptr) cells_to_check.push(cur_cell->sub_cells[i]);
107 |         }
108 |         return force;
109 |     }
110 | 
111 |     void BarnesHutApproximator::insertParticle(RPGraph::Coordinate particle_position, float particle_mass)
112 |     {
113 |         if(not root_cell)
114 |         {
115 |             root_cell = new BarnesHutCell(this->root_center, this->root_length,
116 |                                           particle_position, particle_mass);
117 |         }
118 | 
119 |         else
120 |         {
121 |             BarnesHutCell *cur_cell = root_cell;
122 |             while (true)
123 |             {
124 |                 const int quadrant_new_particle = (particle_position-cur_cell->cell_center).quadrant();
125 | 
126 |                 if (particle_position.y > cur_cell->ub or particle_position.x > cur_cell->rb or
127 |                     particle_position.x < cur_cell->lb or particle_position.y < cur_cell->bb)
128 |                 {
129 |                     //fprintf(stderr, "error: Barnes-Hut: Can't insert particle out of bounds of this cell.\n");
130 |                     return;
131 |                 }
132 | 
133 |                 // N.B. a BarnesHutCell is never empty, but can lack subparticles/cells.
134 |                 // If so, we need to create, and insert, a subcell for the single particle that
135 |                 // is stored in this cell.
136 |                 if (cur_cell->num_subparticles == 0)
137 |                 {
138 |                     if (particle_position == cur_cell->mass_center)
139 |                     {
140 |                         // We want two particles in the same place...
141 |                         // Thats equivalent to a single particle with summed masses.
142 |                         // mass_center won't change.
143 |                         cur_cell->total_mass += particle_mass;
144 |                         return;
145 |                     }
146 | 
147 |                     // We move the single particle to a subcell.
148 |                     int quadrant_existing_particle = (cur_cell->mass_center - cur_cell->cell_center).quadrant();
149 |                     cur_cell->add_leafcell(quadrant_existing_particle, cur_cell->total_mass, cur_cell->mass_center);
150 |                 }
151 | 
152 |                 // We assume inserting will succeed, and update total_mass and mass_center accordingly
153 |                 cur_cell->total_mass  += particle_mass;
154 |                 cur_cell->mass_center  = cur_cell->mass_center * (float) (cur_cell->num_subparticles);
155 |                 cur_cell->mass_center += particle_position;
156 |                 cur_cell->mass_center /= (cur_cell->num_subparticles+1);
157 | 
158 |                 // If we can add a leaf-cell in an empty slot, we do so.
159 |                 if (cur_cell->sub_cells[quadrant_new_particle] == nullptr)
160 |                 {
161 |                     cur_cell->add_leafcell(quadrant_new_particle, particle_mass, particle_position);
162 |                     return;
163 |                 }
164 | 
165 |                 // Else we recurse to the occupied cell.
166 |                 else
167 |                 {
168 |                     cur_cell->num_subparticles += 1;
169 |                     cur_cell = cur_cell->sub_cells[quadrant_new_particle];
170 |                 }
171 |             }
172 |         }
173 |     }
174 | }
175 | 


--------------------------------------------------------------------------------
/src/RPBarnesHutApproximator.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPBarnesHutApproximator.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPBarnesHutApproximator_hpp
25 | #define RPBarnesHutApproximator_hpp
26 | 
27 | #include "RPGraph.hpp"
28 | #include "RPCommon.hpp"
29 | 
30 | namespace RPGraph
31 | {
    // One node of the Barnes-Hut quadtree used to approximate repulsion.
    // A cell either stands for a single particle (num_subparticles == 0) or
    // aggregates the particles stored in up to four sub-cells.
    class BarnesHutCell
    {
    public:
        // Attach a new leaf for a particle of `mass' at `pos' in slot `quadrant'.
        // NOTE(review): defined outside this header; details not visible here.
        void add_leafcell(int quadrant, float mass, Coordinate pos);

        // Cell bounds: left and right limits on x, upper and bottom limits on y
        // (this is how BarnesHutApproximator::insertParticle tests them).
        float lb, rb, ub, bb;

        // BarnesHutCell always contain either a single particle, or subcells (at most 4).
        BarnesHutCell(Coordinate position, float length, Coordinate particle_position, float particle_mass);
        ~BarnesHutCell();

        // cell_center: point the quadrant of a particle is measured against.
        // mass_center: position at which total_mass is taken to act.
        Coordinate cell_center, mass_center;
        // 0 for a cell that holds just one particle and has no sub-cells yet.
        nid_t num_subparticles = 0;
        // Summed mass of everything stored in this cell.
        float total_mass;
        const float length;   // length of a cell = width = height
        BarnesHutCell *sub_cells[4] = {nullptr, nullptr, nullptr, nullptr}; // per quadrant.

        // NOTE(review): no use of this member is visible in this part of the
        // project; insertion is driven by BarnesHutApproximator::insertParticle.
        void insertParticle(Coordinate particle_position, float particle_mass);
    };
50 | 
51 |     class BarnesHutApproximator
52 |     {
53 |     public:
54 |         BarnesHutApproximator(Coordinate root_center, float root_length, float theta);
55 |         Real2DVector approximateForce(Coordinate particle_pos, float particle_mass, float theta);
56 |         void insertParticle(Coordinate particle_position, float particle_mass);
57 | 
58 |         void reset(Coordinate root_center, float root_length);
59 |         void setTheta(float theta);
60 | 
61 |     private:
62 |         BarnesHutCell *root_cell = nullptr;
63 |         const float theta;
64 |         Coordinate root_center;
65 |         float root_length;
66 | 
67 |     };
68 | }
69 | 
70 | #endif /* RPBarnesHutApproximator_hpp */
71 | 


--------------------------------------------------------------------------------
/src/RPCPUForceAtlas2.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPCPUForceAtlas2.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | #include "RPCPUForceAtlas2.hpp"
 25 | #include <stdlib.h>
 26 | #include <math.h>
 27 | #include <limits>
 28 | #include <cmath>
 29 | #include <chrono>
 30 | 
 31 | namespace RPGraph
 32 | {
 33 |     // CPUForceAtlas2 definitions.
 34 |     CPUForceAtlas2::CPUForceAtlas2(GraphLayout &layout, bool use_barneshut,
 35 |                                    bool strong_gravity, float gravity,
 36 |                                    float scale)
 37 |     :  ForceAtlas2(layout, use_barneshut, strong_gravity, gravity, scale),
 38 |        BH_Approximator{layout.getCenter(), layout.getSpan()+10, theta}
 39 |     {
 40 |         forces      = (Real2DVector *)malloc(sizeof(Real2DVector) * layout.graph.num_nodes());
 41 |         prev_forces = (Real2DVector *)malloc(sizeof(Real2DVector) * layout.graph.num_nodes());
 42 |         for (nid_t n = 0; n < layout.graph.num_nodes(); ++n)
 43 |         {
 44 |             forces[n]      = Real2DVector(0.0f, 0.0f);
 45 |             prev_forces[n] = Real2DVector(0.0f, 0.0f);
 46 |         }
 47 |     }
 48 | 
 49 |     CPUForceAtlas2::~CPUForceAtlas2()
 50 |     {
 51 |         free(forces);
 52 |         free(prev_forces);
 53 |     }
 54 | 
 55 |     void CPUForceAtlas2::apply_attract(nid_t n)
 56 |     {
 57 |         Real2DVector f = Real2DVector(0.0, 0.0);
 58 |         for (nid_t t : layout.graph.neighbors_with_geq_id(n))
 59 |         {
 60 |             // Here we define the magnitude of the attractive force `f_a'
 61 |             // *divided* by the length distance between `n' and `t', i.e. `f_a_over_d'
 62 |             float f_a_over_d;
 63 |             if (use_linlog)
 64 |             {
 65 |                 float dist = layout.getDistance(n, t);
 66 |                 f_a_over_d = dist == 0.0 ? std::numeric_limits<float>::max() : logf(1+dist) / dist;
 67 |             }
 68 | 
 69 |             else
 70 |             {
 71 |                 f_a_over_d = 1.0;
 72 |             }
 73 | 
 74 |             f += layout.getDistanceVector(n, t) * f_a_over_d;
 75 | 
 76 |             //TODO: this is temporary, but required due to
 77 |             //      iteration over neighbors_with_geq_id
 78 |             forces[t] += layout.getDistanceVector(n, t) * (-f_a_over_d);
 79 | 
 80 |     //            forces[n] += getNormalizedDistanceVector(n, t) * f_a(n, t);
 81 |         }
 82 |         forces[n] += f;
 83 |     }
 84 | 
 85 |     void CPUForceAtlas2::apply_repulsion(nid_t n)
 86 |     {
 87 |         if (use_barneshut)
 88 |         {
 89 |             forces[n] += (BH_Approximator.approximateForce(layout.getCoordinate(n), mass(n), theta) * k_r);
 90 |         }
 91 | 
 92 |         else
 93 |         {
 94 |             for (nid_t t = 0; t < layout.graph.num_nodes(); ++t)
 95 |             {
 96 |                 if (n == t) continue;
 97 |                 float  distance = layout.getDistance(n, t);
 98 |                 float f_r = distance == 0.0 ? std::numeric_limits<float>::max() : k_r * mass(n) * mass(t) / distance / distance;
 99 |                 forces[n] += layout.getDistanceVector(n, t) * f_r;
100 |             }
101 |         }
102 |     }
103 | 
104 |     void CPUForceAtlas2::apply_gravity(nid_t n)
105 |     {
106 |         float f_g, d;
107 | 
108 |         // `d' is the distance from `n' to the center (0.0, 0.0)
109 |         d = std::sqrt(layout.getX(n)*layout.getX(n) + layout.getY(n)*layout.getY(n));
110 |         if(d == 0.0) return;
111 | 
112 |         // Here we define the magnitude of the gravitational force `f_g'.
113 |         if (strong_gravity)
114 |         {
115 |             f_g = k_g*mass(n);
116 |         }
117 | 
118 |         else
119 |         {
120 |             f_g = k_g*mass(n) / d;
121 |         }
122 | 
123 |         forces[n] += (Real2DVector(-layout.getX(n), -layout.getY(n)) * f_g);
124 |     }
125 | 
126 |     // Eq. (8)
127 |     float CPUForceAtlas2::swg(nid_t n)
128 |     {
129 |         return (forces[n] - prev_forces[n]).magnitude();
130 |     }
131 | 
132 |     // Eq. (9)
133 |     float CPUForceAtlas2::s(nid_t n)
134 |     {
135 |         return (k_s * global_speed)/(1.0f+global_speed*std::sqrt(swg(n)));
136 |     }
137 | 
138 |     // Eq. (12)
139 |     float CPUForceAtlas2::tra(nid_t n)
140 |     {
141 |         return (forces[n] + prev_forces[n]).magnitude() / 2.0;
142 |     }
143 | 
144 |     void CPUForceAtlas2::updateSpeeds()
145 |     {
146 |         // The following speed-update procedure for ForceAtlas2 follows
147 |         // the one by Gephi:
148 |         // https://github.com/gephi/gephi/blob/6efb108718fa67d1055160f3a18b63edb4ca7be2/modules/LayoutPlugin/src/main/java/org/gephi/layout/plugin/forceAtlas2/ForceAtlas2.java
149 | 
150 |         // `Auto adjust speeds'
151 |         float total_swinging = 0.0;
152 |         float total_effective_traction = 0.0;
153 |         for (nid_t nid = 0; nid < layout.graph.num_nodes(); ++nid)
154 |         {
155 |             total_swinging += mass(nid) * swg(nid); // Eq. (11)
156 |             total_effective_traction += mass(nid) * tra(nid); // Eq. (13)
157 |         }
158 | 
159 |         // We want to find the right jitter tollerance for this graph,
160 |         // such that totalSwinging < tolerance * totalEffectiveTraction
161 | 
162 |         float estimated_optimal_jitter_tollerance = 0.05 * std::sqrt(layout.graph.num_nodes());
163 |         float minJT = std::sqrt(estimated_optimal_jitter_tollerance);
164 |         float jt = jitter_tolerance * fmaxf(minJT,
165 |                                            fminf(k_s_max,
166 |                                                  estimated_optimal_jitter_tollerance * total_effective_traction / powf(layout.graph.num_nodes(), 2.0)
167 |                                                  )
168 |                                            );
169 |         float min_speed_efficiency = 0.05;
170 | 
171 |         // `Protect against erratic behavior'
172 |         if (total_swinging / total_effective_traction > 2.0)
173 |         {
174 |             if (speed_efficiency > min_speed_efficiency) speed_efficiency *= 0.5;
175 |             jt = fmaxf(jt, jitter_tolerance);
176 |         }
177 | 
178 |         // `Speed efficiency is how the speed really corrosponds to the swinging vs. convergence tradeoff.'
179 |         // `We adjust it slowly and carefully'
180 |         float targetSpeed = jt * speed_efficiency * total_effective_traction / total_swinging;
181 | 
182 |         if (total_swinging > jt * total_effective_traction)
183 |         {
184 |             if (speed_efficiency > min_speed_efficiency)
185 |             {
186 |                 speed_efficiency *= 0.7;
187 |             }
188 |         }
189 |         else if (global_speed < 1000)
190 |         {
191 |             speed_efficiency *= 1.3;
192 |         }
193 | 
194 |         // `But the speed shouldn't rise much too quickly, ... would make convergence drop dramatically'.
195 |         float max_rise = 0.5;
196 |         global_speed += fminf(targetSpeed - global_speed, max_rise * global_speed);
197 |     }
198 | 
199 |     void CPUForceAtlas2::apply_displacement(nid_t n)
200 |     {
201 |         if (prevent_overlap)
202 |         {
203 |             // Not yet implemented
204 |             exit(EXIT_FAILURE);
205 |         }
206 | 
207 |         else
208 |         {
209 | 
210 |             float factor = global_speed / (1.0 + std::sqrt(global_speed * swg(n)));
211 |             layout.moveNode(n, forces[n] * factor);
212 |         }
213 |     }
214 | 
215 |     void CPUForceAtlas2::rebuild_bh()
216 |     {
217 |         BH_Approximator.reset(layout.getCenter(), layout.getSpan()+10);
218 | 
219 |         for (nid_t n = 0; n < layout.graph.num_nodes(); ++n)
220 |         {
221 |             BH_Approximator.insertParticle(layout.getCoordinate(n),
222 |                                            layout.graph.degree(n)+1);
223 |         }
224 |     }
225 | 
226 |     void CPUForceAtlas2::doStep()
227 |     {
228 |         if (use_barneshut) rebuild_bh();
229 | 
230 |         for (nid_t n = 0; n < layout.graph.num_nodes(); ++n)
231 |         {
232 |             apply_gravity(n);
233 |             apply_attract(n);
234 |             apply_repulsion(n);
235 |         }
236 | 
237 |         updateSpeeds();
238 | 
239 |         for (nid_t n = 0; n < layout.graph.num_nodes(); ++n)
240 |         {
241 |             apply_displacement(n);
242 |             prev_forces[n]  = forces[n];
243 |             forces[n]       = Real2DVector(0.0f, 0.0f);
244 |         }
245 |         iteration++;
246 |     }
247 | 
248 |     void CPUForceAtlas2::sync_layout() {}
249 | 
250 | }
251 | 


--------------------------------------------------------------------------------
/src/RPCPUForceAtlas2.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPCPUForceAtlas2.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPCPUForceAtlas2_hpp
25 | #define RPCPUForceAtlas2_hpp
26 | 
27 | #include "RPForceAtlas2.hpp"
28 | 
29 | namespace RPGraph
30 | {
31 |     class CPUForceAtlas2 : public ForceAtlas2
32 |     {
33 |     public:
34 |         CPUForceAtlas2(GraphLayout &layout, bool use_barneshut,
35 |                        bool strong_gravity, float gravity, float scale);
36 |         ~CPUForceAtlas2();
37 |         void doStep() override;
38 |         void sync_layout() override;
39 | 
40 |     private:
41 |         Real2DVector *forces, *prev_forces;
42 |         BarnesHutApproximator BH_Approximator;
43 | 
44 |         float swg(nid_t n);            // swinging ..
45 |         float s(nid_t n);              // swinging as well ..
46 |         float tra(nid_t n);            // traction ..
47 | 
48 |         // Substeps of one step in layout process.
49 |         void rebuild_bh();
50 |         void apply_repulsion(nid_t n);
51 |         void apply_gravity(nid_t n);
52 |         void apply_attract(nid_t n);
53 |         void updateSpeeds();
54 |         void apply_displacement(nid_t n);
55 |     };
56 | }
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/RPCommon.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPCommon.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | #include "RPCommon.hpp"
 25 | #include <stdlib.h>
 26 | #include <string.h>
 27 | #include <cmath>
 28 | #include <fstream>
 29 | #include <string>
 30 | 
 31 | // by http://stackoverflow.com/a/19841704
 32 | bool is_file_exists(std::string filepath)
 33 | {
 34 |     std::ifstream infile(filepath);
 35 |     return infile.good();
 36 | }
 37 | 
 38 | // wrap libgen basename until C++17
 39 | std::string basename(std::string filepath)
 40 | {
 41 |     char *result_p = new char[filepath.size() + 1];
 42 |     strcpy(result_p, filepath.c_str());
 43 |     std::string result = basename(result_p);
 44 |     delete[] result_p;
 45 |     return result;
 46 | }
 47 | 
 48 | namespace RPGraph
 49 | {
 50 |     float get_random(float lowerbound, float upperbound)
 51 |     {
 52 |         return lowerbound + (upperbound-lowerbound) * static_cast <float> (random()) / static_cast <float> (RAND_MAX);
 53 |     }
 54 | 
 55 | 
 56 |     /* Definitions for Real2DVector */
 57 |     Real2DVector::Real2DVector(float x, float y): x(x), y(y) {};
 58 | 
 59 |     float Real2DVector::magnitude()
 60 |     {
 61 |         return std::sqrt(x*x + y*y);
 62 |     }
 63 | 
 64 |     float Real2DVector::distance(RPGraph::Real2DVector to)
 65 |     {
 66 |         const float dx = (x - to.x)*(x - to.x);
 67 |         const float dy = (y - to.y)*(y - to.y);
 68 |         return std::sqrt(dx*dx + dy*dy);
 69 |     }
 70 | 
 71 |     // Various operators on Real2DVector
 72 |     Real2DVector Real2DVector::operator*(float b)
 73 |     {
 74 |         return Real2DVector(this->x * b, this->y * b);
 75 |     }
 76 | 
 77 |     Real2DVector Real2DVector::operator/(float b)
 78 |     {
 79 |         return Real2DVector(this->x / b, this->y / b);
 80 |     }
 81 | 
 82 | 
 83 |     Real2DVector Real2DVector::operator+(Real2DVector b)
 84 |     {
 85 |         return Real2DVector(this->x + b.x, this->y + b.y);
 86 |     }
 87 | 
 88 | 
 89 |     Real2DVector Real2DVector::operator-(Real2DVector b)
 90 |     {
 91 |         return Real2DVector(this->x - b.x, this->y - b.y);
 92 |     }
 93 | 
 94 |     void Real2DVector::operator+=(Real2DVector b)
 95 |     {
 96 |         this->x += b.x;
 97 |         this->y += b.y;
 98 |     }
 99 | 
100 |     Real2DVector Real2DVector::getNormalized()
101 |     {
102 |         return Real2DVector(this->x / magnitude(), this->y / magnitude());
103 |     }
104 | 
105 |     Real2DVector Real2DVector::normalize()
106 |     {
107 |         const float m = magnitude();
108 |         this->x /= m;
109 |         this->y /= m;
110 |         return *this;
111 |     }
112 | 
113 |     /* Definitions for Coordinate */
114 |     Coordinate::Coordinate(float x, float y) : x(x), y(y) {};
115 | 
116 |     // Various operators on Coordinate
117 |     Coordinate Coordinate::operator+(float b)
118 |     {
119 |         return Coordinate(x + b, y + b);
120 |     }
121 | 
122 |     Coordinate Coordinate::operator*(float b)
123 |     {
124 |         return Coordinate(this->x*b, this->y*b);
125 |     }
126 | 
127 |     Coordinate Coordinate::operator/(float b)
128 |     {
129 |         return Coordinate(this->x/b, this->y/b);
130 |     }
131 | 
132 |     Coordinate Coordinate::operator+(Real2DVector b)
133 |     {
134 |         return Coordinate(this->x + b.x, this->y + b.y);
135 |     }
136 | 
137 |     Coordinate Coordinate::operator-(Coordinate b)
138 |     {
139 |         return Coordinate(this->x - b.x, this->y - b.y);
140 |     }
141 | 
142 |     bool Coordinate::operator==(Coordinate b)
143 |     {
144 |         return (this->x == b.x && this->y == b.y);
145 |     }
146 | 
147 |     float Coordinate::distance(RPGraph::Coordinate to)
148 |     {
149 |         return std::sqrt((x - to.x)*(x - to.x) + (y - to.y)*(y - to.y));
150 |     }
151 | 
152 |     float Coordinate::distance2(RPGraph::Coordinate to)
153 |     {
154 |         return (x - to.x)*(x - to.x) + (y - to.y)*(y - to.y);
155 |     }
156 | 
157 |     void Coordinate::operator/=(float b)
158 |     {
159 |         this->x /= b;
160 |         this->y /= b;
161 |     }
162 | 
163 |     void Coordinate::operator+=(RPGraph::Coordinate b)
164 |     {
165 |         this->x += b.x;
166 |         this->y += b.y;
167 |     }
168 | 
169 |     void Coordinate::operator+=(RPGraph::Real2DVector b)
170 |     {
171 |         this->x += b.x;
172 |         this->y += b.y;
173 |     }
174 | 
175 |     int Coordinate::quadrant()
176 |     {
177 |         if (x <= 0)
178 |         {
179 |             if (y >= 0) return 0;
180 |             else        return 3;
181 | 
182 |         }
183 |         else
184 |         {
185 |             if (y >= 0) return 1;
186 |             else        return 2;
187 |         }
188 |     }
189 | 
190 |     float distance(Coordinate from, Coordinate to)
191 |     {
192 |         const float dx = from.x - to.x;
193 |         const float dy = from.y - to.y;
194 |         return std::sqrt(dx*dx + dy*dy);
195 |     }
196 | 
197 |     float distance2(Coordinate from, Coordinate to)
198 |     {
199 |         const float dx = from.x - to.x;
200 |         const float dy = from.y - to.y;
201 |         return dx*dx + dy*dy;
202 |     }
203 | 
204 |     Real2DVector normalizedDirection(Coordinate from, Coordinate to)
205 |     {
206 |         const float dx = from.x - to.x;
207 |         const float dy = from.y - to.y;
208 |         const float len = std::sqrt(dx*dx + dy*dy);
209 |         return Real2DVector(dx/len, dy/len);
210 |     }
211 | 
212 |     Real2DVector direction(Coordinate from, Coordinate to)
213 |     {
214 |         const float dx = from.x - to.x;
215 |         const float dy = from.y - to.y;
216 |         return Real2DVector(dx, dy);
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/src/RPCommon.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPCommon.hpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | #ifndef RPCommonUtils_hpp
 25 | #define RPCommonUtils_hpp
 26 | #include <string>
 27 | 
 28 | #ifdef __NVCC__
 29 | #include <cuda_runtime_api.h>
 30 | #include <stdio.h>
 31 | #include <stdlib.h>
 32 | 
 33 | #define cudaCatchError(ans) { assert_d((ans), __FILE__, __LINE__); }
 34 | inline void assert_d(cudaError_t code, const char *file, int line, bool abort=true)
 35 | {
 36 |     if (code != cudaSuccess)
 37 |     {
 38 |         fprintf(stderr,"error: (GPUassert) %s (error %d). %s:%d\n", cudaGetErrorString(code), code, file, line);
 39 |         if (abort) exit(code);
 40 |     }
 41 | }
 42 | #endif
 43 | bool is_file_exists(std::string filepath);
 44 | std::string basename(std::string filepath);
 45 | 
 46 | namespace RPGraph
 47 | {
 48 |     float get_random(float lowerbound, float upperbound);
 49 | 
    // A two-dimensional vector with float components, used for forces and
    // displacements.
    class Real2DVector
    {
    public:
        Real2DVector(float x, float y);
        float x, y;
        // Euclidean length of the vector.
        float magnitude();
        float distance(Real2DVector to); // to some other Real2DVector `to'

        // Various operators on Real2DVector
        Real2DVector operator*(float b);          // scale both components by `b'
        Real2DVector operator/(float b);          // divide both components by `b'
        Real2DVector operator+(Real2DVector b);   // component-wise sum
        Real2DVector operator-(Real2DVector b);   // component-wise difference
        void operator+=(Real2DVector b);          // in-place sum

        // Both divide by magnitude() without a guard against zero length.
        // getNormalized() leaves this vector unchanged and returns the result;
        // normalize() changes this vector and returns a copy of it.
        Real2DVector getNormalized();
        Real2DVector normalize();
    };
 68 | 
    // A point in the plane with float components.
    class Coordinate
    {
    public:
        float x, y;
        Coordinate(float x, float y);

        // Various operators on Coordinate
        Coordinate operator+(float b);            // add `b' to both components
        Coordinate operator*(float b);            // scale both components by `b'
        Coordinate operator/(float b);            // divide both components by `b'
        Coordinate operator+(Real2DVector b);     // translate by the vector `b'
        Coordinate operator-(Coordinate b);       // component-wise difference
        bool operator==(Coordinate b);            // exact comparison of x and y
        void operator/=(float b);                 // in-place division by `b'
        void operator+=(Coordinate b);            // in-place component-wise sum
        void operator+=(RPGraph::Real2DVector b); // in-place translation by `b'

        // Returns 0 for (x <= 0, y >= 0), 1 for (x > 0, y >= 0),
        // 2 for (x > 0, y < 0) and 3 for (x <= 0, y < 0).
        int quadrant(); // Of `this' wrt. (0,0).
        float distance(Coordinate to);  // Euclidean distance to `to'
        float distance2(Coordinate to); // squared Euclidean distance to `to'

    };
 91 | 
 92 |     float distance(Coordinate from, Coordinate to);
 93 |     float distance2(Coordinate from, Coordinate to);
 94 | 
 95 |     Real2DVector normalizedDirection(Coordinate from, Coordinate to);
 96 |     Real2DVector direction(Coordinate from, Coordinate to);
 97 | 
 98 | }
 99 | 
100 | #endif /* RPCommonUtils_hpp */
101 | 


--------------------------------------------------------------------------------
/src/RPFA2Kernels.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPFA2Kernels.cu
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 |  */
 23 | 
 24 | #include <stdio.h>
 25 | #include "RPFA2Kernels.cuh"
 26 | #include "RPBHFA2LaunchParameters.cuh"
 27 | 
/// Some variables for FA2 related to `speed'
// File-local variables in device memory (note the trailing `d' in the names).
// NOTE(review): the names match the k_s_max / global_speed / speed_efficiency /
// jitter_tolerance values used by the CPU implementation; their use in the
// kernels lies outside this part of the file -- confirm there.
static __device__ float k_s_maxd = 10.0;
static __device__ float global_speedd = 1.0;
static __device__ float speed_efficiencyd = 1.0;
static __device__ float jitter_toleranced = 1.0;
// NOTE(review): presumably a counter of blocks for the speed kernel -- confirm.
static __device__ unsigned int blkcntd_speed_kernel = 0;
 34 | 
 35 | __global__
 36 | __launch_bounds__(THREADS6, FACTOR6)
 37 | void GravityKernel(int nbodiesd, const float k_g, const bool strong_gravity,
 38 |                    volatile float * __restrict body_massd,
 39 |                    volatile float2 * __restrict body_posd,
 40 |                    volatile float * __restrict fxd, volatile float * __restrict fyd)
 41 | {
 42 |     register int i, inc;
 43 | 
 44 |     // iterate over all bodies assigned to thread
 45 |     inc = blockDim.x * gridDim.x;
 46 |     for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nbodiesd; i += inc)
 47 |     {
 48 |         const float px = body_posd[i].x;
 49 |         const float py = body_posd[i].y;
 50 | 
 51 |         // `f_g' is the magnitude of gravitational force
 52 |         float f_g;
 53 |         if(strong_gravity)
 54 |         {
 55 |             f_g = k_g * body_massd[i];
 56 |         }
 57 |         else // weak gravity
 58 |         {
 59 |             if (px != 0.0 || py != 0.0)
 60 |             {
 61 |                 f_g = k_g * body_massd[i] * rsqrtf(px*px + py*py);
 62 |             }
 63 | 
 64 |             else
 65 |             {
 66 |                 f_g = 0.0;
 67 |             }
 68 |         }
 69 | 
 70 |         fxd[i] += (-px * f_g);
 71 |         fyd[i] += (-py * f_g);
 72 |     }
 73 | }
 74 | // Attraction pass: every edge adds its endpoint separation (dx, dy) to the source's force and (-dx, -dy) to the target's.
 75 | __global__
 76 | __launch_bounds__(THREADS6, FACTOR6)
 77 | void AttractiveForceKernel(int nedgesd,
 78 |                            volatile float2 * __restrict body_posd,
 79 |                            volatile float * __restrict fxd, volatile float * __restrict fyd,
 80 |                            volatile int * __restrict sourcesd, volatile int * __restrict targetsd)
 81 | {
 82 |     register int i, inc, source, target;
 83 |     // iterate over all edges assigned to thread
 84 |     inc = blockDim.x * gridDim.x;
 85 |     for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nedgesd; i += inc)
 86 |     {
 87 |         source = sourcesd[i];
 88 |         target = targetsd[i];
 89 | 
 90 |         // dx and dy are the components of the vector from source to target.
 91 |         const float dx = body_posd[target].x-body_posd[source].x;
 92 |         const float dy = body_posd[target].y-body_posd[source].y;
 93 | 
 94 |         // Force just depends linearly on distance.
 95 |         const float fsx = dx;
 96 |         const float fsy = dy;
 97 | 
 98 |         const float ftx = -dx;
 99 |         const float fty = -dy;
100 |         // Different threads may update the same body (edges can share an endpoint), hence atomicAdd.
101 |         // The casts drop the `volatile' qualifier from the accumulator pointers for those calls.
102 |         // these memory accesses aren't coalesced...
103 |         atomicAdd((float*)fxd+source, fsx);
104 |         atomicAdd((float*)fyd+source, fsy);
105 | 
106 |         atomicAdd((float*)fxd+target, ftx);
107 |         atomicAdd((float*)fyd+target, fty);
108 |     }
109 | }
110 | // Speed pass: sums the mass-weighted swing and effective traction of all bodies; the last block to finish then updates global_speedd and speed_efficiencyd.
111 | __global__
112 | __launch_bounds__(THREADS1, FACTOR1)
113 | void SpeedKernel(int nbodiesd,
114 |                  volatile float * __restrict fxd , volatile float * __restrict fyd,
115 |                  volatile float * __restrict fx_prevd , volatile float * __restrict fy_prevd,
116 |                  volatile float * __restrict body_massd, volatile float * __restrict swgd, volatile float * __restrict etrad)
117 | {
118 |     register int i, j, k, inc;
119 |     register float swg_thread, swg_body, etra_thread, etra_body, dx, dy, mass;
120 |     // setra: effective_traction (in shared mem.)
121 |     // sswg: swing per node (in shared mem.)
122 |     __shared__ volatile float sswg[THREADS1], setra[THREADS1];
123 | 
124 |     // initialize with valid data (in case #bodies < #threads)
125 |     swg_thread  = 0;
126 |     etra_thread = 0;
127 | 
128 |     // scan all bodies
129 |     i = threadIdx.x;
130 |     inc = THREADS1 * gridDim.x;  // stride; uses THREADS1 rather than blockDim.x, so the launch must use THREADS1 threads per block
131 | 
132 |     for (j = i + blockIdx.x * THREADS1; j < nbodiesd; j += inc)
133 |     {
134 |         mass = body_massd[j];
135 |         // swing: length of the change in force since the previous step
136 |         dx = fxd[j] - fx_prevd[j];
137 |         dy = fyd[j] - fy_prevd[j];
138 |         swg_body = sqrtf(dx*dx + dy*dy);
139 |         swg_thread += mass * swg_body;
140 |         // effective traction: half the length of the sum of current and previous force
141 |         dx = fxd[j] + fx_prevd[j];
142 |         dy = fyd[j] + fy_prevd[j];
143 |         etra_body = sqrtf(dx*dx + dy*dy) / 2.0;
144 |         etra_thread += mass * etra_body;
145 |     }
146 | 
147 |     // reduction in shared memory
148 |     sswg[i]  = swg_thread;
149 |     setra[i] = etra_thread;
150 |     // each round halves the number of active threads (assumes THREADS1 is a power of two -- TODO confirm)
151 |     for (j = THREADS1 / 2; j > 0; j /= 2)
152 |     {
153 |         __syncthreads();
154 |         if (i < j)
155 |         {
156 |             k = i + j;
157 |             sswg[i]  = swg_thread  = sswg[i]  + sswg[k];
158 |             setra[i] = etra_thread = setra[i] + setra[k];
159 |         }
160 |     }
161 | 
162 |     // in thread 0, swg_thread and etra_thread are now the total swinging
163 |     // and the total effective traction (across all threads of this block)
164 | 
165 |     // write block result to global memory
166 |     if (i == 0)
167 |     {
168 |         k = blockIdx.x;
169 |         swgd[k]  = swg_thread;
170 |         etrad[k] = etra_thread;
171 |         __threadfence();  // publish the partial sums before announcing this block as finished
172 | 
173 |         inc = gridDim.x - 1;
174 |         if (inc == atomicInc(&blkcntd_speed_kernel, inc))  // true only for the last block to arrive; atomicInc then wraps the counter to 0
175 |         {
176 |             swg_thread = 0;
177 |             etra_thread = 0;
178 | 
179 |             for (j = 0; j <= inc; j++)  // sum the per-block partial results
180 |             {
181 |                 swg_thread  += swgd[j];
182 |                 etra_thread += etrad[j];
183 |             }
184 |             // we need to do some calculations to derive
185 |             // from this the new global speed
186 |             float estimated_optimal_jitter_tollerance = 0.05 * sqrtf(nbodiesd);
187 |             float minJT = sqrtf(estimated_optimal_jitter_tollerance);
188 |             float jt = jitter_toleranced * fmaxf(minJT,
189 |                                                  fminf(k_s_maxd, estimated_optimal_jitter_tollerance * etra_thread / powf(nbodiesd, 2.0)
190 |                                                        ));
191 |             float min_speed_efficiency = 0.05;
192 | 
193 |             // `Protect against erratic behavior'
194 |             if (swg_thread / etra_thread > 2.0)
195 |             {
196 |                 if (speed_efficiencyd > min_speed_efficiency) speed_efficiencyd *= 0.5;
197 |                 jt = fmaxf(jt, jitter_toleranced);
198 |             }
199 | 
200 |             // `Speed efficiency is how the speed really corresponds to the swinging vs. convergence tradeoff.'
201 |             // `We adjust it slowly and carefully'
202 |             float targetSpeed = jt * speed_efficiencyd * etra_thread / swg_thread;
203 | 
204 |             if (swg_thread > jt * etra_thread)
205 |             {
206 |                 if (speed_efficiencyd > min_speed_efficiency)
207 |                 {
208 |                     speed_efficiencyd *= 0.7;
209 |                 }
210 |             }
211 |             else if (global_speedd < 1000)
212 |             {
213 |                 speed_efficiencyd *= 1.3;
214 |             }
215 | 
216 |             // `But the speed shouldn't rise much too quickly, ... would make convergence drop dramatically'.
217 |             float max_rise = 0.5;
218 |             global_speedd += fminf(targetSpeed - global_speedd, max_rise * global_speedd);  // rise by at most max_rise (50%) per step; decreases are not limited
219 |         }
220 |     }
221 | }
222 | // Displacement pass: moves every body along its accumulated force, then stores that force as "previous" and clears the accumulators.
223 | __global__
224 | __launch_bounds__(THREADS6, FACTOR6)
225 | void DisplacementKernel(int nbodiesd,
226 |                        volatile float2 * __restrict body_posd,
227 |                        volatile float * __restrict fxd, volatile float * __restrict fyd,
228 |                        volatile float * __restrict fx_prevd, volatile float * __restrict fy_prevd)
229 | {
230 |     register int i, inc;
231 |     register float factor, swg, dx, dy, fx, fy;
232 |     register float global_speed = global_speedd;  // read the global speed once per thread
233 |     // iterate over all bodies assigned to thread
234 |     inc = blockDim.x * gridDim.x;
235 |     for (i = threadIdx.x + blockIdx.x * blockDim.x; i < nbodiesd; i += inc)
236 |     {
237 |         fx = fxd[i];
238 |         fy = fyd[i];
239 |         dx = fx - fx_prevd[i];
240 |         dy = fy - fy_prevd[i];
241 |         swg = sqrtf(dx*dx + dy*dy);  // swing: how much this body's force changed since the previous step
242 |         factor = global_speed / (1.0 + sqrtf(global_speed * swg));  // bodies with a larger swing get a smaller step
243 | 
244 |         body_posd[i].x += fx * factor;
245 |         body_posd[i].y += fy * factor;
246 |         fx_prevd[i] = fx;
247 |         fy_prevd[i] = fy;
248 |         fxd[i] = 0.0;  // reset the accumulators for the next iteration
249 |         fyd[i] = 0.0;
250 |     }
251 | }
252 | 


--------------------------------------------------------------------------------
/src/RPFA2Kernels.cuh:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPFA2Kernels.cuh
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 |  */
23 | 
24 | #ifndef RPFA2Kernels_cuh
25 | #define RPFA2Kernels_cuh
26 | 
27 | #include "RPBHFA2LaunchParameters.cuh"
28 | // Adds to (fxd, fyd) the gravity force pulling each of the nbodiesd bodies toward the origin.
29 | __global__
30 | __launch_bounds__(THREADS6, FACTOR6)
31 | void GravityKernel(int nbodiesd, const float k_g, const bool strong_gravity,
32 |                    volatile float * __restrict body_massd,
33 |                    volatile float2 * __restrict body_posd,
34 |                    volatile float * __restrict fxd, volatile float * __restrict fyd);
35 | // Adds to (fxd, fyd) the attraction along each of the nedgesd edges (sourcesd[i], targetsd[i]).
36 | __global__
37 | __launch_bounds__(THREADS6, FACTOR6)
38 | void AttractiveForceKernel(int nedgesd,
39 |                            volatile float2 * __restrict body_posd,
40 |                            volatile float * __restrict fxd, volatile float * __restrict fyd,
41 |                            volatile int * __restrict sourcesd, volatile int * __restrict targetsd);
42 | // Reduces swing / effective traction (swgd and etrad hold one partial sum per block) and updates the global speed.
43 | __global__
44 | __launch_bounds__(THREADS1, FACTOR1)
45 | void SpeedKernel(int nbodiesd,
46 |                  volatile float * __restrict fxd , volatile float * __restrict fyd,
47 |                  volatile float * __restrict fx_prevd , volatile float * __restrict fy_prevd,
48 |                  volatile float * __restrict body_massd, volatile float * __restrict swgd, volatile float * __restrict etrad);
49 | // Moves the bodies along (fxd, fyd), saves those forces into (fx_prevd, fy_prevd) and zeroes the accumulators.
50 | __global__
51 | __launch_bounds__(THREADS6, FACTOR6)
52 | void DisplacementKernel(int nbodiesd,
53 |                        volatile float2 * __restrict body_posd,
54 |                        volatile float * __restrict fxd, volatile float * __restrict fyd,
55 |                        volatile float * __restrict fx_prevd, volatile float * __restrict fy_prevd);
56 | 
57 | #endif
58 | 


--------------------------------------------------------------------------------
/src/RPForceAtlas2.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPForceAtlas2.cpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #include "RPForceAtlas2.hpp"
25 | 
26 | namespace RPGraph
27 | {
28 |     // Records the algorithm switches, installs the default tuning constants
29 |     // and randomizes the layout's starting positions.
30 |     ForceAtlas2::ForceAtlas2(GraphLayout &layout, bool use_barneshut,
31 |                              bool strong_gravity, float gravity, float scale)
32 |     : LayoutAlgorithm(layout), use_barneshut{use_barneshut},
33 |       strong_gravity{strong_gravity}
34 |     {
35 |         // Optional behaviours start disabled.
36 |         use_linlog = false;
37 |         prevent_overlap = false;
38 | 
39 |         // Caller-supplied force scalars; the edge-weight influence is zeroed.
40 |         setScale(scale);
41 |         setGravity(gravity);
42 |         delta = 0.0f;
43 | 
44 |         // Adaptive-speed state and the constants related to swinging.
45 |         iteration = 0;
46 |         global_speed = speed_efficiency = jitter_tolerance = 1.0f;
47 |         k_s = 0.1;
48 |         k_s_max = 10.0f;
49 | 
50 |         // Barnes-Hut accuracy, squared softening and squared inverse tolerance.
51 |         theta = 1.0f;
52 |         epssq  = 0.05 * 0.05;
53 |         itolsq = 1.0f / (theta * theta);
54 |         layout.randomizePositions();
55 |     }
56 | 
57 |     ForceAtlas2::~ForceAtlas2() {}
58 | 
59 |     void ForceAtlas2::doSteps(int n)
60 |     {
61 |         for (int remaining = n; remaining > 0; --remaining) doStep();  // n back-to-back iterations
62 |     }
63 | 
64 |     void ForceAtlas2::setScale(float s)
65 |     {
66 |         this->k_r = s;  // scalar of the repulsive force
67 |     }
68 | 
69 |     void ForceAtlas2::setGravity(float g)
70 |     {
71 |         this->k_g = g;  // scalar of the gravitational force
72 |     }
73 | 
74 |     float ForceAtlas2::mass(nid_t n)
75 |     {
76 |         return static_cast<float>(layout.graph.degree(n) + 1.0);  // degree plus one
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/src/RPForceAtlas2.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPForceAtlas2.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPForceAtlas2_hpp
25 | #define RPForceAtlas2_hpp
26 | 
27 | #include "RPLayoutAlgorithm.hpp"
28 | #include "RPBarnesHutApproximator.hpp"
29 | 
30 | namespace RPGraph
31 | {
32 |     class ForceAtlas2 : public LayoutAlgorithm  // shared ForceAtlas2 parameters; doStep() is left to the concrete back end
33 |     {
34 |         public:
35 |             ForceAtlas2(GraphLayout &layout, bool use_barneshut,
36 |                         bool strong_gravity, float gravity, float scale);
37 |             ~ForceAtlas2();
38 |             // One layout iteration; implemented by subclasses.
39 |             virtual void doStep() = 0;
40 |             void doSteps(int n);        // calls doStep() n times
41 |             void setScale(float s);     // sets k_r
42 |             void setGravity(float s);   // sets k_g
43 |             float mass(nid_t n);        // degree of node n plus one
44 |             bool prevent_overlap, use_barneshut, use_linlog, strong_gravity;
45 | 
46 |         protected:
47 |             int iteration;
48 |             float k_r, k_g; // scalars for repulsive and gravitational force.
49 |             float delta; // edgeweight influence.
50 |             float global_speed;
51 | 
52 |             // Parameters used in adaptive temperature
53 |             float speed_efficiency, jitter_tolerance;
54 |             float k_s, k_s_max; // magic constants related to swinging.
55 | 
56 |             // Barnes-Hut parameters
57 |             float theta;   // Accuracy
58 |             float epssq;   // Softening (Epsilon, squared)
59 |             float itolsq;  // Inverse tolerance, squared
60 |     };
61 | }
62 | #endif
63 | 


--------------------------------------------------------------------------------
/src/RPGPUForceAtlas2.cu:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPGPUForceAtlas2.cu
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | #include <stdio.h>
 25 | #include <fstream>
 26 | #include <chrono>
 27 | #include <algorithm>
 28 | #include "time.h"
 29 | 
 30 | #include "RPGPUForceAtlas2.hpp"
 31 | #include "RPBHFA2LaunchParameters.cuh"
 32 | #include "RPBHKernels.cuh"
 33 | #include "RPFA2Kernels.cuh"
 34 | 
 35 | namespace RPGraph
 36 | {
 37 |     CUDAForceAtlas2::CUDAForceAtlas2(GraphLayout &layout, bool use_barneshut,  // stages the graph on the host, then allocates and fills every device buffer
 38 |                                      bool strong_gravity, float gravity,
 39 |                                      float scale)
 40 |     : ForceAtlas2(layout, use_barneshut, strong_gravity, gravity, scale)
 41 |     {
 42 |         // Start from 0 and check the status: a failed query may leave the count unwritten; treat that as "no devices".
 43 |         int deviceCount = 0;
 44 |         if (cudaGetDeviceCount(&deviceCount) != cudaSuccess || deviceCount == 0)
 45 |         {
 46 |             fprintf(stderr, "error: No CUDA devices found.\n");
 47 |             exit(EXIT_FAILURE);
 48 |         }
 49 | 
 50 |         // Host initialization and setup //
 51 |         nbodies = layout.graph.num_nodes();
 52 |         nedges  = layout.graph.num_edges();
 53 | 
 54 |         body_pos = (float2 *)malloc(sizeof(float2) * layout.graph.num_nodes());
 55 |         body_mass = (float *)malloc(sizeof(float) * layout.graph.num_nodes());
 56 |         sources  = (int *)  malloc(sizeof(int)   * layout.graph.num_edges());
 57 |         targets  = (int *)  malloc(sizeof(int)   * layout.graph.num_edges());
 58 |         fx       = (float *)malloc(sizeof(float) * layout.graph.num_nodes());
 59 |         fy       = (float *)malloc(sizeof(float) * layout.graph.num_nodes());
 60 |         fx_prev  = (float *)malloc(sizeof(float) * layout.graph.num_nodes());
 61 |         fy_prev  = (float *)malloc(sizeof(float) * layout.graph.num_nodes());
 62 |         // Seed positions and masses from the layout; all forces start at zero.
 63 |         for (nid_t n = 0; n < layout.graph.num_nodes(); ++n)
 64 |         {
 65 |             body_pos[n] = {layout.getX(n), layout.getY(n)};
 66 |             body_mass[n] = ForceAtlas2::mass(n);
 67 |             fx[n] = 0.0;
 68 |             fy[n] = 0.0;
 69 |             fx_prev[n] = 0.0;
 70 |             fy_prev[n] = 0.0;
 71 |         }
 72 | 
 73 |         int cur_sources_idx = 0;
 74 |         int cur_targets_idx = 0;
 75 | 
 76 |         // Initialize the sources and targets arrays with edge-data.
 77 |         for (nid_t source_id = 0; source_id < layout.graph.num_nodes(); ++source_id)
 78 |         {
 79 |             for (nid_t target_id : layout.graph.neighbors_with_geq_id(source_id))
 80 |             {
 81 |                 sources[cur_sources_idx++] = source_id;
 82 |                 targets[cur_targets_idx++] = target_id;
 83 |             }
 84 |         }
 85 | 
 86 |         // GPU initialization and setup //
 87 |         cudaDeviceProp deviceProp;
 88 |         cudaGetDeviceProperties(&deviceProp, 0);  // only device 0 is inspected
 89 | 
 90 |         if (deviceProp.warpSize != WARPSIZE)
 91 |         {
 92 |             printf("Warpsize of device is %d, but we anticipated %d\n", deviceProp.warpSize, WARPSIZE);
 93 |             exit(EXIT_FAILURE);
 94 | 
 95 |         }
 96 |         cudaFuncSetCacheConfig(BoundingBoxKernel, cudaFuncCachePreferShared);
 97 |         cudaFuncSetCacheConfig(TreeBuildingKernel, cudaFuncCachePreferL1);
 98 |         cudaFuncSetCacheConfig(ClearKernel1, cudaFuncCachePreferL1);
 99 |         cudaFuncSetCacheConfig(ClearKernel2, cudaFuncCachePreferL1);
100 |         cudaFuncSetCacheConfig(SummarizationKernel, cudaFuncCachePreferShared);
101 |         cudaFuncSetCacheConfig(SortKernel, cudaFuncCachePreferL1);
102 | #if __CUDA_ARCH__ < 300  // NOTE(review): __CUDA_ARCH__ is normally undefined in host code -- confirm this guard does what is intended
103 |         cudaFuncSetCacheConfig(ForceCalculationKernel, cudaFuncCachePreferL1);
104 | #endif
105 |         cudaFuncSetCacheConfig(DisplacementKernel, cudaFuncCachePreferL1);
106 | 
107 |         cudaGetLastError();  // reset error value
108 | 
109 |         // Allocate space on device.
110 |         mp_count = deviceProp.multiProcessorCount;
111 |         max_threads_per_block = deviceProp.maxThreadsPerBlock;
112 | 
113 |         nnodes = std::max(2 * nbodies, mp_count * max_threads_per_block);
114 | 
115 |         // Round up to next multiple of WARPSIZE
116 |         while ((nnodes & (WARPSIZE-1)) != 0) nnodes++;
117 |         nnodes--;  // nnodes is the highest index, so nnodes+1 entries is a multiple of WARPSIZE
118 | 
119 |         // child stores structure of the quadtree. values point to IDs.
120 |         cudaCatchError(cudaMalloc((void **)&childl,  sizeof(int)   * (nnodes+1) * 4));
121 | 
122 |         // the following properties, for each node in the quadtree (both internal and leaf)
123 |         cudaCatchError(cudaMalloc((void **)&body_massl,   sizeof(float) * nbodies));
124 |         cudaCatchError(cudaMalloc((void **)&node_massl,   sizeof(float) * (nnodes+1)));
125 |         cudaCatchError(cudaMalloc((void **)&body_posl,sizeof(float2) * nbodies));
126 |         cudaCatchError(cudaMalloc((void **)&node_posl,    sizeof(float2) * (nnodes+1)));
127 |         // count contains the number of nested nodes for each node in quadtree
128 |         cudaCatchError(cudaMalloc((void **)&countl,  sizeof(int)   * (nnodes+1)));
129 |         // start contains ...
130 |         cudaCatchError(cudaMalloc((void **)&startl,  sizeof(int)   * (nnodes+1)));
131 |         cudaCatchError(cudaMalloc((void **)&sortl,   sizeof(int)   * (nnodes+1)));
132 | 
133 |         // Edge list plus current and previous per-body forces.
134 |         cudaCatchError(cudaMalloc((void **)&sourcesl,sizeof(int)   * (nedges)));
135 |         cudaCatchError(cudaMalloc((void **)&targetsl,sizeof(int)   * (nedges)));
136 |         cudaCatchError(cudaMalloc((void **)&fxl,     sizeof(float) * (nbodies)));
137 |         cudaCatchError(cudaMalloc((void **)&fyl,     sizeof(float) * (nbodies)));
138 |         cudaCatchError(cudaMalloc((void **)&fx_prevl,sizeof(float) * (nbodies)));
139 |         cudaCatchError(cudaMalloc((void **)&fy_prevl,sizeof(float) * (nbodies)));
140 | 
141 |         // Used for reduction in BoundingBoxKernel
142 |         cudaCatchError(cudaMalloc((void **)&maxxl,   sizeof(float) * mp_count * FACTOR1));
143 |         cudaCatchError(cudaMalloc((void **)&maxyl,   sizeof(float) * mp_count * FACTOR1));
144 |         cudaCatchError(cudaMalloc((void **)&minxl,   sizeof(float) * mp_count * FACTOR1));
145 |         cudaCatchError(cudaMalloc((void **)&minyl,   sizeof(float) * mp_count * FACTOR1));
146 | 
147 |         // Used for reduction in SpeedKernel
148 |         cudaCatchError(cudaMalloc((void **)&swgl,    sizeof(float) * mp_count * FACTOR1));
149 |         cudaCatchError(cudaMalloc((void **)&etral,   sizeof(float) * mp_count * FACTOR1));
150 | 
151 |         // Copy host data to device.
152 |         cudaCatchError(cudaMemcpy(body_massl, body_mass, sizeof(float) * nbodies, cudaMemcpyHostToDevice));
153 |         cudaCatchError(cudaMemcpy(body_posl,  body_pos,  sizeof(float2) * nbodies, cudaMemcpyHostToDevice));
154 |         cudaCatchError(cudaMemcpy(sourcesl, sources, sizeof(int) * nedges, cudaMemcpyHostToDevice));
155 |         cudaCatchError(cudaMemcpy(targetsl, targets, sizeof(int) * nedges, cudaMemcpyHostToDevice));
156 | 
157 |         // cpy fx, fy , fx_prevl, fy_prevl so they are all initialized to 0 in device memory.
158 |         cudaCatchError(cudaMemcpy(fxl, fx,           sizeof(float) * nbodies, cudaMemcpyHostToDevice));
159 |         cudaCatchError(cudaMemcpy(fyl, fy,           sizeof(float) * nbodies, cudaMemcpyHostToDevice));
160 |         cudaCatchError(cudaMemcpy(fx_prevl, fx_prev, sizeof(float) * nbodies, cudaMemcpyHostToDevice));
161 |         cudaCatchError(cudaMemcpy(fy_prevl, fy_prev, sizeof(float) * nbodies, cudaMemcpyHostToDevice));
162 |     }
163 | 
164 |     void CUDAForceAtlas2::freeGPUMemory()
165 |     {
166 |         // Every device buffer released by this object, listed in release order.
167 |         void *device_buffers[] = {
168 |             childl,
169 |             body_massl,
170 |             node_massl,
171 |             body_posl,
172 |             node_posl,
173 |             sourcesl,
174 |             targetsl,
175 |             countl,
176 |             startl,
177 |             sortl,
178 |             fxl,
179 |             fx_prevl,
180 |             fyl,
181 |             fy_prevl,
182 |             maxxl,
183 |             maxyl,
184 |             minxl,
185 |             minyl,
186 |             swgl,
187 |             etral
188 |         };
189 |         for (void *buffer : device_buffers) cudaFree(buffer);
190 |     }
191 | 
192 |     CUDAForceAtlas2::~CUDAForceAtlas2()
193 |     {
194 |         // Host-side staging buffers obtained with malloc in the constructor.
195 |         void *host_buffers[] = {
196 |             body_mass, body_pos,
197 |             sources, targets,
198 |             fx, fy,
199 |             fx_prev, fy_prev
200 |         };
201 |         for (void *buffer : host_buffers) free(buffer);
202 | 
203 |         freeGPUMemory();  // then the device-side buffers
204 |     }
205 | 
206 |     void CUDAForceAtlas2::doStep()  // one full layout iteration on the GPU. NOTE(review): use_barneshut is never consulted here; the tree-based path always runs
207 |     {
208 |         cudaGetLastError(); // clear any errors
209 |         GravityKernel<<<mp_count * FACTOR6, THREADS6>>>(nbodies, k_g, strong_gravity, body_massl, body_posl, fxl, fyl);
210 |         cudaCatchError(cudaGetLastError());
211 |         // Attraction along every edge.
212 |         AttractiveForceKernel<<<mp_count * FACTOR6, THREADS6>>>(nedges, body_posl, fxl, fyl, sourcesl, targetsl);
213 |         cudaCatchError(cudaGetLastError());
214 |         // The remaining tree kernels are defined in another file; see RPBHKernels.cuh.
215 |         BoundingBoxKernel<<<mp_count * FACTOR1, THREADS1>>>(nnodes, nbodies, startl, childl, node_massl, body_posl, node_posl, maxxl, maxyl, minxl, minyl);
216 |         cudaCatchError(cudaGetLastError());
217 | 
218 |         // Build Barnes-Hut Tree
219 |         // 1.) Set all child pointers of internal nodes (in childl) to null (-1)
220 |         ClearKernel1<<<mp_count, 1024>>>(nnodes, nbodies, childl);  // NOTE(review): block size is hard-coded to 1024 rather than taken from max_threads_per_block
221 |         cudaCatchError(cudaGetLastError());
222 |         // 2.) Build the tree
223 |         TreeBuildingKernel<<<mp_count * FACTOR2, THREADS2>>>(nnodes, nbodies, childl, body_posl, node_posl);
224 |         cudaCatchError(cudaGetLastError());
225 |         // 3.) Set all cell mass values to -1.0, set all startd to null (-1)
226 |         ClearKernel2<<<mp_count, 1024>>>(nnodes, startl, node_massl);
227 |         cudaCatchError(cudaGetLastError());
228 | 
229 |         // Recursively compute mass for each BH. cell.
230 |         SummarizationKernel<<<mp_count * FACTOR3, THREADS3>>>(nnodes, nbodies, countl, childl, body_massl, node_massl, body_posl, node_posl);
231 |         cudaCatchError(cudaGetLastError());
232 | 
233 |         SortKernel<<<mp_count * FACTOR4, THREADS4>>>(nnodes, nbodies, sortl, countl, startl, childl);
234 |         cudaCatchError(cudaGetLastError());
235 | 
236 |         // Compute repulsive forces between nodes using BH. tree.
237 |         ForceCalculationKernel<<<mp_count * FACTOR5, THREADS5>>>(nnodes, nbodies, itolsq, epssq, sortl, childl, body_massl, node_massl, body_posl, node_posl, fxl, fyl, k_r);
238 |         cudaCatchError(cudaGetLastError());
239 |         // Reduce swing / effective traction and update the global speed on the device.
240 |         SpeedKernel<<<mp_count * FACTOR1, THREADS1>>>(nbodies, fxl, fyl, fx_prevl, fy_prevl, body_massl, swgl, etral);
241 |         cudaCatchError(cudaGetLastError());
242 |         // Move the bodies and reset the force accumulators for the next step.
243 |         DisplacementKernel<<<mp_count * FACTOR6, THREADS6>>>(nbodies, body_posl, fxl, fyl, fx_prevl, fy_prevl);
244 |         cudaCatchError(cudaGetLastError());
245 | 
246 |         cudaCatchError(cudaDeviceSynchronize());  // wait for the whole step to finish before counting it
247 |         iteration++;
248 |     }
249 | 
250 |     void CUDAForceAtlas2::retrieveLayoutFromGPU()  // device -> host: copies all body positions into body_pos
251 |     {
252 |         cudaCatchError(cudaMemcpy(body_pos, body_posl, sizeof(float2) * nbodies, cudaMemcpyDeviceToHost));
253 |         cudaDeviceSynchronize();
254 |     }
255 | 
256 |     void CUDAForceAtlas2::sendLayoutToGPU()  // host -> device: uploads body_pos as the current body positions
257 |     {
258 |         cudaCatchError(cudaMemcpy(body_posl, body_pos, sizeof(float2) * nbodies, cudaMemcpyHostToDevice));
259 |         cudaDeviceSynchronize();
260 |     }
261 | 
262 |     void CUDAForceAtlas2::sendGraphToGPU()  // host -> device: uploads the body masses and the edge list (sources/targets)
263 |     {
264 |         cudaCatchError(cudaMemcpy(body_massl, body_mass, sizeof(float) * nbodies, cudaMemcpyHostToDevice));
265 |         cudaCatchError(cudaMemcpy(sourcesl, sources, sizeof(int) * nedges, cudaMemcpyHostToDevice));
266 |         cudaCatchError(cudaMemcpy(targetsl, targets, sizeof(int) * nedges, cudaMemcpyHostToDevice));
267 |         cudaDeviceSynchronize();
268 |     }
269 | 
270 |     void CUDAForceAtlas2::sync_layout()
271 |     {
272 |         retrieveLayoutFromGPU();  // refresh the host copy of the positions first
273 |         for(nid_t node = 0; node < layout.graph.num_nodes(); node++)
274 |         {
275 |             const float2 &pos = body_pos[node];
276 |             layout.setX(node, pos.x);
277 |             layout.setY(node, pos.y);
278 |         }
279 | }
280 | 


--------------------------------------------------------------------------------
/src/RPGPUForceAtlas2.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPGPUForceAtlas2.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPGPUForceAtlas2_hpp
25 | #define RPGPUForceAtlas2_hpp
26 | #include "RPForceAtlas2.hpp"
27 | 
28 | namespace RPGraph
29 | {
30 |     class CUDAForceAtlas2: public ForceAtlas2  // ForceAtlas2 back end that runs each step as a sequence of CUDA kernels
31 |     {
32 |     public:
33 |         CUDAForceAtlas2(GraphLayout &layout, bool use_barneshut,
34 |                         bool strong_gravity, float gravity, float scale);
35 |         ~CUDAForceAtlas2();
36 |         void doStep() override;        // one layout iteration on the device
37 |         void sync_layout() override;   // copies device positions back into the layout
38 | 
39 |     private:
40 |         /// CUDA Specific stuff.
41 |         // Host storage.
42 |         float *body_mass;
43 |         float2 *body_pos;
44 |         float *fx, *fy, *fx_prev, *fy_prev;
45 | 
46 |         // Quick way to represent a graph on the GPU
47 |         int *sources, *targets;
48 | 
49 |         // Pointers to device memory (all suffixed with 'l').
50 |         int   *errl,  *sortl, *childl, *countl, *startl;  // NOTE(review): errl is neither allocated nor freed in RPGPUForceAtlas2.cu
51 |         int   *sourcesl, *targetsl;
52 |         float *body_massl, *node_massl;
53 |         float2 *body_posl, *node_posl;
54 |         float *minxl, *minyl, *maxxl, *maxyl;
55 |         float *fxl, *fyl, *fx_prevl, *fy_prevl;
56 |         float *swgl, *etral;
57 | 
58 |         int mp_count; // Number of multiprocessors on GPU.
59 |         int max_threads_per_block;
60 |         int nnodes;   // highest quadtree node index; device arrays hold nnodes+1 entries
61 |         int nbodies;  // number of graph nodes
62 |         int nedges;   // number of graph edges
63 | 
64 |         void sendGraphToGPU();
65 |         void sendLayoutToGPU();
66 |         void retrieveLayoutFromGPU();
67 |         void freeGPUMemory();
68 |     };
69 | };
70 | 
71 | 
72 | #endif /* RPGPUForceAtlas2_hpp */
73 | 


--------------------------------------------------------------------------------
/src/RPGraph.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPGraph.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | 
 25 | #include <stdio.h>
 26 | #include <stdlib.h>
 27 | #include <sstream>
 28 | #include <fstream>
 29 | #include <algorithm>
 30 | #include "RPGraph.hpp"
 31 | 
 32 | namespace RPGraph
 33 | {
 34 |     /* Definitions for UGraph */
 35 |     UGraph::UGraph(std::string edgelist_path)
 36 |     {
 37 |         node_count = 0;
 38 |         edge_count = 0;
 39 | 
 40 |         std::fstream edgelist_file(edgelist_path, std::ifstream::in);
 41 | 
 42 |         std::string line;
 43 |         while(std::getline(edgelist_file, line))
 44 |         {
 45 |             // Skip any comments
 46 |             if(line[0] == '#') continue;
 47 | 
 48 |             // Read source and target from file
 49 |             nid_t s, t;
 50 |             std::istringstream(line) >> s >> t;
 51 | 
 52 |             if(s != t and !has_edge(s, t)) add_edge(s, t);
 53 |         }
 54 | 
 55 |         edgelist_file.close();
 56 |     }
 57 | 
 58 |     bool UGraph::has_node(nid_t nid)
 59 |     {
 60 |         return node_map.count(nid) > 0;
 61 |     }
 62 | 
 63 |     bool UGraph::has_edge(nid_t s, nid_t t)
 64 |     {
 65 |         if(!has_node(s) or !has_node(t)) return false;
 66 | 
 67 |         nid_t s_mapped = node_map[s];
 68 |         nid_t t_mapped = node_map[t];
 69 | 
 70 |         if(adjacency_list.count(std::min(s_mapped, t_mapped)) == 0) return false;
 71 | 
 72 |         std::vector<nid_t> neighbors = adjacency_list[std::min(s_mapped, t_mapped)];
 73 |         if(std::find(neighbors.begin(), neighbors.end(), std::max(s_mapped, t_mapped)) == neighbors.end())
 74 |             return false;
 75 |         else
 76 |             return true;
 77 |     }
 78 | 
 79 |     void UGraph::add_node(nid_t nid)
 80 |     {
 81 |         if(!has_node(nid))
 82 |         {
 83 |             node_map[nid] = node_count;
 84 |             node_map_r[node_count] = nid;
 85 |             node_count++;
 86 |         }
 87 |     }
 88 | 
 89 |     void UGraph::add_edge(nid_t s, nid_t t)
 90 |     {
 91 |         if(has_edge(s, t)) return;
 92 |         if(!has_node(s)) add_node(s);
 93 |         if(!has_node(t)) add_node(t);
 94 |         nid_t s_mapped = node_map[s];
 95 |         nid_t t_mapped = node_map[t];
 96 | 
 97 |         // Insert edge into adjacency_list
 98 |         adjacency_list[std::min(s_mapped, t_mapped)].push_back(std::max(s_mapped, t_mapped));
 99 |         degrees[s_mapped] += 1;
100 |         degrees[t_mapped] += 1;
101 |         edge_count++;
102 |     }
103 | 
104 |     nid_t UGraph::num_nodes()
105 |     {
106 |         return node_count;
107 |     }
108 | 
109 |     nid_t UGraph::num_edges()
110 |     {
111 |         return edge_count;
112 |     }
113 | 
114 |     nid_t UGraph::degree(nid_t nid)
115 |     {
116 |         return degrees[nid];
117 |     }
118 | 
119 |     nid_t UGraph::in_degree(nid_t nid)
120 |     {
121 |         return degree(nid);
122 |     }
123 | 
124 |     nid_t UGraph::out_degree(nid_t nid)
125 |     {
126 |         return degree(nid);
127 |     }
128 |     std::vector<nid_t> UGraph::neighbors_with_geq_id(nid_t nid)
129 |     {
130 |         return adjacency_list[nid];
131 |     }
132 | 
133 |     /* Definitions for CSRUGraph */
134 | 
135 | // CSRUGraph represents an undirected graph using a
136 | // compressed sparse row (CSR) datastructure.
137 |     CSRUGraph::CSRUGraph(nid_t num_nodes, nid_t num_edges)
138 |     {
139 |         // `edges' is a concatenation of all edgelists
140 |         // `offsets' contains offset (in `edges`) for each nodes' edgelist.
141 |         // `nid_to_offset` maps nid to index to be used in `offset'
142 | 
143 |         // e.g. the edgelist of node with id `nid' starts at
144 |         // edges[offsets[nid_to_offset[nid]]] and ends at edges[offsets[nid_to_offset[nid]] + 1]
145 |         // (left bound inclusive right bound exclusive)
146 | 
147 |         edge_count = num_edges; // num_edges counts each bi-directional edge once.
148 |         node_count = num_nodes;
149 |         edges =   (nid_t *) malloc(sizeof(nid_t) * 2 * edge_count);
150 |         offsets = (nid_t *) malloc(sizeof(nid_t) * node_count);
151 |         offset_to_nid = (nid_t *) malloc(sizeof(nid_t) * node_count);
152 | 
153 |         // Create a map from original ids to ids used throughout CSRUGraph
154 |         nid_to_offset.reserve(node_count);
155 | 
156 |         first_free_id = 0;
157 |         edges_seen = 0;
158 |     }
159 | 
160 |     CSRUGraph::~CSRUGraph()
161 |     {
162 |         free(edges);
163 |         free(offsets);
164 |         free(offset_to_nid);
165 |     }
166 | 
167 |     void CSRUGraph::insert_node(nid_t node_id, std::vector<nid_t> nbr_ids)
168 |     {
169 |         nid_t source_id_old = node_id;
170 |         nid_t source_id_new = first_free_id;
171 |         nid_to_offset[source_id_old] = first_free_id;
172 |         offset_to_nid[first_free_id] = source_id_old;
173 |         first_free_id++;
174 | 
175 |         offsets[source_id_new] = edges_seen;
176 |         for (auto nbr_id : nbr_ids)
177 |         {
178 |             nid_t dest_id_old = nbr_id;
179 |             edges[edges_seen] = dest_id_old;
180 |             edges_seen++;
181 |         }
182 |     }
183 | 
184 |     void CSRUGraph::fix_edge_ids()
185 |     {
186 |         for (eid_t ei = 0; ei < 2*edge_count; ei++)
187 |         {
188 |             edges[ei] = nid_to_offset[edges[ei]];
189 |         }
190 |     }
191 | 
192 |     nid_t CSRUGraph::degree(nid_t nid)
193 |     {
194 |         // If nid is last element of `offsets'... we prevent out of bounds.
195 |         nid_t r_bound;
196 |         if (nid < node_count - 1) r_bound = offsets[nid+1];
197 |         else r_bound = edge_count * 2;
198 |         nid_t l_bound = offsets[nid];
199 |         return (r_bound - l_bound);
200 |     }
201 | 
202 |     nid_t CSRUGraph::out_degree(nid_t nid)
203 |     {
204 |         return degree(nid);
205 |     }
206 | 
207 |     nid_t CSRUGraph::in_degree(nid_t nid)
208 |     {
209 |         return degree(nid);
210 |     }
211 | 
212 |     nid_t CSRUGraph::nbr_id_for_node(nid_t nid, nid_t edge_no)
213 |     {
214 |         return edges[offsets[nid] + edge_no];
215 |     }
216 |     nid_t CSRUGraph::num_nodes()
217 |     {
218 |         return node_count;
219 |     }
220 | 
221 |     nid_t CSRUGraph::num_edges()
222 |     {
223 |         return edge_count;
224 |     }
225 | };
226 | 


--------------------------------------------------------------------------------
/src/RPGraph.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPGraph.hpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | 
 25 | #ifndef RPGraph_hpp
 26 | #define RPGraph_hpp
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
 30 | 
 31 | namespace RPGraph
 32 | {
 33 |     // Type to represent node IDs.
 34 |     // NOTE: we limit to 4,294,967,296 nodes through uint32_t.
 35 |     typedef uint32_t nid_t;
 36 | 
 37 |     // Type to represent edge IDs.
 38 |     // NOTE: uint32_t limits density to 50% for directed graphs.
 39 |     typedef uint32_t eid_t;
 40 | 
 41 |     // Virtual base class to derive different Graph types from.
 42 |     class Graph
 43 |     {
 44 |         public:
 45 |             virtual nid_t num_nodes() = 0;
 46 |             virtual nid_t num_edges() = 0;
 47 |             virtual nid_t degree(nid_t nid) = 0;
 48 |             virtual nid_t in_degree(nid_t nid) = 0;
 49 |             virtual nid_t out_degree(nid_t nid) = 0;
 50 |             virtual std::vector<nid_t> neighbors_with_geq_id(nid_t nid) = 0;
 51 | 
 52 |     };
 53 | 
 54 |     // Very basic (adjacency list) representation of an undirected graph.
 55 |     class UGraph : public Graph
 56 |     {
 57 |     private:
 58 |         nid_t node_count, edge_count;
 59 |         std::unordered_map<nid_t, nid_t> degrees;
 60 |         std::unordered_map<nid_t, std::vector<nid_t>> adjacency_list;
 61 | 
 62 |         bool has_node(nid_t nid);
 63 |         bool has_edge(nid_t s, nid_t t);
 64 |         void add_node(nid_t nid);
 65 |         void add_edge(nid_t s, nid_t t);
 66 | 
 67 |     public:
 68 |         // Construct UGraph from edgelist. IDs in edgelist are mapped to
 69 |         // [0, 1, ..., num_nodes-1]. Removes any self-edges.
 70 |         UGraph(std::string edgelist_path);
 71 |         std::unordered_map<nid_t, nid_t> node_map; // el id -> UGraph id
 72 |         std::unordered_map<nid_t, nid_t> node_map_r; // UGraph id -> el id
 73 | 
 74 |         virtual nid_t num_nodes() override;
 75 |         virtual nid_t num_edges() override;
 76 |         virtual nid_t degree(nid_t nid) override;
 77 |         virtual nid_t in_degree(nid_t nid) override;
 78 |         virtual nid_t out_degree(nid_t nid) override;
 79 | 
 80 |         std::vector<nid_t> neighbors_with_geq_id(nid_t nid) override;
 81 |     };
 82 | 
 83 |     // Compressed sparserow (CSR) for undirected graphs.
 84 |     class CSRUGraph : public Graph
 85 |     {
 86 |     private:
 87 |         nid_t *edges;   // All edgelists, concatenated.
 88 |         nid_t *offsets; // For each node, into edges.
 89 |         nid_t node_count, edge_count;
 90 |         nid_t first_free_id, edges_seen;
 91 | 
 92 |     public:
 93 |         std::unordered_map<nid_t, nid_t> nid_to_offset;
 94 |         nid_t *offset_to_nid;
 95 | 
 96 |         CSRUGraph(nid_t num_nodes, nid_t num_edges);
 97 |         ~CSRUGraph();
 98 | 
 99 |         /// Inserts node_id and its edges. Once inserted, edges
100 |         /// can't be altered for this node.
101 |         void insert_node(nid_t node_id, std::vector<nid_t> nbr_ids);
102 |         void fix_edge_ids(); // this should go...
103 | 
104 |         virtual nid_t num_nodes() override;
105 |         virtual nid_t num_edges() override;
106 |         virtual nid_t degree(nid_t nid) override;
107 |         virtual nid_t in_degree(nid_t nid) override;
108 |         virtual nid_t out_degree(nid_t nid) override;
109 | 
110 |         nid_t nbr_id_for_node(nid_t nid, nid_t nbr_no);
111 |     };
112 | }
113 | 
114 | #endif /* RPGraph_hpp */
115 | 


--------------------------------------------------------------------------------
/src/RPGraphLayout.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  RPGraphLayout.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 |  ==============================================================================
 22 | */
 23 | 
 24 | 
 25 | #include "RPGraphLayout.hpp"
 26 | #include "../lib/pngwriter/src/pngwriter.h"
 27 | 
 28 | #include <fstream>
 29 | #include <cmath>
 30 | #include <limits>
 31 | 
 32 | namespace RPGraph
 33 | {
 34 |     GraphLayout::GraphLayout(UGraph &graph, float width, float height)
 35 |         : graph(graph), width(width), height(height)
 36 |     {
 37 |         coordinates = (Coordinate *) malloc(graph.num_nodes() * sizeof(Coordinate));
 38 |     }
 39 | 
 40 |     GraphLayout::~GraphLayout()
 41 |     {
 42 |         free(coordinates);
 43 |     }
 44 | 
 45 |     void GraphLayout::randomizePositions()
 46 |     {
 47 |         for (nid_t i = 0; i <  graph.num_nodes(); ++i)
 48 |         {
 49 |             setX(i, get_random(-width/2.0, width/2.0));
 50 |             setY(i, get_random(-height/2.0, height/2.0));
 51 |         }
 52 |     }
 53 | 
 54 |     float GraphLayout::getX(nid_t node_id)
 55 |     {
 56 |         return coordinates[node_id].x;
 57 |     }
 58 | 
 59 |     float GraphLayout::getY(nid_t node_id)
 60 |     {
 61 |         return coordinates[node_id].y;
 62 |     }
 63 | 
 64 |     float GraphLayout::minX()
 65 |     {
 66 |         float minX = std::numeric_limits<float>::max();
 67 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
 68 |             if (getX(n) < minX) minX = getX(n);
 69 |         return minX;
 70 |     }
 71 | 
 72 |     float GraphLayout::maxX()
 73 |     {
 74 |         float maxX = std::numeric_limits<float>::min();
 75 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
 76 |             if (getX(n) > maxX) maxX = getX(n);
 77 |         return maxX;
 78 |     }
 79 | 
 80 |     float GraphLayout::minY()
 81 |     {
 82 |         float minY = std::numeric_limits<float>::max();
 83 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
 84 |             if (getY(n) < minY) minY = getY(n);
 85 |         return minY;
 86 |     }
 87 | 
 88 |     float GraphLayout::maxY()
 89 |     {
 90 |         float maxY = std::numeric_limits<float>::min();
 91 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
 92 |             if (getY(n) > maxY) maxY = getY(n);
 93 |         return maxY;
 94 |     }
 95 | 
 96 |     float GraphLayout::getXRange()
 97 |     {
 98 |         return maxX()- minX();
 99 |     }
100 | 
101 |     float GraphLayout::getYRange()
102 |     {
103 |         return maxY() - minY();
104 |     }
105 | 
106 |     float GraphLayout::getSpan()
107 |     {
108 |         return ceil(fmaxf(getXRange(), getYRange()));
109 |     }
110 | 
111 |     float GraphLayout::getDistance(nid_t n1, nid_t n2)
112 |     {
113 |         const float dx = getX(n1)-getX(n2);
114 |         const float dy = getY(n1)-getY(n2);
115 |         return std::sqrt(dx*dx + dy*dy);
116 |     }
117 | 
118 |     Real2DVector GraphLayout::getDistanceVector(nid_t n1, nid_t n2)
119 |     {
120 |         return Real2DVector(getX(n2) - getX(n1), getY(n2) - getY(n1));
121 |     }
122 | 
123 |     Real2DVector GraphLayout::getNormalizedDistanceVector(nid_t n1, nid_t n2)
124 |     {
125 |         const float x1 = getX(n1);
126 |         const float x2 = getX(n2);
127 |         const float y1 = getY(n1);
128 |         const float y2 = getY(n2);
129 |         const float dx = x2 - x1;
130 |         const float dy = y2 - y1;
131 |         const float len = std::sqrt(dx*dx + dy*dy);
132 | 
133 |         return Real2DVector(dx / len, dy / len);
134 |     }
135 | 
136 |     Coordinate GraphLayout::getCoordinate(nid_t node_id)
137 |     {
138 |         return coordinates[node_id];
139 |     }
140 | 
141 |     Coordinate GraphLayout::getCenter()
142 |     {
143 |         float x = minX() + getXRange()/2.0;
144 |         float y = minY() + getYRange()/2.0;
145 |         return Coordinate(x, y);
146 |     }
147 | 
148 |     void GraphLayout::setX(nid_t node_id, float x_value)
149 |     {
150 |         coordinates[node_id].x = x_value;
151 |     }
152 | 
153 |     void GraphLayout::setY(nid_t node_id, float y_value)
154 |     {
155 |         coordinates[node_id].y = y_value;
156 |     }
157 | 
158 |     void GraphLayout::moveNode(nid_t n, RPGraph::Real2DVector v)
159 |     {
160 |         setX(n, getX(n) + v.x);
161 |         setY(n, getY(n) + v.y);
162 |     }
163 | 
164 |     void GraphLayout::setCoordinates(nid_t node_id, Coordinate c)
165 |     {
166 |         setX(node_id, c.x);
167 |         setY(node_id, c.y);
168 |     }
169 | 
170 |     void GraphLayout::writeToPNG(const int image_w, const int image_h,
171 |                                  std::string path)
172 |     {
173 |         const float xRange = getXRange();
174 |         const float yRange = getYRange();
175 |         const RPGraph::Coordinate center = getCenter();
176 |         const float xCenter = center.x;
177 |         const float yCenter = center.y;
178 |         const float minX = xCenter - xRange/2.0;
179 |         const float minY = yCenter - yRange/2.0;
180 |         const float xScale = image_w/xRange;
181 |         const float yScale = image_h/yRange;
182 | 
183 |         // Here we need to do some guessing as to what the optimal
184 |         // opacity of nodes and edges might be, given network size.
185 |         const float node_opacity = 10000.0  / graph.num_nodes();
186 |         const float edge_opacity = 100000.0 / graph.num_edges();
187 | 
188 |         // Write to file.
189 |         pngwriter layout_png(image_w, image_h, 0, path.c_str());
190 |         layout_png.invert(); // set bg. to white
191 | 
192 |         for (nid_t n1 = 0; n1 < graph.num_nodes(); ++n1)
193 |         {
194 |             // Plot node,
195 |             layout_png.filledcircle_blend((getX(n1) - minX)*xScale,
196 |                                           (getY(n1) - minY)*yScale,
197 |                                           3, node_opacity, 0, 0, 0);
198 |             for (nid_t n2 : graph.neighbors_with_geq_id(n1)) {
199 |                 // ... and edge.
200 |                 layout_png.line_blend((getX(n1) - minX)*xScale, (getY(n1) - minY)*yScale,
201 |                                       (getX(n2) - minX)*xScale, (getY(n2) - minY)*yScale,
202 |                                       edge_opacity, 0, 0, 0);
203 |             }
204 |         }
205 |         // Write it to disk.
206 |         layout_png.write_png();
207 |     }
208 | 
209 |     void GraphLayout::writeToCSV(std::string path)
210 |     {
211 |         if (is_file_exists(path.c_str()))
212 |         {
213 |             printf("Error: File exists at %s\n", path.c_str());
214 |             exit(EXIT_FAILURE);
215 |         }
216 | 
217 |         std::ofstream out_file(path);
218 | 
219 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
220 |         {
221 |             nid_t id = graph.node_map_r[n]; // id as found in edgelist
222 |             out_file << id << "," << getX(n) << "," << getY(n) << "\n";
223 |         }
224 | 
225 |         out_file.close();
226 |     }
227 | 
228 |     void GraphLayout::writeToBin(std::string path)
229 |     {
230 |         if (is_file_exists(path.c_str()))
231 |         {
232 |             printf("Error: File exists at %s\n", path.c_str());
233 |             exit(EXIT_FAILURE);
234 |         }
235 | 
236 |         std::ofstream out_file(path, std::ofstream::binary);
237 | 
238 |         for (nid_t n = 0; n < graph.num_nodes(); ++n)
239 |         {
240 |             nid_t id = graph.node_map_r[n]; // id as found in edgelist
241 |             float x = getX(n);
242 |             float y = getY(n);
243 | 
244 |             out_file.write(reinterpret_cast<const char*>(&id), sizeof(id));
245 |             out_file.write(reinterpret_cast<const char*>(&x), sizeof(x));
246 |             out_file.write(reinterpret_cast<const char*>(&y), sizeof(y));
247 |         }
248 | 
249 |         out_file.close();
250 |     }
251 | 
252 | }
253 | 


--------------------------------------------------------------------------------
/src/RPGraphLayout.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPGraphLayout.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPGraphLayout_hpp
25 | #define RPGraphLayout_hpp
26 | 
27 | #include "RPGraph.hpp"
28 | #include "RPCommon.hpp"
29 | #include <string>
30 | 
31 | namespace RPGraph
32 | {
33 |     class GraphLayout
34 |     {
35 |     private:
36 |         Coordinate *coordinates;
37 | 
38 |     protected:
39 |         float width, height;
40 |         float minX(), minY(), maxX(), maxY();
41 | 
42 |     public:
43 |         GraphLayout(RPGraph::UGraph &graph,
44 |                     float width = 10000, float height = 10000);
45 |         ~GraphLayout();
46 | 
47 |         UGraph &graph; // to lay-out
48 | 
49 |         // randomize the layout position of all nodes.
50 |         void randomizePositions();
51 | 
52 |         float getX(nid_t node_id), getY(nid_t node_id);
53 |         float getXRange(), getYRange(), getSpan();
54 |         float getDistance(nid_t n1, nid_t n2);
55 |         Real2DVector getDistanceVector(nid_t n1, nid_t n2);
56 |         Real2DVector getNormalizedDistanceVector(nid_t n1, nid_t n2);
57 |         Coordinate getCoordinate(nid_t node_id);
58 |         Coordinate getCenter();
59 | 
60 | 
61 |         void setX(nid_t node_id, float x_value), setY(nid_t node_id, float y_value);
62 |         void moveNode(nid_t, Real2DVector v);
63 |         void setCoordinates(nid_t node_id, Coordinate c);
64 |         void writeToPNG(const int image_w, const int image_h, std::string path);
65 |         void writeToCSV(std::string path);
66 |         void writeToBin(std::string path);
67 |     };
68 | }
69 | 
70 | #endif /* RPGraphLayout_hpp */
71 | 


--------------------------------------------------------------------------------
/src/RPLayoutAlgorithm.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPLayoutAlgorithm.cpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #include "RPLayoutAlgorithm.hpp"
25 | 
26 | namespace RPGraph
27 | {
28 |     LayoutAlgorithm::LayoutAlgorithm(GraphLayout &layout): layout(layout){}
29 |     LayoutAlgorithm::~LayoutAlgorithm(){}
30 | }
31 | 


--------------------------------------------------------------------------------
/src/RPLayoutAlgorithm.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  ==============================================================================
 3 | 
 4 |  RPLayoutAlgorithm.hpp
 5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
 6 | 
 7 |  This file is part of graph_viewer.
 8 | 
 9 |  graph_viewer is free software: you can redistribute it and/or modify
10 |  it under the terms of version 3 of the GNU Affero General Public License as
11 |  published by the Free Software Foundation.
12 | 
13 |  graph_viewer is distributed in the hope that it will be useful,
14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 |  GNU Affero General Public License for more details.
17 | 
18 |  You should have received a copy of the GNU Affero General Public License
19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
20 | 
21 |  ==============================================================================
22 | */
23 | 
24 | #ifndef RPLayoutAlgorithm_hpp
25 | #define RPLayoutAlgorithm_hpp
26 | 
27 | #include "RPGraphLayout.hpp"
28 | 
29 | namespace RPGraph
30 | {
31 |     class LayoutAlgorithm
32 |     {
33 |     public:
34 |         LayoutAlgorithm(GraphLayout &layout);
35 |         ~LayoutAlgorithm();
36 |         GraphLayout &layout;
37 | 
38 |         virtual void sync_layout() = 0; // write current layout to `layout'.
39 |     };
40 | }
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/src/graph_viewer.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  ==============================================================================
  3 | 
  4 |  graph_viewer.cpp
  5 |  Copyright © 2016, 2017, 2018  G. Brinkmann
  6 | 
  7 |  This file is part of graph_viewer.
  8 | 
  9 |  graph_viewer is free software: you can redistribute it and/or modify
 10 |  it under the terms of version 3 of the GNU Affero General Public License as
 11 |  published by the Free Software Foundation.
 12 | 
 13 |  graph_viewer is distributed in the hope that it will be useful,
 14 |  but WITHOUT ANY WARRANTY; without even the implied warranty of
 15 |  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 16 |  GNU Affero General Public License for more details.
 17 | 
 18 |  You should have received a copy of the GNU Affero General Public License
 19 |  along with graph_viewer.  If not, see <https://www.gnu.org/licenses/>.
 20 | 
 21 | -------------------------------------------------------------------------------
 22 | 
 23 |  This code was written as part of a research project at the Leiden Institute of
 24 |  Advanced Computer Science (www.liacs.nl). For other resources related to this
 25 |  project, see https://liacs.leidenuniv.nl/~takesfw/GPUNetworkVis/.
 26 | 
 27 |  ==============================================================================
 28 | */
 29 | 
 30 | 
 31 | #include <stdio.h>
 32 | #include <stdlib.h>
 33 | #include <string>
 34 | #include <math.h>
 35 | 
 36 | #include "RPCommon.hpp"
 37 | #include "RPGraph.hpp"
 38 | #include "RPGraphLayout.hpp"
 39 | #include "RPCPUForceAtlas2.hpp"
 40 | 
 41 | #ifdef __NVCC__
 42 | #include <cuda_runtime_api.h>
 43 | #include "RPGPUForceAtlas2.hpp"
 44 | #endif
 45 | 
 46 | int main(int argc, const char **argv)
 47 | {
 48 |     // For reproducibility.
 49 |     srandom(1234);
 50 | 
 51 |     // Parse commandline arguments
 52 |     if (argc < 10 or (std::string(argv[10]) == "png" and argc < 12))
 53 |     {
 54 |         fprintf(stderr, "Usage: graph_viewer gpu|cpu max_iterations num_snaps sg|wg scale gravity exact|approximate edgelist_path out_path [png image_w image_h|csv|bin]\n");
 55 |         exit(EXIT_FAILURE);
 56 |     }
 57 | 
 58 |     const bool cuda_requested = std::string(argv[1]) == "gpu" or std::string(argv[1]) == "cuda";
 59 |     const int max_iterations = std::stoi(argv[2]);
 60 |     const int num_screenshots = std::stoi(argv[3]);
 61 |     const bool strong_gravity = std::string(argv[4]) == "sg";
 62 |     const float scale = std::stof(argv[5]);
 63 |     const float gravity = std::stof(argv[6]);
 64 |     const bool approximate = std::string(argv[7]) == "approximate";
 65 |     std::string edgelist_path = argv[8];
 66 |     std::string out_path = argv[9];
 67 |     std::string out_format = "png";
 68 |     int image_w = 1250;
 69 |     int image_h = 1250;
 70 | 
 71 |     for (int arg_no = 10; arg_no < argc; arg_no++)
 72 |     {
 73 |         if(std::string(argv[arg_no]) == "png")
 74 |         {
 75 |             out_format = "png";
 76 |             image_w = std::stoi(argv[arg_no+1]);
 77 |             image_h = std::stoi(argv[arg_no+2]);
 78 |             arg_no += 2;
 79 |         }
 80 | 
 81 |         else if(std::string(argv[arg_no]) == "csv")
 82 |         {
 83 |             out_format = "csv";
 84 |         }
 85 | 
 86 |         else if(std::string(argv[arg_no]) == "bin")
 87 |         {
 88 |             out_format = "bin";
 89 |         }
 90 |     }
 91 | 
 92 | 
 93 |     if(cuda_requested and not approximate)
 94 |     {
 95 |         fprintf(stderr, "error: The CUDA implementation (currently) requires Barnes-Hut approximation.\n");
 96 |         exit(EXIT_FAILURE);
 97 |     }
 98 | 
 99 |     // Check in_path and out_path
100 |     if (!is_file_exists(edgelist_path))
101 |     {
102 |         fprintf(stderr, "error: No edgelist at %s\n", edgelist_path.c_str());
103 |         exit(EXIT_FAILURE);
104 |     }
105 |     if (!is_file_exists(out_path))
106 |     {
107 |         fprintf(stderr, "error: No output folder at %s\n", out_path.c_str());
108 |         exit(EXIT_FAILURE);
109 |     }
110 | 
111 |     // If not compiled with cuda support, check if cuda is requested.
112 |     #ifndef __NVCC__
113 |     if(cuda_requested)
114 |     {
115 |         fprintf(stderr, "error: CUDA was requested, but not compiled for.\n");
116 |         exit(EXIT_FAILURE);
117 |     }
118 |     #endif
119 | 
120 |     // Load graph.
121 |     printf("Loading edgelist at '%s'...", edgelist_path.c_str());
122 |     fflush(stdout);
123 |     RPGraph::UGraph graph = RPGraph::UGraph(edgelist_path);
124 |     printf("done.\n");
125 |     printf("    fetched %d nodes and %d edges.\n", graph.num_nodes(), graph.num_edges());
126 | 
127 |     // Create the GraphLayout and ForceAtlas2 objects.
128 |     RPGraph::GraphLayout layout(graph);
129 |     RPGraph::ForceAtlas2 *fa2;
130 |     #ifdef __NVCC__
131 |     if(cuda_requested)
132 |         fa2 = new RPGraph::CUDAForceAtlas2(layout, approximate,
133 |                                            strong_gravity, gravity, scale);
134 |     else
135 |     #endif
136 |         fa2 = new RPGraph::CPUForceAtlas2(layout, approximate,
137 |                                           strong_gravity, gravity, scale);
138 | 
139 |     printf("Started Layout algorithm...\n");
140 |     const int snap_period = ceil((float)max_iterations/num_screenshots);
141 |     const int print_period = ceil((float)max_iterations*0.05);
142 | 
143 |     for (int iteration = 1; iteration <= max_iterations; ++iteration)
144 |     {
145 |         fa2->doStep();
146 |         // If we need to, write the result to a png
147 |         if (num_screenshots > 0 && (iteration % snap_period == 0 || iteration == max_iterations))
148 |         {
149 | 	    // Determine output filename
150 | 	    std::string edgelist_basename = basename(edgelist_path);
151 | 	    std::string out_filename = edgelist_basename + "_" + std::to_string(iteration) + "." + out_format;
152 |             std::string out_filepath = out_path + "/" + out_filename;
153 |             printf("Starting iteration %d (%.2f%%), writing %s...", iteration, 100*(float)iteration/max_iterations, out_format.c_str());
154 |             fflush(stdout);
155 |             fa2->sync_layout();
156 | 
157 |             if (out_format == "png")
158 |                 layout.writeToPNG(image_w, image_h, out_filepath);
159 |             else if (out_format == "csv")
160 |                 layout.writeToCSV(out_filepath);
161 |             else if (out_format == "bin")
162 |                 layout.writeToBin(out_filepath);
163 | 
164 |             printf("done.\n");
165 |         }
166 | 
167 |         // Else we print (if we need to)
168 |         else if (iteration % print_period == 0)
169 |         {
170 |             printf("Starting iteration %d (%.2f%%).\n", iteration, 100*(float)iteration/max_iterations);
171 |         }
172 |     }
173 | 
174 |     delete fa2;
175 |     exit(EXIT_SUCCESS);
176 | }
177 | 


--------------------------------------------------------------------------------