├── .gitignore
├── LICENSE
├── README.md
├── contributor_network
├── __init__.py
├── cli.py
├── graph.py
├── template
│ ├── index.html
│ └── static
│ │ └── js
│ │ ├── jquery.min.js
│ │ └── vis-network.min.js
└── utils.py
├── data
├── contributor-network-graph.png
└── dependencies.csv
├── parse_requirements.py
├── requirements-development.txt
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | *.db
3 | *.egg-info/
4 | *.pyc
5 | *.sqlite
6 | *.sw?
7 | *~
8 | .*.sw?
9 | .DS_Store
10 | .activate
11 | .coverage
12 | .directory
13 | .env
14 | .idea/*
15 | .ipynb_checkpoints/
16 | .pytest_cache/
17 | .tox
18 | MANIFEST
19 | build/*
20 | data/
21 | dist/*
22 | docs-build/
23 | docs/man/
24 | docs/reference/
25 | reg_settings.py
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Contributor Network Graph
2 |
3 | Create a graph visualization from a code repository.
4 |
5 | 
6 |
7 |
8 | ## Installing
9 |
10 | ```
11 | pip install contributor-network
12 | ```
13 |
14 | ## Running
15 |
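16 | Given a CSV file called `dependencies.csv` with one package per row (`depended_by` names the package that depends on it and is left empty when nothing listed depends on it), like: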
17 |
18 | ```csv
19 | name,repository_type,repository_url,depended_by
20 | contributor-network,git,https://github.com/PythonicCafe/contributor-network,
21 | lxml,git,https://github.com/lxml/lxml,contributor-network
22 | tqdm,git,https://github.com/tqdm/tqdm,contributor-network
23 | ```
24 |
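25 | Execute (the output directory, `network/` here, is created as a copy of the bundled template and must not already exist):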
26 |
27 | ```shell
28 | python -m contributor_network.cli \
29 | --temp-dir=/tmp/repositories/ \
30 | dependencies.csv \
31 | network/
32 | ```
33 |
--------------------------------------------------------------------------------
/contributor_network/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = (0, 1, 1)
2 |
--------------------------------------------------------------------------------
/contributor_network/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import shutil
3 | import tempfile
4 | from pathlib import Path
5 |
6 | from tqdm import tqdm
7 |
8 | from .graph import Graph, Package, Contributor
9 | from .utils import read_csv
10 |
11 |
def main():
    """Command-line entry point: build the network files for a CSV of packages.

    Positional arguments are the input CSV and the output directory; the
    template directory and the directory used for repository clones can be
    overridden with ``--template-dir`` and ``--temp-dir``.

    The template directory is copied to the output path, one contributors
    JSON file is written per package under ``<output>/data``, and the
    package-level graph is written to ``<output>/data/network.json``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("input_filename")
    arg_parser.add_argument("output_path")
    arg_parser.add_argument("--template-dir", default=str(Path(__file__).parent / "template"))
    arg_parser.add_argument("--temp-dir", default=str(Path(tempfile.gettempdir()) / "repositories"))
    options = arg_parser.parse_args()

    csv_filename = options.input_filename
    # TODO: check if input_filename has needed columns

    destination = Path(options.output_path)
    data_dir = destination / "data"
    avatars_dir = destination / "static" / "img"

    # The output tree starts out as a plain copy of the template.
    shutil.copytree(Path(options.template_dir), destination)

    clones_dir = Path(options.temp_dir)
    network = Graph()

    # Pass 1: one node per package, plus its per-package contributors file.
    progress = tqdm(read_csv(csv_filename))
    for row in progress:
        progress.desc = f"Extracting repository data: {row['name']}"
        progress.refresh()
        # "depended_by" is not a Package field; it is only used in pass 2.
        row.pop("depended_by")
        row["repository_path"] = clones_dir / row["name"]
        package = Package(**row)
        network.add_node(package.serialize())

        contributors_json = data_dir / f"{package.id}.json"
        if contributors_json.exists():
            continue
        package.save_contributors(contributors_json, destination, avatars_dir)

    # Pass 2: edges, once every package node is known by name.
    progress = tqdm(read_csv(csv_filename))
    for row in progress:
        progress.desc = f"Adding dependencies: {row['name']}"
        progress.refresh()
        dependent = row.pop("depended_by")
        if not dependent:
            continue
        network.add_edge(
            from_id=network.get_node_by_name(dependent)["id"],
            to_id=network.get_node_by_name(row["name"])["id"],
            label="depends on",
            width=10,
            color="blue",
        )

    print("Exporting network JSON...", end="", flush=True)
    network.save(data_dir / "network.json")
    print()


if __name__ == "__main__":
    main()
70 |
--------------------------------------------------------------------------------
/contributor_network/graph.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import json
3 | import shlex
4 | import subprocess
5 | import tempfile
6 | from collections import Counter
7 | from dataclasses import dataclass
8 | from email.utils import getaddresses
9 | from pathlib import Path
10 | from urllib.request import urlopen
11 |
12 | from .utils import transform
13 |
14 |
15 | BASE_PATH = Path(__file__).parent
16 | DATA_PATH = BASE_PATH / "data"
17 | IMAGE_PATH = DATA_PATH / "img"
18 |
19 |
def list_contributors(repository_type, repository_url, path):
    """Yield ``(name, email, commits)`` for every author found in a repository log.

    The repository is cloned into ``path / <last component of repository_url>``
    unless that directory already exists; then ``<repository_type> log`` is run
    there and its author lines are counted per e-mail address.

    Args:
        repository_type: ``"git"`` or ``"hg"``; also the executable that is run.
        repository_url: Location handed to the ``clone`` sub-command.
        path: Directory under which the working copy is kept.

    Yields:
        ``(name, email, commits)`` tuples, most commits first. E-mails are
        stripped and lower-cased before counting. ``name`` is the last display
        name seen for that e-mail in the log output, or the e-mail itself when
        that name is empty.

    Raises:
        NotImplementedError: For any other ``repository_type``. This is a
            generator, so the error surfaces on first iteration.
        subprocess.CalledProcessError: If the clone or log command fails.
    """
    if repository_type not in ("git", "hg"):
        raise NotImplementedError(f"Unknown repository type: {repr(repository_type)}")

    save_path = (Path(path) / Path(repository_url).name).absolute()
    save_path.parent.mkdir(parents=True, exist_ok=True)

    if not save_path.exists():  # clone repository
        # An argv list is passed directly so quotes or spaces in the URL or
        # path cannot change how the command line is split.
        subprocess.check_output(
            [repository_type, "clone", repository_url, str(save_path)],
            stderr=subprocess.PIPE,
        )

    # errors="replace": a single author name stored in a legacy encoding must
    # not abort the whole run with UnicodeDecodeError.
    lines = subprocess.check_output(
        [repository_type, "log"],
        cwd=save_path,
        encoding="utf-8",
        errors="replace",
    ).splitlines()

    author_key = "Author:" if repository_type == "git" else "user:"

    names = {}
    commit_counter = Counter()
    for line in lines:
        # Only author lines that carry an e-mail address are counted.
        if not line.startswith(author_key) or "@" not in line:
            continue
        name, email = getaddresses([line[len(author_key):].strip()])[0]
        email = email.strip().lower()
        names[email] = name
        commit_counter[email] += 1

    for email, commits in commit_counter.most_common():
        yield (names.get(email) or email, email, commits)
51 |
52 |
@dataclass
class Package:
    """A source repository, shown as a ``package`` node in the network."""

    name: str  # used for the node id, label and name
    repository_type: str  # passed to list_contributors
    repository_url: str  # passed to list_contributors
    repository_path: str  # directory handed to list_contributors for the clone

    @property
    def id(self):
        """Node identifier: ``package:<name>``."""
        return f"package:{self.name}"

    def serialize(self):
        """Return the node dictionary stored in the graph for this package."""
        return {
            "group": "package",
            "id": self.id,
            "label": self.name,
            "name": self.name,
            "shape": "circle",
        }

    def contributors(self):
        """Yield this package's ``Contributor`` objects.

        The list is built from ``list_contributors`` on first use and cached
        on the instance in ``_contributors``.
        """
        if not hasattr(self, "_contributors"):
            iterator = list_contributors(
                self.repository_type, self.repository_url, self.repository_path
            )
            self._contributors = [
                Contributor(name=name, email=email, commits=commits, package=self)
                for name, email, commits in iterator
            ]
        yield from self._contributors

    @property
    def min_commits(self):
        """Smallest per-contributor commit count (cached).

        Raises ``ValueError`` when the package has no contributors.
        """
        if not hasattr(self, "_min_commits"):
            self._min_commits = min(contributor.commits for contributor in self.contributors())
        return self._min_commits

    @property
    def max_commits(self):
        """Largest per-contributor commit count (cached).

        Raises ``ValueError`` when the package has no contributors.
        """
        if not hasattr(self, "_max_commits"):
            self._max_commits = max(contributor.commits for contributor in self.contributors())
        return self._max_commits

    @property
    def commits(self):
        """Total commit count over all contributors (cached)."""
        if not hasattr(self, "_commits"):
            self._commits = sum(contributor.commits for contributor in self.contributors())
        return self._commits

    def save_contributors(self, filename, base_path, image_path):
        """Write this package's contributor graph to ``filename`` as JSON.

        Each contributor becomes a node with a "contributed to" edge to the
        package. Remote (``http``/``https``) node images are stored under
        ``image_path`` using the node id as file name, and the node is then
        pointed at that local file, expressed relative to ``base_path``.

        Args:
            filename: Destination JSON file.
            base_path: Directory that image references are made relative to;
                ``image_path`` must be located inside it.
            image_path: Directory in which downloaded images are stored.
        """
        image_path = Path(image_path)
        graph = Graph()
        for contributor in self.contributors():
            node = contributor.serialize()
            image_url = node.get("image", "")
            if image_url.startswith(("http://", "https://")):
                image_filename = image_path / str(node["id"])
                if not image_filename.exists():
                    image_filename.parent.mkdir(parents=True, exist_ok=True)
                    # Read the whole body before opening the target so a
                    # failed download leaves no empty file behind, and close
                    # the HTTP response deterministically.
                    with urlopen(image_url) as response:
                        content = response.read()
                    with open(image_filename, mode="wb") as fobj:
                        fobj.write(content)
                # Point at the local copy whether it was just downloaded or
                # was already present from an earlier run.
                node["image"] = str(image_filename.relative_to(base_path))
            graph.add_node(node)
            graph.add_edge(
                from_id=contributor.id,
                to_id=contributor.package.id,
                label="contributed to",
                width=contributor.commit_weight,
            )

        graph.save(filename)
128 |
129 |
@dataclass
class Contributor:
    """A commit author of one package, shown as a ``person`` node."""

    name: str
    email: str
    commits: int
    package: Package

    @property
    def email_hash(self):
        """Hex MD5 digest of the stripped, lower-cased e-mail address."""
        normalized = self.email.strip().lower()
        digest = hashlib.md5(normalized.encode("utf-8"))
        return digest.hexdigest()

    @property
    def id(self):
        """Node identifier: ``person:<email_hash>``."""
        return f"person:{self.email_hash}"

    @property
    def avatar_url(self):
        """Gravatar URL built from the e-mail hash."""
        return f"https://www.gravatar.com/avatar/{self.email_hash}"

    @property
    def commit_weight(self):
        """Commit count passed through ``transform`` with the package's
        min/max commit counts and the bounds 1 and 10."""
        package = self.package
        return transform(self.commits, package.min_commits, package.max_commits, 1, 10)

    def serialize(self):
        """Return the node dictionary stored in the graph for this person."""
        node = {"group": "person"}
        node["id"] = self.id
        node["image"] = self.avatar_url
        node["label"] = self.name
        node["shape"] = "circularImage"
        return node
161 |
162 |
class Graph:
    """Collection of node and edge dictionaries that can be saved as JSON."""

    def __init__(self):
        self.__edges = []
        self.__nodes_by_name = {}
        self.__nodes_by_id = {}

    def add_node(self, node):
        """Store ``node`` under its ``"id"`` and, if present, its ``"name"``.

        A node with an id or name that is already stored replaces the
        earlier entry.
        """
        self.__nodes_by_id[node["id"]] = node
        if "name" not in node:
            return
        self.__nodes_by_name[node["name"]] = node

    def get_node_by_name(self, name):
        """Return the node registered under ``name``; ``KeyError`` if none."""
        return self.__nodes_by_name[name]

    def add_edge(self, from_id, to_id, label, width=1, color="black"):
        """Append an edge dictionary linking ``from_id`` to ``to_id``."""
        edge = {
            "from": from_id,
            "label": label,
            "to": to_id,
            "width": width,
            "color": color,
        }
        self.__edges.append(edge)

    def save(self, filename):
        """Write ``{"nodes": [...], "edges": [...]}`` to ``filename`` as JSON.

        The parent directory is created first when it does not exist.
        """
        target = Path(filename)
        folder = target.parent
        if not folder.exists():
            folder.mkdir(parents=True)
        with open(target, mode="w") as fobj:
            payload = {
                "nodes": list(self.__nodes_by_id.values()),
                "edges": self.__edges,
            }
            json.dump(payload, fobj)
201 |
--------------------------------------------------------------------------------
/contributor_network/template/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |