├── .coveragerc
├── .gitignore
├── .hound.yml
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── PULL_REQUEST_TEMPLATE.md
├── README.md
├── logo.png
├── requirements.txt
├── src
├── githubcity
│ ├── __init__.py
│ ├── ghcity.py
│ ├── ghregion.py
│ └── ghuser.py
├── manualtest
│ ├── config.json
│ └── template
├── run.py
├── runUser.py
├── setup.cfg
└── setup.py
└── tests
├── __init__.py
├── ghuserTester.py
└── resources
├── user.html
├── userk.html
└── userprivate.html
/.coveragerc:
--------------------------------------------------------------------------------
1 | [report]
2 | omit =
3 | *lib*
4 | *test*
5 | */python?.?/*
6 | */site-packages/nose/*
7 | *__init__*
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | .pypirc
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 |
56 | # Sphinx documentation
57 | docs/_build/
58 |
59 | # PyBuilder
60 | target/
61 |
62 | MANIFEST
63 |
--------------------------------------------------------------------------------
/.hound.yml:
--------------------------------------------------------------------------------
1 | flake8:
2 | enabled: true
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.4"
4 | - "3.6"
5 | install:
6 | - pip install -r requirements.txt
7 | - pip install coverage codecov
8 | script:
9 | - coverage run tests/ghuserTester.py
10 | after_success:
11 | - codecov --token=$COVERAGE
12 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Code of Conduct
2 |
3 | As contributors and maintainers of this project, and in the interest of
4 | fostering an open and welcoming community, we pledge to respect all people who
5 | contribute through reporting issues, posting feature requests, updating
6 | documentation, submitting pull requests or patches, and other activities.
7 |
8 | We are committed to making participation in this project a harassment-free
9 | experience for everyone, regardless of level of experience, gender, gender
10 | identity and expression, sexual orientation, disability, personal appearance,
11 | body size, race, ethnicity, age, religion, or nationality.
12 |
13 | Examples of unacceptable behavior by participants include:
14 |
15 | * The use of sexualized language or imagery
16 | * Personal attacks
17 | * Trolling or insulting/derogatory comments
18 | * Public or private harassment
19 | * Publishing other's private information, such as physical or electronic
20 | addresses, without explicit permission
21 | * Other unethical or unprofessional conduct
22 |
23 | Project maintainers have the right and responsibility to remove, edit, or
24 | reject comments, commits, code, wiki edits, issues, and other contributions
25 | that are not aligned to this Code of Conduct, or to ban temporarily or
26 | permanently any contributor for other behaviors that they deem inappropriate,
27 | threatening, offensive, or harmful.
28 |
29 | By adopting this Code of Conduct, project maintainers commit themselves to
30 | fairly and consistently applying these principles to every aspect of managing
31 | this project. Project maintainers who do not follow or enforce the Code of
32 | Conduct may be permanently removed from the project team.
33 |
34 | This Code of Conduct applies both within project spaces and in public spaces
35 | when an individual is representing the project or its community.
36 |
37 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
38 | version 1.3.0, available at
39 | [http://contributor-covenant.org/version/1/3/0/][version]
40 |
41 | [homepage]: http://contributor-covenant.org
42 | [version]: http://contributor-covenant.org/version/1/3/0/
43 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | :+1::tada: First off, thanks for taking the time to contribute! :tada::+1:
4 |
5 | When contributing to this repository, please first discuss the change you wish to make via issue,
6 | email, or any other method with the owners of this repository before making a change.
7 |
8 | Please note we have a code of conduct, please follow it in all your interactions with the project.
9 |
10 |
11 | ## How Can I Contribute?
12 |
13 | ### Reporting Bugs
14 |
15 | This section guides you through submitting a bug report for GitHubCity. Following these guidelines helps maintainers and the community understand your report :pencil:, reproduce the behavior :computer: :computer:, and find related reports :mag_right:.
16 |
17 | Before creating bug reports, please check [this list](#before-submitting-a-bug-report) as you might find out that you don't need to create one. When you are creating a bug report, please [include as many details as possible](#how-do-i-submit-a-good-bug-report). Fill out [the required template](ISSUE_TEMPLATE.md), the information it asks for helps us resolve issues faster.
18 |
19 | > **Note:** If you find a **Closed** issue that seems like it is the same thing that you're experiencing, open a new issue and include a link to the original issue in the body of your new one.
20 |
21 | ### Suggesting Enhancements
22 |
23 | This section guides you through submitting an enhancement suggestion for GitHubCity, including completely new features and minor improvements to existing functionality. Following these guidelines helps maintainers and the community understand your suggestion :pencil: and find related suggestions :mag_right:.
24 |
25 | Before creating enhancement suggestions, please check [this list](#before-submitting-an-enhancement-suggestion) as you might find out that you don't need to create one. When you are creating an enhancement suggestion, please [include as many details as possible](#how-do-i-submit-a-good-enhancement-suggestion). Fill in [the template](ISSUE_TEMPLATE.md), including the steps that you imagine you would take if the feature you're requesting existed.
26 |
27 | ### Your First Code Contribution
28 |
29 | Unsure where to begin contributing to GitHubCity? You can start by looking through these `beginner` and `help-wanted` issues:
30 |
31 | * [Beginner issues][beginner] - issues which should only require a few lines of code, and a test or two.
32 | * [Help wanted issues][help-wanted] - issues which should be a bit more involved than `beginner` issues.
33 |
34 | Both issue lists are sorted by total number of comments. While not perfect, number of comments is a reasonable proxy for impact a given change will have.
35 |
36 | ### Pull Requests
37 |
38 | * Fill in [the required template](PULL_REQUEST_TEMPLATE.md)
39 | * Do not include issue numbers in the PR title
40 | * Include screenshots and animated GIFs in your pull request whenever possible.
41 | * Document new code.
42 | * End all files with a newline
43 |
44 | ## Styleguides
45 |
46 | ### Git Commit Messages
47 |
48 | * Use the present tense ("Add feature" not "Added feature")
49 | * Use the imperative mood ("Move cursor to..." not "Moves cursor to...")
50 | * Limit the first line to 72 characters or less
51 | * Reference issues and pull requests liberally after the first line
52 | * When only changing documentation, include `[ci skip]` in the commit description
53 | * Consider starting the commit message with an applicable emoji:
54 | * :art: `:art:` when improving the format/structure of the code
55 | * :racehorse: `:racehorse:` when improving performance
56 | * :non-potable_water: `:non-potable_water:` when plugging memory leaks
57 | * :memo: `:memo:` when writing docs
58 | * :penguin: `:penguin:` when fixing something on Linux
59 | * :apple: `:apple:` when fixing something on macOS
60 | * :checkered_flag: `:checkered_flag:` when fixing something on Windows
61 | * :bug: `:bug:` when fixing a bug
62 | * :fire: `:fire:` when removing code or files
63 | * :green_heart: `:green_heart:` when fixing the CI build
64 | * :white_check_mark: `:white_check_mark:` when adding tests
65 | * :lock: `:lock:` when dealing with security
66 | * :arrow_up: `:arrow_up:` when upgrading dependencies
67 | * :arrow_down: `:arrow_down:` when downgrading dependencies
68 | * :shirt: `:shirt:` when removing linter warnings
69 |
70 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2015 Israel Blancas @iblancasa (http://iblancasa.com/)
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
5 | and associated documentation files (the “Software”), to deal in the Software without
6 | restriction, including without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
8 | the Software is furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all copies or
11 | substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
15 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
16 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
17 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
18 | IN THE SOFTWARE.
--------------------------------------------------------------------------------
/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## Purpose
2 | _Describe the problem/issue/feature (also mention any links if available)._
3 |
4 | ## Approach
5 | _How does this change address the problem?_
6 |
7 | ## New Issues/FIXME?
8 | _Are there any new FIXME or issues that need to be corrected in future?_
9 |
10 | ## Learning
11 | _Describe any new relevant findings_
12 |
13 | _Links to blog posts, patterns, libraries or addons used to solve this problem_
14 |
15 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GitHubCity
2 |
3 | [](https://travis-ci.org/iblancasa/GitHubCity)
4 | [](https://codecov.io/gh/iblancasa/GitHubCity)
5 | [](https://snyk.io/test/github/iblancasa/githubcity)
6 | [](https://gemnasium.com/github.com/iblancasa/GitHubCity)
7 | [](https://github.com/iblancasa/GitHubCity)
8 |
9 | ## What is this?
10 |
11 | This is a small library which gets all GitHub users given a city. Original idea is [Top-GitHub-Users-Data](https://github.com/JJ/top-github-users-data) by [@JJ](https://github.com/JJ), an adaptation of [top-github-users](https://github.com/paulmillr/top-github-users) from [@paulmillr](https://github.com/paulmillr/).
12 |
13 | ## What can I do with this?
14 |
15 | This is an amazing Python library to study the GitHub community in a
16 | location. You can get all the GitHub users from a given location and
17 | obtain some data. For instance, you can generate one ranking
18 | like
19 | [this ranking with the users from Spain (and its provinces)](https://github.com/iblancasa/GitHubRankingsSpain).
20 |
21 | ## What do I need to run this?
22 |
23 | You will need to install Python 3. *Python 2 is not supported*.
24 |
25 | In addition, you will need to get an ID and Secret for a GitHub
26 | application, [after registering your own application here!](https://github.com/settings/applications/new).
27 |
28 | ### Dependencies
29 |
30 | There is a ``requirements.txt`` file included in this repo. Install all dependencies with ``pip install -r requirements.txt``.
31 |
32 | ## How to install
33 |
34 | There are two options to install this library and its dependencies.
35 |
36 | ### Install from the source code
37 |
38 | You need to clone (or download) this repository. Then, go to ``src`` folder and run:
39 | ```shell
40 | python setup.py install
41 | ```
42 |
43 | ### Install from pip
44 |
45 | [This library is available to be installed using pip.](https://pypi.python.org/pypi?:action=display&name=githubcity)
46 |
47 | ```shell
48 | pip install githubcity
49 | ```
50 |
51 |
52 | ## Getting started
53 |
54 | [You can see one example about how to use this library here](https://github.com/iblancasa/GitHubSpanishRankingGenerator).
55 |
56 | ### Basic example
57 |
58 | ```python
59 | idGH = os.environ.get('GH_ID')
60 | secretGH = os.environ.get('GH_SECRET')
61 | configuration = {
62 | "excludedLocations": [],
63 | "excludedUsers": [],
64 | "intervals": [
65 | [
66 | "2008-01-01",
67 | "2015-12-30"
68 | ]
69 | ],
70 | "last_date": "2015-12-30",
71 | "locations": [
72 | "Ceuta"
73 | ],
74 | "name": "Ceuta"
75 | }
76 | ciudad = GitHubCity(idGH, secretGH, configuration)
77 | ciudad.calculateBestIntervals()
78 | ciudad.addFilter("repos", ">1")
79 | ciudad.addFilter("followers", ">1")
80 | ciudad.getCityUsers()
81 | ```
82 |
83 | ### Excluding users
84 |
85 | You can generate a JSON file like this (each element is a user whose properties are name -the login name of the user- and reason -why this user has been banned-):
86 |
87 | ```json
88 | [
89 | {
90 | "name": "asdpokjdf",
91 | "reason": "It is only a test"
92 | },
93 | {
94 | "name": "asdfasdf",
95 | "reason": "It is only a test"
96 | },
97 | {
98 | "name": "asdfasdfadf",
99 | "reason": "It is only a test"
100 | }
101 | ]
102 | ```
103 |
104 |
105 | ## The MIT License (MIT)
106 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
107 |
108 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software
109 | and associated documentation files (the “Software”), to deal in the Software without
110 | restriction, including without limitation the rights to use, copy, modify, merge, publish,
111 | distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
112 | the Software is furnished to do so, subject to the following conditions:
113 |
114 | The above copyright notice and this permission notice shall be included in all copies or
115 | substantial portions of the Software.
116 |
117 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
118 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
119 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
120 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
121 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
122 | IN THE SOFTWARE.
123 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iblancasa/GitHubCity/c5299c6859dbefbd869e2ac6ff2faff2a39cf32f/logo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | python-dateutil==2.6.1
2 | beautifulsoup4==4.6.0
3 | lxml==4.1.1
4 | coloredlogs==7.3.1
5 | pystache==0.5.4
6 | httpretty==0.8.14
7 |
8 |
--------------------------------------------------------------------------------
/src/githubcity/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .ghcity import GitHubCity
3 | from .ghregion import GitHubRegion
4 | from .ghuser import GitHubUser
5 |
--------------------------------------------------------------------------------
/src/githubcity/ghcity.py:
--------------------------------------------------------------------------------
1 | """Get all the data about a given GitHub city.
2 |
3 | This module allows developers to get all the GitHub users that have a
4 | given city in their profile. For example, if I want to get all users
5 | from London, I will get all users that have London in their
6 | profiles (they could live in London or not).
7 |
8 | Author: Israel Blancas @iblancasa
9 | Original idea: https://github.com/JJ/github-city-rankings
10 | License:
11 |
12 | The MIT License (MIT)
13 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
14 |
15 | Permission is hereby granted, free of charge, to any person
16 | obtaining a copy of this software and associated documentation
17 | files (the Software), to deal in the Software
18 | without restriction, including without
19 | limitation the rights to use, copy, modify, merge,
20 | publish, distribute, sublicense, and/or sell
21 | copies of the Software, and to permit persons to whom the
22 | Software is furnished to do so, subject to the following conditions:
23 |
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 |
27 | THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
29 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
30 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
31 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
33 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
34 | USE OR OTHER DEALINGS IN THE SOFTWARE.
35 | """
36 |
37 | from __future__ import absolute_import
38 | from urllib.request import Request, urlopen
39 | from urllib.parse import quote
40 | from urllib.error import HTTPError, URLError
41 | from threading import Lock, Thread
42 | from calendar import timegm
43 | from queue import Queue, Empty
44 | from time import sleep
45 | from json import load, loads, dump
46 | from logging import getLogger
47 | from pystache import parse, Renderer
48 | from coloredlogs import install
49 | import datetime
50 | from gzip import GzipFile
51 | from io import BytesIO
52 | from githubcity.ghuser import GitHubUser
53 |
54 |
55 | class GitHubCity:
56 | """Manager of a GithubCity."""
57 |
def __init__(self, githubID, githubSecret, configuration=False,
             verbosity=0):
    """Create a GitHubCity manager.

    :param githubID: your GitHub ID.
    :type githubID: str.
    :param githubSecret: your GitHub Secret.
    :type githubSecret: str.
    :param configuration: configuration of the class (see readConfig).
        When falsy, no configuration is loaded.
    :type configuration: dict.
    :param verbosity: level handed to ``coloredlogs.install``.
    :type verbosity: int.
    :raises Exception: if githubID or githubSecret is empty.

    Note
    ----------
    To get your ID and secret, you will need to create
    an application in
    https://github.com/settings/applications/new
    """
    self.__logger = getLogger("GitHubCity")
    self.__logger.info("Starting GitHubCity")
    install(level=verbosity)

    # Logger.exception is meant for exception handlers only; there is no
    # active exception here, so report these with error() instead.
    if not githubID:
        self.__logger.error("init: No GitHub ID inserted")
        raise Exception("init: No GitHub ID inserted")
    self.__githubID = githubID

    if not githubSecret:
        self.__logger.error("init: No GitHub Secret inserted")
        raise Exception("init: No GitHub Secret inserted")
    self.__githubSecret = githubSecret

    self.__usersToProccess = Queue()
    self.__urlLocations = ""
    self.__urlFilters = ""
    self.__cityUsers = set()
    self.__processedUsers = []
    self.__threads = set()
    self.__end = False
    self.__lastDay = False
    self.__lockGetUser = Lock()
    self.__lockReadAddUser = Lock()
    self.__server = "https://api.github.com/"
    self.__intervals = []
    # Sets, matching what readConfig stores, so ``.add`` always works.
    self.__excludedUsers = set()
    self.__excludedLocations = set()
    # Defaults so getConfig() works even when no configuration
    # (or one without "locations") has been loaded.
    self.__locations = []
    self.city = ""

    if configuration:
        self.readConfig(configuration)
        self.__logger.debug("Configuration set")
    else:
        self.__logger.warning("Not configuration set")
111 |
112 | # Read configurations ----------------------------------------------------
113 | def readConfig(self, configuration):
114 | """Read configuration from dict.
115 |
116 | Read configuration from a JSON configuration file.
117 |
118 | :param configuration: configuration to load.
119 | :type configuration: dict.
120 | """
121 | self.__logger.debug("Reading configuration")
122 | self.city = configuration["name"]
123 | self.__logger.info("City name: " + self.city)
124 | if "intervals" in configuration:
125 | self.__intervals = configuration["intervals"]
126 | self.__logger.debug("Intervals: " +
127 | str(self.__intervals))
128 |
129 | if "last_date" in configuration:
130 | self.__lastDay = configuration["last_date"]
131 | self.__logger.debug("Last day: " + self.__lastDay)
132 |
133 | if "locations" in configuration:
134 | self.__locations = configuration["locations"]
135 | self.__logger.debug("Locations: " +
136 | str(self.__locations))
137 | self.__addLocationsToURL(self.__locations)
138 |
139 | if "excludedUsers" in configuration:
140 | self.__excludedUsers= set()
141 | self.__excludedLocations = set()
142 |
143 | excluded = configuration["excludedUsers"]
144 | for e in excluded:
145 | self.__excludedUsers.add(e)
146 | self.__logger.debug("Excluded users " +
147 | str(self.__excludedUsers))
148 |
149 | if "excludedLocations" in configuration:
150 | excluded = configuration["excludedLocations"]
151 | for e in excluded:
152 | self.__excludedLocations.add(e)
153 |
154 | self.__logger.debug("Excluded locations " +
155 | str(self.__excludedLocations))
156 |
157 | def readConfigFromJSON(self, fileName):
158 | """Read configuration from JSON.
159 |
160 | :param fileName: path to the configuration file.
161 | :type fileName: str.
162 | """
163 | self.__logger.debug("readConfigFromJSON: reading from " + fileName)
164 | with open(fileName) as data_file:
165 | data = load(data_file)
166 | self.readConfig(data)
167 |
168 | def configToJson(self, fileName):
169 | """Save the configuration of the city in a JSON.
170 |
171 | :param fileName: path to the output file.
172 | :type fileName: str.
173 | """
174 | config = self.getConfig()
175 | with open(fileName, "w") as outfile:
176 | dump(config, outfile, indent=4, sort_keys=True)
177 |
178 | def getConfig(self):
179 | """Return the configuration of the city.
180 |
181 | :return: configuration of the city.
182 | :rtype: dict.
183 | """
184 | config = {}
185 | config["name"] = self.city
186 | config["intervals"] = self.__intervals
187 | config["last_date"] = self.__lastDay
188 | config["excludedUsers"] = []
189 | config["excludedLocations"] = []
190 |
191 | for e in self.__excludedUsers:
192 | config["excludedUsers"].append(e)
193 |
194 | for e in self.__excludedLocations:
195 | config["excludedLocations"].append(e)
196 |
197 | config["locations"] = self.__locations
198 | return config
199 | # End read configurations --------------------------------------------------
200 |
201 | # Get and process users ---------------------------------------------------
202 | def addFilter(self, field, value):
203 | """Add a filter to the seach.
204 |
205 | :param field: what field filter (see GitHub search).
206 | :type field: str.
207 | :param value: value of the filter (see GitHub search).
208 | :type value: str.
209 | """
210 | if "<" not in value or ">" not in value or ".." not in value:
211 | value = ":" + value
212 |
213 | if self.__urlFilters:
214 | self.__urlFilters += "+" + field + str(quote(value))
215 | else:
216 | self.__urlFilters += field + str(quote(value))
217 |
218 | def __processUsers(self):
219 | """Process users of the queue."""
220 | while self.__usersToProccess.empty() and not self.__end:
221 | pass
222 |
223 | while not self.__end or not self.__usersToProccess.empty():
224 | self.__lockGetUser.acquire()
225 | try:
226 | new_user = self.__usersToProccess.get(False)
227 | except Empty:
228 | self.__lockGetUser.release()
229 | return
230 | else:
231 | self.__lockGetUser.release()
232 | self.__addUser(new_user)
233 | self.__logger.info("__processUsers:" +
234 | str(self.__usersToProccess.qsize()) +
235 | " users to process")
236 |
237 | def __addUser(self, new_user):
238 | """Add new users to the list.
239 |
240 | :param new_user: name of a GitHub user to include in
241 | the ranking
242 | :type new_user: str.
243 | """
244 | self.__lockReadAddUser.acquire()
245 | if new_user not in self.__cityUsers and \
246 | new_user not in self.__excludedUsers:
247 | self.__lockReadAddUser.release()
248 | self.__logger.debug("__addUser: Adding " + new_user)
249 | self.__cityUsers.add(new_user)
250 |
251 | myNewUser = GitHubUser(new_user)
252 | myNewUser.getData()
253 | myNewUser.getRealContributions()
254 |
255 | userLoc = myNewUser.location
256 | if not any(s in userLoc for s in self.__excludedLocations):
257 | self.__processedUsers.append(myNewUser)
258 | else:
259 | self.__logger.debug("__addUser: Excluding " + new_user)
260 | self.__lockReadAddUser.release()
261 |
262 | def __getPeriodUsers(self, start_date, final_date):
263 | """Get all the users given a period.
264 |
265 | :param start_date: start date of the range to search
266 | users
267 | :type start_date: time.date.
268 | :param final_date: final date of the range to search
269 | users
270 | :type final_date: time.date.
271 | """
272 | self.__logger.info("Getting users from " + start_date +
273 | " to " + final_date)
274 |
275 | url = self.__getURL(1, start_date, final_date)
276 | data = self.__readAPI(url)
277 | users = []
278 |
279 | total_pages = 10000
280 | page = 1
281 |
282 | while total_pages >= page:
283 | url = self.__getURL(page, start_date, final_date)
284 | data = self.__readAPI(url)
285 | self.__logger.debug(str(len(data['items'])) +
286 | " users found")
287 | for u in data['items']:
288 | users.append(u["login"])
289 | self.__usersToProccess.put(u["login"])
290 | total_count = data["total_count"]
291 | total_pages = int(total_count / 100) + 1
292 | page += 1
293 | return users
294 |
295 | def getCityUsers(self, numberOfThreads=20):
296 | """Get all the users from the city.
297 |
298 | :param numberOfThreads: number of threads to run.
299 | :type numberOfThreads: int.
300 | """
301 | if not self.__intervals:
302 | self.__logger.debug("Calculating best intervals")
303 | self.calculateBestIntervals()
304 |
305 | self.__end = False
306 | self.__threads = set()
307 |
308 | comprobationURL = self.__getURL()
309 | self.__readAPI(comprobationURL)
310 |
311 | self.__launchThreads(numberOfThreads)
312 | self.__logger.debug("Launching threads")
313 | for i in self.__intervals:
314 | self.__getPeriodUsers(i[0], i[1])
315 |
316 | self.__end = True
317 |
318 | for t in self.__threads:
319 | t.join()
320 | self.__logger.debug("Threads joined")
321 | # End of get and process users --------------------------------------------
322 |
323 | # Calculate and set intervals----------------------------------------------
324 | def calculateBestIntervals(self):
325 | """Calcule valid intervals of a city."""
326 | self.__intervals = []
327 | self.__readAPI(self.__getURL())
328 | today = datetime.datetime.now().date()
329 |
330 | self.__validInterval(datetime.date(2008, 1, 1), today)
331 | self.__logger.info("Total number of intervals: " +
332 | str(len(self.__intervals)))
333 | self.__lastDay = today.strftime("%Y-%m-%d")
334 |
335 | def __validInterval(self, start, finish):
336 | """Check if the interval is correct.
337 |
338 | An interval is correct if it has less than 1001
339 | users. If the interval is correct, it will be added
340 | to '_intervals' attribute. Else, interval will be
341 | split in two news intervals and these intervals
342 | will be checked.
343 |
344 | :param start: start date of the interval.
345 | :type start: datetime.date.
346 | :param finish: finish date of the interval.
347 | :type finish: datetime.date.
348 | """
349 | url = self.__getURL(1,
350 | start.strftime("%Y-%m-%d"),
351 | finish.strftime("%Y-%m-%d"))
352 |
353 | data = self.__readAPI(url)
354 |
355 | if data["total_count"] >= 1000:
356 | middle = start + (finish - start)/2
357 | self.__validInterval(start, middle)
358 | self.__validInterval(middle, finish)
359 | else:
360 | self.__intervals.append([start.strftime("%Y-%m-%d"),
361 | finish.strftime("%Y-%m-%d")])
362 | self.__logger.info("New valid interval: " +
363 | start.strftime("%Y-%m-%d") +
364 | " to " +
365 | finish.strftime("%Y-%m-%d"))
366 | # End calculate and set intervals-------------------------------------------
367 |
368 | # Import/export users ----------------------------------------------------
def export(self, template_file_name, output_file_name,
           sort="public", data=None, limit=0):
    """Render the ranking through a template and write it to a file.

    Args:
        template_file_name (str): path of the moustache template.
        output_file_name (str): path of the file to create with the
            rendered ranking.
        sort (str): field to sort the users.
        data: extra value exposed to the template as ``extraData``.
        limit (int): maximum number of users to export; 0 exports all.
    """
    context = {
        "users": self.__exportUsers(sort, limit),
        "extraData": data,
    }

    with open(template_file_name) as source:
        raw_template = source.read()
    parsed_template = parse(raw_template)

    rendered = Renderer().render(parsed_template, context)

    with open(output_file_name, "w") as target:
        target.write(rendered)
395 |
396 | def getSortedUsers(self, order="public"):
397 | """Return a list with sorted users.
398 |
399 | :param order: the field to sort the users.
400 | - contributions (total number of contributions)
401 | - public (public contributions)
402 | - private (private contributions)
403 | - name
404 | - followers
405 | - join
406 | - organizations
407 | - repositories
408 | :type order: str.
409 | :return: a list of the github users sorted by the selected field.
410 | :rtype: str.
411 | """
412 | try:
413 | self.__processedUsers.sort(key=lambda u: getattr(u, order), reverse=True)
414 | except AttributeError:
415 | pass
416 | return self.__processedUsers
417 |
def __exportUsers(self, sort, limit=0):
    """Build the ranking entries of the users, sorted by a field.

    :param sort: field to sort the users
    :type sort: str.
    :param limit: maximum number of users to export; a falsy value
        exports every user.
    :type limit: int.
    :return: one dictionary per user (the result of its ``export()``)
        with an added 1-based ``position``. Every entry except the last
        one also gets ``comma`` set to True.
    :rtype: list.
    """
    ranking = self.getSortedUsers(sort)
    if limit:
        ranking = ranking[:limit]

    total = len(ranking)
    exported = []
    for rank, user in enumerate(ranking, start=1):
        entry = user.export()
        entry["position"] = rank
        # The last entry carries no "comma" key at all.
        if rank < total:
            entry["comma"] = True
        exported.append(entry)
    return exported
444 |
445 | # End import/export users ------------------------------------------------
446 |
447 | # Utilities --------------------------------------------------------------
def calculeToday(self):
    """Process the interval that goes from the last stored day to today.

    The last day (a ``%Y-%m-%d`` string) is parsed and handed, together
    with today's date, to ``__validInterval``.
    """
    self.__logger.debug("Add today")
    lastDay = datetime.datetime.strptime(self.__lastDay, "%Y-%m-%d")
    # NOTE(review): the start is a datetime while the end is a date;
    # confirm that __validInterval copes with the mixed types.
    currentDay = datetime.datetime.now().date()
    self.__validInterval(lastDay, currentDay)
454 |
def __addLocationsToURL(self, locations):
    """Append the given locations to the location filter of the search URL.

    Each location is URL-quoted and added to ``self.__urlLocations`` as
    ``+location:"<quoted name>"``.

    :param locations: locations where to search users.
    :type locations: list(str).
    """
    # Use the argument that was passed in; it used to be ignored in
    # favour of self.__locations.
    self.__urlLocations += "".join(
        "+location:\"" + str(quote(place)) + "\"" for place in locations)
464 |
def __launchThreads(self, numThreads):
    """Launch some daemon threads that start to process users.

    Every thread runs ``self.__processUsers`` and is registered in
    ``self.__threads`` before it is started.

    :param numThreads: number of threads to launch.
    :type numThreads: int.
    """
    for index in range(numThreads):
        self.__logger.debug("Launching thread number " + str(index))
        worker = Thread(target=self.__processUsers)
        # Thread.setDaemon() is deprecated; the attribute is equivalent.
        worker.daemon = True
        self.__threads.add(worker)
        worker.start()
480 |
def __readAPI(self, url):
    """Read a petition to the GitHub API (private).

    The request is repeated until the server answers with a 200 status.
    On an HTTP error other than 404 the method sleeps and retries: until
    the moment given by the ``X-RateLimit-Reset`` header when it is
    present, 30 seconds otherwise. Connection errors are retried after
    15 seconds.

    :param url: URL to query.
    :type url: str.
    :return: the response of the API -a dictionary with
        these fields-:
            * total_count (int): number of total
                users that match with the search
            * incomplete_results (bool):
                https://developer.github.com/v3/search/#timeouts-and-incomplete-results
            * items (List[dict]): a list with the
                users that match with the search
    :rtype: dict.
    :raises HTTPError: if the API answers with a 404 status.
    """
    # Adjacent literals keep the source indentation out of the header.
    hdr = {'User-Agent': 'curl/7.43.0 (x86_64-ubuntu) '
                         'libcurl/7.43.0 OpenSSL/1.0.1k zlib/1.2.8 '
                         'gh-rankings-grx',
           'Accept': 'application/vnd.github.v3.text-match+json',
           'Accept-Encoding': 'gzip'}
    code = 0
    response = None
    while code != 200:
        req = Request(url, headers=hdr)
        try:
            self.__logger.debug("Getting " + url)
            response = urlopen(req)
            code = response.code
            if code != 200:
                # The request is going to be repeated: release this one.
                response.close()
        except HTTPError as error:
            if error.code == 404:
                self.__logger.exception("_readAPI: ERROR 404")
                self.__logger.exception(str(error))
                # There is no body to parse; let the caller see the error
                # instead of failing later on an unbound response.
                raise
            reset_header = error.headers.get("X-RateLimit-Reset")
            if reset_header is None:
                log_message = "Error when reading response. Wait: 30 secs"
                sleep_duration = 30
            else:
                utcAux = datetime.datetime.utcnow().utctimetuple()
                now_sec = timegm(utcAux)
                # The reset moment may already be in the past; sleep()
                # rejects negative values.
                sleep_duration = max(0, int(reset_header) - now_sec)
                log_message = "Limit of API. Wait: "
                log_message += str(sleep_duration)
                log_message += " secs"
            self.__logger.warning(log_message)
            sleep(sleep_duration)
            code = 0
        except URLError as error:
            self.__logger.exception(str(error))
            self.__logger.exception("_readAPI: waiting 15 secs")
            sleep(15)

    try:
        responseBody = response.read()
        if response.getheader('Content-Encoding') == 'gzip':
            with GzipFile(fileobj=BytesIO(responseBody)) as gzFile:
                responseBody = gzFile.read()
    finally:
        response.close()

    return loads(responseBody.decode('utf-8'))
544 |
def __getURL(self, page=1, start_date=None,
             final_date=None, order="asc"):
    """Build the API URL used to search users.

    :param page: number of the page.
    :param start_date: start date of the range to search
        users (Y-m-d).
    :param final_date: final date of the range to search
        users (Y-m-d).
    :param order: order of the query. Valid values are
        'asc' or 'desc'. Default: asc. It is only used when both
        dates are given; otherwise the query is always ascending.
    :return: formatted URL.
    :rtype: str.
    """
    pieces = [
        self.__server, "search/users?client_id=",
        self.__githubID, "&client_secret=",
        self.__githubSecret,
        "&order=desc&q=sort:joined+type:user",
        self.__urlLocations,
        self.__urlFilters,
    ]
    if start_date and final_date:
        # Restrict the search to the users created inside the range.
        pieces += ["+created:", start_date, "..", final_date,
                   "&sort=joined&order=", order]
    else:
        pieces.append("&sort=joined&order=asc")
    pieces += ["&per_page=100&page=", str(page)]
    return "".join(pieces)
580 | # End utilities ----------------------------------------------------------
581 |
--------------------------------------------------------------------------------
/src/githubcity/ghregion.py:
--------------------------------------------------------------------------------
1 | """Mix the output of different GitHubCities.
2 |
3 | This module allows developers to get all the GitHub users that have a
4 | given city in their profile. For example, if I want to get all users
5 | from London, I will get all users that have London in their
6 | profiles (they could live in London or not).
7 |
8 | Author: Israel Blancas @iblancasa
9 | Original idea: https://github.com/JJ/github-city-rankings
10 | License:
11 |
12 | The MIT License (MIT)
13 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
14 |
15 | Permission is hereby granted, free of charge, to any person
16 | obtaining a copy of this software and associated documentation
17 | files (the Software), to deal in the Software
18 | without restriction, including without
19 | limitation the rights to use, copy, modify, merge,
20 | publish, distribute, sublicense, and/or sell
21 | copies of the Software, and to permit persons to whom the
22 | Software is furnished to do so, subject to the following conditions:
23 |
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 |
27 | THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
29 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
30 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
31 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
33 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
34 | USE OR OTHER DEALINGS IN THE SOFTWARE.
35 | """
36 |
37 | from __future__ import absolute_import
38 | from json import load
39 | from pystache import Renderer, parse
40 |
41 |
class GitHubRegion():
    """Handle the users from different cities.

    The users are plain dictionaries read from the JSON files added with
    ``addCity``; a user name is stored only once.
    """

    # Fields accepted by getSortedUsers; any other value keeps the order.
    __SORT_FIELDS = ("contributions", "public", "private", "name",
                     "followers", "join", "organizations", "repositories")

    def __init__(self):
        """Init the object."""
        self.__users = []

    def addCity(self, fileName):
        """Add a JSON file and read the users.

        Users whose name is already stored are skipped.

        :param fileName: path to the JSON file. This file has to have a list of
            users, called users.
        :type fileName: str.
        """
        with open(fileName) as data_file:
            data = load(data_file)

        # One set lookup per user instead of scanning the whole list.
        knownNames = {stored["name"] for stored in self.__users}
        for u in data["users"]:
            if u["name"] not in knownNames:
                knownNames.add(u["name"])
                self.__users.append(u)

    def export(self, template_file_name, output_file_name,
               sort="public", data=None, limit=0):
        """Export ranking to a file.

        Every exported user dictionary gets a 1-based ``position`` and a
        ``comma`` flag that is True for all users but the last one.

        Args:
            template_file_name (str): where is the template
                (moustache template)
            output_file_name (str): where create the file with the ranking
            sort (str): field to sort the users
            data: extra data exposed to the template as ``extraData``
            limit (int): maximum number of users to export; a falsy value
                exports all of them
        """
        # Honour the requested field; it used to be ignored and the
        # ranking was always sorted by "public".
        exportedUsers = self.getSortedUsers(sort)
        template = self.__getTemplate(template_file_name)

        if limit:
            exportedUsers = exportedUsers[:limit]

        total = len(exportedUsers)
        for position, u in enumerate(exportedUsers, start=1):
            u["position"] = position
            u["comma"] = position < total

        exportedData = {"users": exportedUsers, "extraData": data}

        renderer = Renderer()
        output = renderer.render(template, exportedData)

        with open(output_file_name, "w") as text_file:
            text_file.write(output)

    @staticmethod
    def __getTemplate(template_file_name):
        """Get the template to save the ranking.

        :param template_file_name: path to the template.
        :type template_file_name: str.

        :return: template for the file.
        :rtype: pystache's template.
        """
        with open(template_file_name) as template_file:
            template_raw = template_file.read()

        return parse(template_raw)

    def getSortedUsers(self, order="public"):
        """Return a list with sorted users.

        The internal list is sorted in place, in descending order, and
        returned. An unknown field leaves the current order untouched.

        :param order: the field to sort the users.
            - contributions (total number of contributions)
            - public (public contributions)
            - private (private contributions)
            - name
            - followers
            - join
            - organizations
            - repositories
        :type order: str.
        :return: a list of the github users sorted by the selected field.
        :rtype: list.
        """
        if order in self.__SORT_FIELDS:
            self.__users.sort(key=lambda u: u[order], reverse=True)
        return self.__users
149 |
--------------------------------------------------------------------------------
/src/githubcity/ghuser.py:
--------------------------------------------------------------------------------
1 | """
2 | Allows to get all data about a given GitHub user.
3 |
4 | Author: Israel Blancas @iblancasa
5 | Original idea: https://github.com/JJ/github-city-rankings
6 | License:
7 |
8 | The MIT License (MIT)
9 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
10 |
11 | Permission is hereby granted, free of charge, to any
12 | person obtaining a copy of this software
13 | and associated documentation files (the "Software"), to
14 | deal in the Software without restriction, including without
15 | limitation the rights to use, copy, modify, merge, publish,
16 | distribute, sublicense, and/or sell copies of the
17 | Software, and to permit persons to whom
18 | the Software is furnished to do so, subject to the following conditions:
19 |
20 | The above copyright notice and this permission notice shall
21 | be included in all copies or substantial portions of the Software.
22 |
23 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
24 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
27 | FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
29 | OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30 | OTHER DEALINGS IN THE SOFTWARE.
31 |
32 | """
33 | from __future__ import absolute_import
34 | from time import sleep
35 | from urllib.request import Request, urlopen
36 | from urllib.error import HTTPError, URLError
37 | from datetime import datetime
38 | from dateutil.relativedelta import relativedelta
39 | from bs4 import BeautifulSoup
40 |
41 |
class GitHubUser:
    """Manager of a GitHub User.

    The data is scraped from the HTML of the user's profile page.

    Attributes:
        name (str): Name (login) of the user.
        server (str): base URL of the server to query.
        contributions (int): total contributions (web - last year).
        public (int): public contributions in the last year.
        private (int): private contributions in the last year.
        followers (int): total number of followers of an user.
        numberOfRepos (int): number of repositories of an user.
        organizations (int): number of public organizations.
        join (str): when the user joined. Format: %Y-%m-%d.
        avatar (str): URL where the user's avatar is.
        bio (str): bio of the user.
        location (str): location shown in the profile.
    """

    def __init__(self, name, server="https://github.com/"):
        """Constructor of the class.

        Numeric fields start at -1 (0 for public/private) and text fields
        are empty until the profile is scraped.

        :param name: name (login) of an user in GitHub.
        :type name: str.
        :param server: server to query data. Default: https://github.com/
        :type server: str.
        """
        self.name = name
        self.server = server
        self.followers = -1
        self.numberOfRepos = -1
        self.organizations = -1
        self.contributions = -1
        self.join = ""
        self.avatar = ""
        self.bio = ""
        self.public = 0
        self.private = 0
        self.location = ""

    def export(self):
        """Export all attributes of the user to a dict.

        :return: attributes of the user.
        :rtype: dict.
        """
        return {
            "name": self.name,
            "contributions": self.contributions,
            "avatar": self.avatar,
            "followers": self.followers,
            "join": self.join,
            "organizations": self.organizations,
            "repositories": self.numberOfRepos,
            "bio": self.bio,
            "private": self.private,
            "public": self.public,
            "location": self.location,
        }

    @staticmethod
    def isASCII(s):
        """Check if a string is ASCII.

        :param s: string to check if it is ASCII.
        :type s: str.
        :return: True if the string is ASCII.
        :rtype: boolean.
        """
        return all(ord(c) < 128 for c in s)

    def __reportError(self, error):
        """Print a scraping error together with the affected user.

        :param error: the exception that was caught.
        :type error: Exception.
        """
        print("There was an error with the user " + self.name)
        print(error)

    @staticmethod
    def __parseCounter(text):
        """Convert the text of a profile counter to an integer.

        Plain numbers ("141") and abbreviated thousands ("5.2k", "12k")
        are supported.

        :param text: text of the counter.
        :type text: str.
        :return: value of the counter.
        :rtype: int.
        :raises ValueError: if the text is not a number.
        """
        cleaned = text.replace(" ", "").replace("\n", "")
        if "k" not in cleaned:
            return int(cleaned)
        whole, _, decimals = cleaned.replace("k", "").partition(".")
        # Pad the decimals up to thousandths: "5.2" -> 5 * 1000 + 200.
        return int(whole or 0) * 1000 + int((decimals + "000")[:3])

    def __getContributions(self, web):
        """Scrap the contributions from a GitHub profile.

        The value is left untouched when the heading cannot be found
        or parsed.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        contributions_raw = web.find_all('h2',
                                         {'class': 'f4 text-normal mb-2'})
        try:
            contrText = contributions_raw[0].text
            contrText = contrText.lstrip().split(" ")[0]
            contrText = contrText.replace(",", "")
            # Converted inside the try so that a missing heading does not
            # end in an unbound variable.
            self.contributions = int(contrText)
        except (IndexError, AttributeError, ValueError) as error:
            self.__reportError(error)

    def __getAvatar(self, web):
        """Scrap the avatar from a GitHub profile.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        try:
            # The last 10 characters of the URL are dropped (presumably
            # the size/version query string -- TODO confirm).
            self.avatar = web.find("img", {"class": "avatar"})['src'][:-10]
        except (IndexError, AttributeError, KeyError, TypeError) as error:
            # TypeError: find() returned None because there is no avatar.
            self.__reportError(error)

    def __getNumberOfRepositories(self, web):
        """Scrap the number of repositories from a GitHub profile.

        The first counter of the page is used.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        counters = web.find_all('span', {'class': 'Counter'})
        try:
            self.numberOfRepos = GitHubUser.__parseCounter(counters[0].text)
        except (IndexError, AttributeError, ValueError) as error:
            self.__reportError(error)

    def __getNumberOfFollowers(self, web):
        """Scrap the number of followers from a GitHub profile.

        The third counter of the page is used.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        counters = web.find_all('span', {'class': 'Counter'})
        try:
            self.followers = GitHubUser.__parseCounter(counters[2].text)
        except (IndexError, AttributeError, ValueError) as error:
            self.__reportError(error)

    def __getLocation(self, web):
        """Scrap the location from a GitHub profile.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        try:
            self.location = web.find("span", {"class": "p-label"}).text
        except AttributeError as error:
            self.__reportError(error)

    def __getJoin(self, web):
        """Scrap the join date from a GitHub profile.

        The date is taken from the last 10 characters of the link whose
        text contains "Joined GitHub".

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        for link in web.findAll("a", {"class": "dropdown-item"}):
            try:
                if "Joined GitHub" in link.text:
                    self.join = link["href"][-10:]
            except (IndexError, AttributeError, KeyError) as error:
                self.__reportError(error)

    def __getBio(self, web):
        """Scrap the bio from a GitHub profile.

        Line breaks, quotes and backslashes are removed and tabs become
        spaces. A bio with non-ASCII characters is stored as empty.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        bio = web.find_all("div", {"class": "user-profile-bio"})
        if not bio:
            return

        try:
            bioText = bio[0].text
            if bioText and GitHubUser.isASCII(bioText):
                bioText = bioText.replace("\n", "")
                bioText = bioText.replace("\t", " ").replace("\"", "")
                bioText = bioText.replace("\'", "").replace("\\", "")
                self.bio = bioText
            else:
                self.bio = ""
        except (IndexError, AttributeError) as error:
            self.__reportError(error)

    def __getOrganizations(self, web):
        """Scrap the number of organizations from a GitHub profile.

        :param web: parsed web.
        :type web: BeautifulSoup node.
        """
        orgsElements = web.find_all("a", {"class": "avatar-group-item"})
        self.organizations = len(orgsElements)

    def getData(self):
        """Get data of the GitHub user.

        Downloads the profile page and fills the attributes of the user.

        :raises Exception: if the profile does not exist (404).
        """
        url = self.server + self.name
        data = GitHubUser.__getDataFromURL(url)
        web = BeautifulSoup(data, "lxml")
        self.__getContributions(web)
        self.__getLocation(web)
        self.__getAvatar(web)
        self.__getNumberOfRepositories(web)
        self.__getNumberOfFollowers(web)
        self.__getBio(web)
        self.__getJoin(web)
        self.__getOrganizations(web)

    def getRealContributions(self):
        """Get the real number of contributions (private + public).

        The profile overview is downloaded in month-sized windows covering
        the last 366 days. The numbers scraped from each window are added
        up into ``private``; ``public`` becomes ``contributions`` minus
        that sum, never below zero.
        """
        now = datetime.now()
        datefrom = now - relativedelta(days=366)
        dateto = datefrom + relativedelta(months=1) - relativedelta(days=1)
        private = 0

        while datefrom < now:
            fromstr = datefrom.strftime("%Y-%m-%d")
            tostr = dateto.strftime("%Y-%m-%d")
            url = self.server + self.name
            url += "?tab=overview&from=" + fromstr + "&to=" + tostr

            data = GitHubUser.__getDataFromURL(url)
            web = BeautifulSoup(data, "lxml")

            pcontribs = web.find_all(
                "span", {"class": "f4 lh-condensed m-0 text-gray"})
            notes = web.find_all('span', {'class': 'text-gray m-0'})

            noContribs = any(
                "had no activity during this period." in note.text
                for note in notes)

            if not noContribs:
                try:
                    for contrib in pcontribs:
                        contribution = contrib.text
                        contribution = contribution.lstrip().replace(",", "")
                        contribution = contribution.replace("\n", " ")
                        # The number is the first word of the text.
                        private += int(contribution.partition(" ")[0])
                except (IndexError, AttributeError, ValueError) as error:
                    self.__reportError(error)

            datefrom += relativedelta(months=1)
            dateto += relativedelta(months=1)

        self.private = private
        self.public = self.contributions - private

        if self.public < 0:  # Is not exact
            self.public = 0

    @staticmethod
    def __getDataFromURL(url):
        """Read HTML data from an user GitHub profile.

        The download is retried until the server answers with a 200
        status; failed attempts wait 3 seconds before the next try.

        :param url: URL of the webpage to download.
        :type url: str.
        :return: webpage downloaded.
        :rtype: str.
        :raises Exception: if the server answers with a 404 status.
        """
        while True:
            try:
                response = urlopen(Request(url))
            except HTTPError as error:
                if error.code == 404:
                    raise Exception("User was not found") from error
                # Back off instead of hammering the server in a busy loop.
                sleep(3)
                continue
            except URLError:
                sleep(3)
                continue

            try:
                sleep(0.01)
                if response.code == 200:
                    return response.read().decode('utf-8')
            finally:
                response.close()
354 |
--------------------------------------------------------------------------------
/src/manualtest/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "excludedLocations": [
3 | "Granada"
4 | ],
5 | "excludedUsers": [
6 | "vrivas"
7 | ],
8 | "intervals": [
9 | [
10 | "2008-01-01",
11 | "2015-12-24"
12 | ]
13 | ],
14 | "last_date": "2015-12-24",
15 | "locations": [
16 | "Jaén"
17 | ],
18 | "name": "Jaen"
19 | }
20 |
--------------------------------------------------------------------------------
/src/manualtest/template:
--------------------------------------------------------------------------------
1 | {{#users}}
2 | {{name}}
3 | {{/users}}
4 |
--------------------------------------------------------------------------------
/src/run.py:
--------------------------------------------------------------------------------
1 | from githubcity.ghcity import GitHubCity
2 | from os import environ
3 | import sys
4 |
5 |
def main(argv):
    """Create the GitHubCity of Ceuta with credentials from the environment.

    The GitHub client id and secret are read from the ``GH_ID`` and
    ``GH_SECRET`` environment variables.

    :param argv: command line arguments (currently unused).
    :type argv: list(str).
    """
    clientID = environ.get('GH_ID')
    clientSecret = environ.get('GH_SECRET')

    # Search every user of Ceuta that joined inside the interval.
    searchInterval = ["2008-01-01", "2015-12-30"]
    configuration = {
        "excludedLocations": [],
        "excludedUsers": [],
        "intervals": [searchInterval],
        "last_date": "2015-12-30",
        "locations": ["Ceuta"],
        "name": "Ceuta",
    }

    city = GitHubCity(clientID, clientSecret, configuration)
    # city.readConfigFromJSON(argv[0])
26 |
27 |
# Run main() with the command line arguments (program name excluded)
# only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv[1:])
30 |
--------------------------------------------------------------------------------
/src/runUser.py:
--------------------------------------------------------------------------------
1 | from githubcity.ghuser import GitHubUser
2 | import sys
3 |
4 |
def main(argv):
    """Print the exported data of a GitHub user.

    The profile data and the real (public/private) contributions of the
    user are downloaded and the resulting dictionary is printed.

    :param argv: command line arguments; the first one is the name
        (login) of the user.
    :type argv: list(str).
    :raises SystemExit: if no user name is given.
    """
    if not argv:
        # A usage message is clearer than a bare IndexError traceback.
        sys.exit("Usage: runUser.py <github-username>")

    user = GitHubUser(argv[0])
    user.getData()
    user.getRealContributions()
    print(user.export())
10 |
11 |
# Run main() with the command line arguments (program name excluded)
# only when this file is executed as a script.
if __name__ == "__main__":
    main(sys.argv[1:])
14 |
--------------------------------------------------------------------------------
/src/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = ../README.rst
3 |
--------------------------------------------------------------------------------
/src/setup.py:
--------------------------------------------------------------------------------
1 | """Allows to get all data about a given GitHub City.
2 |
3 | This module allows developers to get all the GitHub users that have a
4 | given city in their profile. For example, if I want to get all users
5 | from London, I will get all users that have London in their
6 | profiles (they could live in London or not).
7 |
8 | Author: Israel Blancas @iblancasa
9 | Original idea: https://github.com/JJ/github-city-rankings
10 | License:
11 |
12 | The MIT License (MIT)
13 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
14 |
15 | Permission is hereby granted, free of charge, to any person
16 | obtaining a copy of this software and associated documentation
17 | files (the Software), to deal in the Software
18 | without restriction, including without
19 | limitation the rights to use, copy, modify, merge,
20 | publish, distribute, sublicense, and/or sell
21 | copies of the Software, and to permit persons to whom the
22 | Software is furnished to do so, subject to the following conditions:
23 |
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 |
27 | THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
29 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
30 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
31 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
33 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
34 | USE OR OTHER DEALINGS IN THE SOFTWARE.
35 | """
import io
import os

from setuptools import setup

# Read the README relative to this file (not to the current working
# directory), as UTF-8, and close the file once it has been read.
_HERE = os.path.dirname(os.path.abspath(__file__))
with io.open(os.path.join(_HERE, os.pardir, 'README.md'),
             encoding='utf-8') as readme_file:
    _LONG_DESCRIPTION = readme_file.read()

setup(
    name='githubcity',
    version='1.0.4',
    description='GitHub city ranking creator',
    author='Israel Blancas @iblancasa',
    author_email='iblancasa@gmail.com',
    url='https://github.com/iblancasa/GitHubCity',
    download_url='https://github.com/iblancasa/GitHubCity/tarball/0.01',
    keywords=['github', 'ranking', 'data', 'api'],
    classifiers=[],
    install_requires=[
        'python-dateutil==2.4.2',
        'beautifulsoup4==4.6.0',
        'lxml==4.1.1',
        'coloredlogs==5.0',
        'pystache==0.5.4',
        'httpretty==0.8.14'
    ],
    packages=['githubcity'],
    py_modules=["githubcity"],
    long_description=_LONG_DESCRIPTION,
    license='MIT'
)
60 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from . import ghcityTester
3 |
--------------------------------------------------------------------------------
/tests/ghuserTester.py:
--------------------------------------------------------------------------------
1 | """Allows to get all data about a given GitHub City.
2 |
3 | This module allow to developers to get all users of GitHub that have a
4 | given city in their profile. For example, if I want getting all users
5 | from London,. I will get all users that have London in their
6 | profiles (they could live in London or not)
7 |
8 | Author: Israel Blancas @iblancasa
9 | Original idea: https://github.com/JJ/github-city-rankings
10 | License:
11 |
12 | The MIT License (MIT)
13 | Copyright (c) 2015-2017 Israel Blancas @iblancasa (http://iblancasa.com/)
14 |
15 | Permission is hereby granted, free of charge, to any person
16 | obtaining a copy of this software and associated documentation
17 | files (the Software), to deal in the Software
18 | without restriction, including without
19 | limitation the rights to use, copy, modify, merge,
20 | publish, distribute, sublicense, and/or sell
21 | copies of the Software, and to permit persons to whom the
22 | Software is furnished to do so, subject to the following conditions:
23 |
24 | The above copyright notice and this permission notice shall be
25 | included in all copies or substantial portions of the Software.
26 |
27 | THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
29 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
30 | PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
31 | OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
33 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
34 | USE OR OTHER DEALINGS IN THE SOFTWARE.
35 | """
36 |
37 | from __future__ import absolute_import
38 | import sys
39 | import os
40 | import unittest
41 | import httpretty
42 | from bs4 import BeautifulSoup
43 | sys.path.append(os.path.join(os.path.dirname(sys.path[0]), 'src'))
44 | print(os.path.join(os.path.dirname(sys.path[0]), 'src'))
45 | from githubcity.ghuser import GitHubUser
46 |
47 |
48 | class ghcityTester(unittest.TestCase):
49 | """Tester of the class GitHubUser."""
50 |
51 | def test_init(self):
52 | """Test the creation of the class with its configuration."""
53 | pass
54 |
55 | def test_getNumberOfRepos(self):
56 | """Test the private method getNumberOfRepos."""
57 | # Given
58 | data = ""
59 | with open("tests/resources/user.html") as userWeb:
60 | data = userWeb.read()
61 | web = BeautifulSoup(data, "lxml")
62 | # When
63 | user = GitHubUser("iblancasa")
64 | user._GitHubUser__getNumberOfRepositories(web)
65 | # Then
66 | self.assertEqual(user.numberOfRepos, 141)
67 |
68 | # Given
69 | data = ""
70 | with open("tests/resources/userk.html") as userWeb:
71 | data = userWeb.read()
72 | web = BeautifulSoup(data, "lxml")
73 | # When
74 | user = GitHubUser("vim-scripts")
75 | user._GitHubUser__getNumberOfRepositories(web)
76 | # Then
77 | self.assertEqual(user.numberOfRepos, 5200)
78 |
79 |
80 | def test_getNumberOfFollowers(self):
81 | """Test the private method getNumberOfFollowers."""
82 | # Given
83 | data = ""
84 | with open("tests/resources/user.html") as userWeb:
85 | data = userWeb.read()
86 | web = BeautifulSoup(data, "lxml")
87 | # When
88 | user = GitHubUser("iblancasa")
89 | user._GitHubUser__getNumberOfFollowers(web)
90 | # Then
91 | self.assertEqual(user.followers, 107)
92 |
93 | # Given
94 | data = ""
95 | with open("tests/resources/userk.html") as userWeb:
96 | data = userWeb.read()
97 | web = BeautifulSoup(data, "lxml")
98 | # When
99 | user = GitHubUser("vim-scripts")
100 | user._GitHubUser__getNumberOfFollowers(web)
101 | # Then
102 | self.assertEqual(user.followers, 3800)
103 |
104 | def test__getContributions(self):
105 | """Test the private method getContributions."""
106 | # Given
107 | data = ""
108 | with open("tests/resources/user.html") as userWeb:
109 | data = userWeb.read()
110 | web = BeautifulSoup(data, "lxml")
111 | # When
112 | user = GitHubUser("iblancasa")
113 | user._GitHubUser__getContributions(web)
114 | # Then
115 | self.assertEqual(user.contributions, 490)
116 |
117 | def test__getAvatar(self):
118 | """Test the private method getAvatar."""
119 | # Given
120 | data = ""
121 | with open("tests/resources/user.html") as userWeb:
122 | data = userWeb.read()
123 | web = BeautifulSoup(data, "lxml")
124 | # When
125 | user = GitHubUser("iblancasa")
126 | user._GitHubUser__getAvatar(web)
127 | # Then
128 | self.assertEqual(user.avatar,
129 | "https://avatars0.githubusercontent.com/u/4806311")
130 |
def test__getLocation(self):
    """Test the private method getLocation.

    Parses the saved profile page and expects the ``location``
    attribute to match the location text, accent included.
    """
    # Given
    with open("tests/resources/user.html") as fixture:
        html = fixture.read()
    soup = BeautifulSoup(html, "lxml")
    # When
    ghUser = GitHubUser("iblancasa")
    ghUser._GitHubUser__getLocation(soup)
    # Then
    self.assertEqual(ghUser.location, "Granada, Andalucía, Spain")
143 |
def test__getBio(self):
    """Test the private method getBio.

    Parses the saved profile page and expects the ``bio`` attribute to
    contain the biography text.
    """
    # Given
    with open("tests/resources/user.html") as fixture:
        html = fixture.read()
    soup = BeautifulSoup(html, "lxml")
    # When
    ghUser = GitHubUser("iblancasa")
    ghUser._GitHubUser__getBio(soup)
    # Then
    self.assertEqual(ghUser.bio, "This is my bio.")
156 |
def test__getOrganizations(self):
    """Test the private method getOrganizations.

    Parses the saved profile page and expects the ``organizations``
    attribute to be set to 7.
    """
    # Given
    with open("tests/resources/user.html") as fixture:
        html = fixture.read()
    soup = BeautifulSoup(html, "lxml")
    # When
    ghUser = GitHubUser("iblancasa")
    ghUser._GitHubUser__getOrganizations(soup)
    # Then
    self.assertEqual(ghUser.organizations, 7)
169 |
def test_export(self):
    """Test that export() returns the user's attributes as a dictionary.

    The attributes are assigned by hand (no page is parsed). Note that
    the ``numberOfRepos`` attribute is expected under the
    ``repositories`` key of the exported dictionary.
    """
    avatarUrl = "https://avatars0.githubusercontent.com/u/4806311"
    place = 'Granada, Andalucía, Spain'
    # Given
    ghUser = GitHubUser("iblancasa")
    ghUser.numberOfRepos = 141
    ghUser.bio = ""
    ghUser.private = 0
    ghUser.public = 0
    ghUser.location = place
    ghUser.contributions = 490
    ghUser.name = "iblancasa"
    ghUser.join = "2013-06-24"
    ghUser.followers = 107
    ghUser.organizations = 7
    ghUser.avatar = avatarUrl
    expected = {
        'name': 'iblancasa',
        'bio': '',
        'join': '2013-06-24',
        'organizations': 7,
        'private': 0,
        'contributions': 490,
        'public': 0,
        'location': place,
        'avatar': avatarUrl,
        'repositories': 141,
        'followers': 107,
    }
    # When
    exported = ghUser.export()
    # Then
    self.assertEqual(exported, expected)
200 |
def test_getRealContributions(self):
    """Test the public/private split computed by getRealContributions.

    Two scenarios are checked against canned HTTP replies served by
    httpretty:

    * ``iblancasa`` with 490 contributions: expects 490 public and
      0 private.
    * ``privateuser`` with 2026 contributions: expects 1116 public and
      910 private.

    The HTTP mocking is torn down in a ``finally`` clause so that a
    failing assertion cannot leave httpretty enabled (or its registered
    URIs in place) for the tests that run afterwards.
    """
    httpretty.enable()
    try:
        # Given
        with open("tests/resources/user.html") as userWeb:
            reply = userWeb.read()
        httpretty.register_uri(httpretty.GET,
                               "https://github.com/iblancasa",
                               body=reply,
                               content_type="text/html")
        # When
        user = GitHubUser("iblancasa")
        user.contributions = 490
        user.getRealContributions()
        # Then
        self.assertEqual(user.public, 490)
        self.assertEqual(user.private, 0)

        # Given
        with open("tests/resources/userprivate.html") as userWeb:
            reply = userWeb.read()
        httpretty.register_uri(httpretty.GET,
                               "https://github.com/privateuser",
                               body=reply,
                               content_type="text/html")
        # When
        user = GitHubUser("privateuser")
        user.contributions = 2026
        user.getRealContributions()
        # Then
        self.assertEqual(user.public, 1116)
        self.assertEqual(user.private, 910)
    finally:
        # Always restore real sockets and drop the registered URIs,
        # even when an assertion above fails.
        httpretty.disable()
        httpretty.reset()
234 |
def test_getData(self):
    """Test that getData() fills in every attribute of the user.

    The profile page is served by httpretty from the saved fixture;
    after ``getData()`` each attribute is compared with the value
    expected for that fixture.

    The HTTP mocking is torn down in a ``finally`` clause so that a
    failing assertion cannot leave httpretty enabled (or its registered
    URIs in place) for the tests that run afterwards.
    """
    # Given
    with open("tests/resources/user.html") as userWeb:
        reply = userWeb.read()
    httpretty.enable()
    try:
        httpretty.register_uri(httpretty.GET,
                               "https://github.com/iblancasa",
                               body=reply,
                               content_type="text/html")
        # When
        user = GitHubUser("iblancasa")
        user.getData()
        # Then
        self.assertEqual(user.numberOfRepos, 141)
        self.assertEqual(user.bio, "This is my bio.")
        self.assertEqual(user.private, 0)
        self.assertEqual(user.public, 0)
        self.assertEqual(user.contributions, 490)
        self.assertEqual(user.name, "iblancasa")
        self.assertEqual(user.join, "2013-06-24")
        self.assertEqual(user.followers, 107)
        self.assertEqual(user.organizations, 7)
        self.assertEqual(user.avatar,
                         "https://avatars0.githubusercontent.com/u/4806311")
    finally:
        # Always restore real sockets and drop the registered URIs,
        # even when an assertion above fails.
        httpretty.disable()
        httpretty.reset()
260 |
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
263 |
--------------------------------------------------------------------------------
/tests/resources/userprivate.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
1256 |
1257 | You signed in with another tab or window. Reload to refresh your session.
1258 | You signed out in another tab or window. Reload to refresh your session.
1259 |