├── .gitignore
├── LICENSE
├── README.md
├── config.json
├── github_api_status.py
├── github_following.py
├── github_stars.py
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110 | .pdm.toml
111 | .pdm-python
112 | .pdm-build/
113 |
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115 | __pypackages__/
116 |
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 |
121 | # SageMath parsed files
122 | *.sage.py
123 |
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 |
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 |
137 | # Rope project settings
138 | .ropeproject
139 |
140 | # mkdocs documentation
141 | /site
142 |
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 |
148 | # Pyre type checker
149 | .pyre/
150 |
151 | # pytype static type analyzer
152 | .pytype/
153 |
154 | # Cython debug symbols
155 | cython_debug/
156 |
157 | # PyCharm
158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160 | # and can be added to the global gitignore or merged into this file. For a more nuclear
161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162 | #.idea/
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 | # Ignore aider files
171 | .aider.chat.history.md
172 | .aider.input.history
173 | .aider.tags.cache.v3/
174 |
175 | # Ignore the changes patch file
176 | changes.patch
177 |
178 | # Ignore config and data files
179 | config.json
180 | github_following.csv
181 | ignored_repos.txt
182 | .aider*
183 |
184 | github_following_repo_prediction.csv
185 | old_account_csvs/
186 |
187 | *.csv
188 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Tom Dörr
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # 🌟 GitHub Analytics Tools
4 |
5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
6 | [![Python 3](https://img.shields.io/badge/python-3.x-blue.svg)](https://www.python.org)
7 | [![GitHub API](https://img.shields.io/badge/GitHub-REST%20API-lightgrey.svg)](https://docs.github.com/en/rest)
8 |
9 | Powerful Python scripts for analyzing GitHub user data, including following relationships and starred repositories.
10 |
11 |
12 |
13 | ## 🚀 Features
14 |
15 | - 📊 Analyze following relationships between GitHub users
16 | - ⭐ Track and rank starred repositories
17 | - 📈 Generate detailed statistics and reports
18 | - 🔄 Real-time data processing
19 | - 📋 CSV export functionality
20 |
21 | ## 🛠️ Installation
22 |
23 | 1. Clone the repository:
24 | ```bash
25 | git clone https://github.com/tom-doerr/github-analytics-tools.git
26 | cd github-analytics-tools
27 | ```
28 |
29 | 2. Install dependencies:
30 | ```bash
31 | pip install -r requirements.txt
32 | ```
33 |
34 | 3. Configure your GitHub token:
35 | - Use environment variables:
36 | ```bash
37 | export GITHUB_TOKEN=your_github_token_here
38 | ```
39 |
40 | ## 📚 Usage
41 |
42 | ### Following Analysis
43 |
44 | ```bash
45 | python github_following.py --username <username> [--count <n>]
46 | ```
47 |
48 | Options:
49 | - `--username`: Target GitHub username (prompted interactively if omitted)
50 | - `--count`: Number of following accounts to analyze (default: 100)
51 |
52 | ### Stars Analysis
53 |
54 | ```bash
55 | python github_stars.py [--top-accounts <n>] [--stars-per-account <n>] [--final-ranking <n>]
56 | ```
57 |
58 | Options:
59 | - `--top-accounts`: Number of top accounts to analyze (default: 100)
60 | - `--stars-per-account`: Number of newest stars to consider per account (default: 50)
61 | - `--final-ranking`: Items in final ranking (default: 100)
62 |
63 | ## 📋 Configuration Files
64 |
65 | - `config.json`: Basic settings
66 | - `.env`: Environment variables
67 | - `ignored_repos.txt`: Repositories to exclude
68 |
69 | ## 📊 Output
70 |
71 | - CSV files with following data
72 | - Console-based repository rankings
73 | - Detailed statistics and reports
74 |
75 | ## 🤝 Contributing
76 |
77 | Contributions are welcome! Please feel free to:
78 | - Fork the repository
79 | - Create a feature branch
80 | - Submit a Pull Request
81 |
82 | ## ⚖️ License
83 |
84 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
85 |
86 | ## ⚠️ Disclaimer
87 |
88 | This tool is for educational purposes. Please comply with GitHub's terms of service and API usage limits.
89 |
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "count": 5
3 | }
4 |
--------------------------------------------------------------------------------
/github_api_status.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os
4 | import requests
5 | from datetime import datetime
6 | import time
7 | from dotenv import load_dotenv
8 |
def get_rate_limits():
    """Fetch the current GitHub API rate-limit snapshot.

    Returns:
        The parsed JSON payload from the /rate_limit endpoint, or None
        when the token is missing or the request fails.
    """
    load_dotenv()
    github_token = os.getenv('GITHUB_TOKEN')
    if not github_token:
        print("Error: GITHUB_TOKEN not found in .env file")
        return None

    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': f'token {github_token}',
    }

    try:
        resp = requests.get('https://api.github.com/rate_limit', headers=headers)
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error accessing GitHub API: {e}")
        return None
    return resp.json()
31 |
def format_time_until_reset(reset_timestamp):
    """Render the time remaining until *reset_timestamp* as e.g. '1h 5m 3s'.

    Hours and minutes are omitted when zero; seconds always appear.
    Returns a fixed message when the reset time is already in the past.
    """
    remaining = reset_timestamp - datetime.now().timestamp()
    if remaining <= 0:
        return "Reset time has passed"

    total = int(remaining)
    hours = total // 3600
    minutes = (total % 3600) // 60
    seconds = total % 60

    pieces = []
    if hours:
        pieces.append(f"{hours}h")
    if minutes:
        pieces.append(f"{minutes}m")
    pieces.append(f"{seconds}s")
    return " ".join(pieces)
51 |
def display_api_status():
    """Print a GitHub API rate-limit report for several resource groups.

    For the integration-manifest, GraphQL, search, and core APIs this
    prints remaining/total calls, time until reset, and percent used.
    Silently returns when rate-limit data could not be fetched.
    """
    rate_limits = get_rate_limits()
    if not rate_limits:
        return

    print("\nGitHub API Status:")
    print("=" * 50)

    # (resource key in the API payload, human-readable section title);
    # replaces four copy-pasted, near-identical print blocks.
    sections = [
        ('integration_manifest', 'Integration Manifest API'),
        ('graphql', 'GraphQL API'),
        ('search', 'Search API'),
        ('core', 'Core API'),
    ]

    for key, title in sections:
        info = rate_limits['resources'][key]
        limit = info['limit']
        remaining = info['remaining']
        # Guard against a zero limit to avoid ZeroDivisionError.
        usage = ((limit - remaining) / limit * 100) if limit else 0.0
        print(f"\n{title}:")
        print(f"  Remaining calls: {remaining}/{limit}")
        print(f"  Reset in: {format_time_until_reset(info['reset'])}")
        print(f"  Usage: {usage:.1f}%")
88 |
# Entry point: print the rate-limit status report when run as a script.
if __name__ == "__main__":
    display_api_status()
91 |
--------------------------------------------------------------------------------
/github_following.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import requests
4 | import csv
5 | import os
6 | import argparse
7 | import time
8 | from requests.auth import HTTPBasicAuth
9 | from colorama import init, Fore, Style
10 | from dotenv import load_dotenv
11 |
12 | # Initialize colorama
13 | init(autoreset=True)
14 |
def load_config():
    """Return a config dict holding the GitHub token (.env is honored)."""
    load_dotenv()
    token = os.getenv('GITHUB_TOKEN')
    return {'github_token': token}
20 |
def make_github_request(url, params=None, token=None):
    """GET a GitHub API URL and return the decoded JSON payload.

    Retries with exponential backoff when the API reports a rate-limit
    403; any other error is reported and aborts the request.

    Args:
        url: Full API endpoint URL.
        params: Optional query parameters.
        token: Optional GitHub token used for authentication.

    Returns:
        The parsed JSON response, or None on failure.
    """
    max_retries = 5
    base_delay = 1
    headers = {'Authorization': f'token {token}'} if token else {}

    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, headers=headers)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            rate_limited = (e.response.status_code == 403
                            and 'rate limit exceeded' in str(e).lower())
            if not rate_limited:
                print(f"HTTP error occurred: {e}")
                return None
            if attempt == max_retries - 1:
                print("Error: Rate limit exceeded. Max retries reached.")
                return None
            # Exponential backoff: 1s, 2s, 4s, 8s, ...
            delay = base_delay * (2 ** attempt)
            print(f"Rate limit exceeded. Retrying in {delay} seconds...")
            time.sleep(delay)
        except requests.RequestException as e:
            print(f"Error: Unable to fetch data. {e}")
            return None

    # Unreachable in practice (every loop path returns or retries); the
    # original trailing time.sleep(1) here was dead code and is removed.
    return None
48 |
def get_following(username, count=100, token=None):
    """Return the accounts *username* follows (single API page).

    NOTE(review): the GitHub API caps per_page at 100, so counts above
    100 are not honored without pagination — confirm against callers.
    """
    endpoint = f"https://api.github.com/users/{username}/following"
    result = make_github_request(endpoint, {"per_page": count}, token)

    if result is None:
        # Request failed entirely.
        return []
    if not result:
        print(f"No following accounts found for {username}")
        return []
    return result
62 |
def get_follower_count(username, token=None):
    """Look up a user's follower count; None when the lookup fails."""
    profile = make_github_request(f"https://api.github.com/users/{username}", token=token)
    if profile is None:
        return None
    return profile.get('followers')
72 |
def write_to_csv(username, following, csv_file, token):
    """Append one row per newly seen followed account to *csv_file*.

    Writes a header when the file is created, and skips accounts that
    are already recorded — either in the existing CSV or earlier in this
    same batch (the original re-read the file while the append handle
    was open and never updated its seen-set, so in-batch duplicates were
    written twice).

    Args:
        username: The account whose following list is being recorded.
        following: Iterable of GitHub user dicts (each with a 'login').
        csv_file: Path of the CSV file to append to.
        token: GitHub token forwarded to the follower-count lookup.
    """
    file_exists = os.path.isfile(csv_file)

    # Collect already-recorded accounts BEFORE opening for append.
    existing_accounts = set()
    if file_exists:
        with open(csv_file, 'r', newline='') as f:
            reader = csv.reader(f)
            next(reader, None)  # Skip header
            existing_accounts = {row[0] for row in reader if row}

    with open(csv_file, 'a', newline='') as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(['Account', 'Followers', 'Following'])

        for account in following:
            login = account['login']
            if login in existing_accounts:
                continue
            follower_count = get_follower_count(login, token)
            if follower_count is not None:
                writer.writerow([login, follower_count, username])
                # Track what we just wrote so in-batch duplicates are skipped.
                existing_accounts.add(login)
            else:
                print(f"Skipping {login} due to error fetching follower count")
95 |
def display_following(username, following, token=None):
    """Print each followed account with its URL and follower count."""
    print(f"\n{Fore.CYAN}{'=' * 40}")
    print(f"{Fore.YELLOW}Accounts followed by {Fore.GREEN}{username}{Fore.YELLOW}:")
    print(f"{Fore.CYAN}{'=' * 40}\n")
    for rank, entry in enumerate(following, start=1):
        login = entry['login']
        followers = get_follower_count(login, token)
        print(f"{Fore.MAGENTA}{rank:3}. {Fore.GREEN}{login} {Fore.RESET}- {entry['html_url']}")
        print(f"    {Fore.CYAN}Followers: {Fore.YELLOW}{followers}")
104 |
# Script entry point: parse CLI arguments, fetch the following list for a
# user, display it with follower counts, and append it to the CSV file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Fetch GitHub following accounts")
    parser.add_argument("--username", help="GitHub username to fetch following accounts for")
    parser.add_argument("--count", type=int, default=100, help="Number of following accounts to fetch (default: 100)")
    args = parser.parse_args()

    config = load_config()
    token = config.get('github_token')
    if not token:
        # A token is required; unauthenticated requests hit a very low rate limit.
        print(f"{Fore.RED}Error: GitHub token not found in .env file.")
        exit(1)

    # Fall back to an interactive prompt when --username is not given.
    username = args.username or input("Enter a GitHub username: ")
    count = args.count

    print(f"\n{Fore.CYAN}{'=' * 40}")
    print(f"{Fore.YELLOW}GitHub Following Analysis")
    print(f"{Fore.CYAN}{'=' * 40}\n")

    following = get_following(username, count, token)
    if following:
        display_following(username, following, token)

        csv_file = 'github_following.csv'
        write_to_csv(username, following, csv_file, token)
        print(f"\n{Fore.GREEN}Data has been written to {Fore.YELLOW}{csv_file}")
    else:
        print(f"{Fore.RED}No data found for user: {Fore.YELLOW}{username}")

    print(f"\n{Fore.CYAN}{'=' * 40}")
    print(f"{Fore.YELLOW}Analysis Complete")
    print(f"{Fore.CYAN}{'=' * 40}")
137 |
--------------------------------------------------------------------------------
/github_stars.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import requests
4 | from requests.adapters import HTTPAdapter
5 | from urllib3.util.retry import Retry
6 | import json
7 | import csv
8 | import argparse
9 | import os
10 | import subprocess
11 | import concurrent.futures
12 | from collections import defaultdict, Counter
13 | from requests.auth import HTTPBasicAuth
14 | from tqdm import tqdm
15 | import matplotlib.pyplot as plt
16 | from colorama import init, Fore, Style
17 | from dotenv import load_dotenv
18 | from watchdog.observers import Observer
19 | from watchdog.events import FileSystemEventHandler
20 | import time
21 | from datetime import datetime
22 | import os
23 | import pathlib
24 | import json
25 |
26 | # Initialize colorama
27 | init(autoreset=True)
28 |
29 | # Load environment variables
30 | load_dotenv()
31 |
def load_config():
    """Load config.json and merge in the GITHUB_TOKEN environment value."""
    with open('config.json', 'r') as handle:
        settings = json.load(handle)
    settings['github_token'] = os.getenv('GITHUB_TOKEN')
    return settings
37 |
def load_ignored_repos():
    """Return the set of repos listed in ignored_repos.txt.

    Blank lines and comment lines (raw line starting with '#') are
    skipped; an absent file yields an empty set with a warning.
    """
    try:
        with open('ignored_repos.txt', 'r') as handle:
            repos = set()
            for raw in handle:
                stripped = raw.strip()
                # Comment test is on the raw line, matching the original.
                if stripped and not raw.startswith('#'):
                    repos.add(stripped)
            return repos
    except FileNotFoundError:
        print(f"{Fore.YELLOW}Warning: ignored_repos.txt not found. No repositories will be ignored.")
        return set()
45 |
def add_to_ignored_repos(repo):
    """Append *repo* (one "owner/name" per line) to ignored_repos.txt."""
    with open('ignored_repos.txt', 'a') as f:
        f.write(f"{repo}\n")
49 |
def check_rate_limit(token=None):
    """Query the GitHub core-API rate limit.

    Returns:
        Tuple (remaining, reset_time_str, used, total); all four are
        None when the request does not return HTTP 200.
    """
    headers = {'Authorization': f'token {token}'} if token else {}
    response = requests.get('https://api.github.com/rate_limit', headers=headers)
    if response.status_code != 200:
        return None, None, None, None
    core = response.json()['resources']['core']
    total = core['limit']
    remaining = core['remaining']
    reset_time = datetime.fromtimestamp(core['reset']).strftime('%H:%M:%S')
    return remaining, reset_time, total - remaining, total
61 |
def create_session():
    """Build a requests session that retries transient GET failures."""
    retry_policy = Retry(
        total=5,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["GET"],
    )
    session = requests.Session()
    session.mount('https://', HTTPAdapter(max_retries=retry_policy))
    return session
72 |
def get_newest_stars(username, count, token):
    """Fetch the most recently starred repositories for *username*.

    Args:
        username: GitHub login to query.
        count: Maximum number of starred repos to request (per_page).
        token: Optional GitHub token for authenticated requests.

    Returns:
        A list of starred-repo dicts from the API, or [] on any error
        or when the user has no stars.
    """
    # Use debug level logging instead of print
    if os.getenv('DEBUG'):
        tqdm.write(f"{Fore.CYAN}Fetching stars for user: {username}")
    # NOTE(review): the hard-coded ?timestamp=1 query looks like a
    # cache-buster; requests merges it with `params` — confirm intent.
    url = f"https://api.github.com/users/{username}/starred?timestamp=1"
    params = {
        "sort": "created",
        "direction": "desc",
        "per_page": count
    }
    headers = {'Authorization': f'token {token}'} if token else {}

    session = create_session()
    try:
        response = session.get(url, params=params, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # 403 usually means rate limiting; surface the reset time when
        # the rate-limit headers are available.
        if e.response.status_code == 403:
            if 'X-RateLimit-Remaining' in e.response.headers:
                remaining = e.response.headers['X-RateLimit-Remaining']
                reset_time = time.strftime('%H:%M:%S', time.localtime(int(e.response.headers['X-RateLimit-Reset'])))
                print(f"{Fore.RED}Error: Rate limit exceeded for {username}. "
                      f"Remaining requests: {remaining}. Reset time: {reset_time}")
            else:
                print(f"{Fore.RED}Error: Rate limit exceeded or authentication required for {username}. "
                      f"Check your GitHub token or wait a while.")
        else:
            print(f"{Fore.RED}Error: Unable to fetch data for {username}. HTTP {e.response.status_code}")
        return []
    except requests.Timeout:
        print(f"{Fore.RED}Error: Request timed out for {username}. The server took too long to respond.")
        return []
    except requests.ConnectionError:
        print(f"{Fore.RED}Error: Connection failed for {username}. Please check your internet connection.")
        return []
    except requests.RequestException as e:
        # Catch-all for any other requests-level failure; must stay last
        # since the specific exception types above subclass it.
        print(f"{Fore.RED}Error: Unable to fetch data for {username}. {e}")
        return []

    stars = response.json()

    if not stars:
        if os.getenv('DEBUG'):
            tqdm.write(f"{Fore.YELLOW}No starred repositories found for {username}")
        return []

    return stars
120 |
def get_top_accounts(csv_file, n):
    """Return the top *n* (account, weight) pairs from the following CSV.

    Two row formats are supported: (username, follower_count), where the
    weight is the follower count, and (username, comma-separated repo
    list), where the weight is the number of repos.

    Args:
        csv_file: Path to the CSV file; the header row is skipped.
        n: Number of top accounts to return.

    Returns:
        List of (username, weight) tuples sorted by weight, descending.
    """
    accounts = []
    with open(csv_file, 'r') as f:
        reader = csv.reader(f)
        next(reader, None)  # Skip header; tolerate an empty file.
        for row in reader:
            if len(row) < 2:
                # Skip blank/malformed rows instead of raising IndexError.
                continue
            try:
                # Old format: (username, follower_count)
                accounts.append((row[0], int(row[1])))
            except ValueError:
                # New format: (username, repo_list) — weight = repo count.
                accounts.append((row[0], len(row[1].split(','))))

    return sorted(accounts, key=lambda x: x[1], reverse=True)[:n]
137 |
def process_account(job):
    """Fetch the newest stars for one account (thread-pool worker).

    Args:
        job: Tuple of (username, count, token).

    Returns:
        (star_pairs, star_count, success, failure) where star_pairs is a
        list of (star, username) tuples.
    """
    username, count, token = job
    try:
        stars = get_newest_stars(username, count, token)
    except Exception as e:
        print(f"{Fore.RED}Error in process_account for {username}: {str(e)}")
        return [], 0, False, True
    if not stars:
        # Empty result but no exception raised.
        return [], 0, False, True
    return [(star, username) for star in stars], len(stars), True, False
149 |
150 |
def process_accounts(config_file, top_n, token, args):
    """Concurrently fetch the newest stars of the top accounts.

    Args:
        config_file: Path to the config file.
            NOTE(review): currently unused inside this function — confirm.
        top_n: Number of top accounts (by CSV weight) to process.
        token: GitHub token forwarded to each worker.
        args: Parsed CLI namespace; reads stars_per_account, csv_file,
            and parallel.

    Returns:
        Tuple (all_stars, total_stars_considered, successful_requests,
        failed_requests) where all_stars is a list of (star, username)
        pairs.
    """
    count = args.stars_per_account
    top_accounts = get_top_accounts(args.csv_file, top_n)

    # Initialize tracking variables
    all_stars = []
    total_stars_considered = 0
    successful_requests = 0
    failed_requests = 0

    print(f"{Fore.CYAN}Starting to process {len(top_accounts)} accounts...\n")

    with tqdm(total=len(top_accounts),
              desc="Starting...",
              bar_format='{desc:<30}{percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]') as pbar:

        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}Request Progress")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor:
            # Prepare arguments for each account
            process_args = [(username, count, token) for username, _ in top_accounts]

            # Submit all tasks
            future_to_username = {executor.submit(process_account, arg): arg[0]
                                  for arg in process_args}

            # Process completed tasks as they finish
            for future in concurrent.futures.as_completed(future_to_username):
                username = future_to_username[future]
                try:
                    # Inner try isolates result-handling errors from the
                    # outer catch-all so the progress bar always advances.
                    try:
                        stars, stars_count, success, failure = future.result()
                        all_stars.extend(stars)
                        total_stars_considered += stars_count
                        if success:
                            successful_requests += 1
                        if failure:
                            failed_requests += 1
                    except Exception as e:
                        print(f"{Fore.RED}Error processing results for {username}: {str(e)}")
                        failed_requests += 1
                except Exception as e:
                    print(f"{Fore.RED}Error processing {username}: {e}")

                # Truncate long usernames so the bar description stays aligned.
                display_name = f"{username[:7]}..." if len(username) > 10 else f"{username:<10}"
                pbar.set_description(f"Processing {display_name}")
                pbar.update(1)

    return all_stars, total_stars_considered, successful_requests, failed_requests
203 |
def write_repo_data(sorted_repos, ignored_repos, timestamp=None):
    """Write repository data to timestamped files in both human and machine readable formats"""
    if timestamp is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Make sure both output directories exist.
    reports_dir = pathlib.Path("reports")
    data_dir = pathlib.Path("data")
    reports_dir.mkdir(exist_ok=True)
    data_dir.mkdir(exist_ok=True)

    # Human-readable text report.
    report_file = reports_dir / f"repo_report_{timestamp}.txt"
    with open(report_file, "w") as out:
        out.write(f"Repository Report - Generated at {timestamp}\n")
        out.write("=" * 80 + "\n\n")
        for repo, usernames in sorted_repos:
            out.write(f"Repository: {repo}\n")
            out.write(f"Stars: {len(usernames)}\n")
            out.write(f"Status: {'Previously Displayed' if repo in ignored_repos else 'New'}\n")
            out.write("Starred by:\n")
            out.writelines(f"  - {username}\n" for username in usernames)
            out.write("\n" + "-" * 40 + "\n\n")

    # Machine-readable JSON dump of the same data.
    json_file = data_dir / f"repo_data_{timestamp}.json"
    payload = {
        "repositories": [
            {
                "name": repo,
                "stars_count": len(usernames),
                "is_ignored": repo in ignored_repos,
                "starred_by": usernames,
            }
            for repo, usernames in sorted_repos
        ]
    }
    with open(json_file, "w") as out:
        json.dump(payload, out, indent=2)
250 |
def create_ranking(all_stars, top_repos, ignored_repos=None):
    """Rank repositories by how many tracked accounts starred them.

    Args:
        all_stars: Iterable of (star_dict, username) pairs.
        top_repos: Number of entries to keep.
        ignored_repos: Optional set of "owner/name" keys to exclude.

    Returns:
        List of ("owner/name", [usernames]) pairs, most-starred first.
    """
    ignored = ignored_repos if ignored_repos is not None else set()
    grouped = defaultdict(list)
    for star, username in all_stars:
        key = f"{star['owner']['login']}/{star['name']}"
        if key in ignored:
            continue
        grouped[key].append(username)

    ranked = sorted(grouped.items(), key=lambda item: len(item[1]), reverse=True)
    return ranked[:top_repos]
262 |
def display_distribution(all_stars, ignored_repos=None):
    """Print a star-count distribution table and save a bar chart.

    Args:
        all_stars: Iterable of (star_dict, username) pairs.
        ignored_repos: Optional set of "owner/name" keys to exclude.

    Side effects:
        Prints the distribution to stdout and writes a bar chart to
        star_distribution.png in the current directory.
    """
    if ignored_repos is None:
        ignored_repos = set()

    # Only count non-ignored repos
    star_counts = Counter()
    for star, _ in all_stars:
        repo_key = f"{star['owner']['login']}/{star['name']}"
        if repo_key not in ignored_repos:
            # Keyed by the repo's numeric id: per-repo star tally.
            star_counts[star['id']] += 1

    # Map "stars per repo" -> "number of repos with that many stars".
    distribution = Counter(star_counts.values())

    print(f"\n{Fore.CYAN}{'=' * 60}")
    print(f"{Fore.YELLOW}Star Distribution (Excluding Ignored Repos)")
    print(f"{Fore.CYAN}{'=' * 60}\n")

    total_repos = sum(distribution.values())
    total_stars = sum(stars * count for stars, count in distribution.items())

    print(f"{Fore.CYAN}Total unique repositories: {Fore.GREEN}{total_repos}")
    print(f"{Fore.CYAN}Total stars across repos: {Fore.GREEN}{total_stars}\n")

    # Calculate cumulative counts, highest star counts first.
    sorted_dist = sorted(distribution.items(), reverse=True)
    cumulative = 0

    print(f"{Fore.CYAN}Stars  Repos  Cumulative")
    print(f"{Fore.CYAN}{'=' * 25}")

    for stars, count in sorted_dist:
        cumulative += count
        print(f"{Fore.GREEN}{stars:5d}  {Fore.YELLOW}{count:5d}  {Fore.CYAN}{cumulative:5d}")

    # Create a bar plot of the distribution
    plt.figure(figsize=(10, 6))
    plt.bar(distribution.keys(), distribution.values(), color='skyblue')
    plt.title('Distribution of Stars Across Repositories')
    plt.xlabel('Number of Stars')
    plt.ylabel('Number of Repositories')
    plt.savefig('star_distribution.png')
    print(f"\n{Fore.CYAN}Distribution plot saved as 'star_distribution.png'")
305 |
def display_ranking(sorted_repos, interactive=False, all_stars=None, initial_ignored=None):
    """Print the repository ranking; optionally step through interactively.

    In interactive mode each repo is logged to browser_opens.log, opened
    in the Brave browser, added to the ignore list, and the ignore file
    is re-checked for external edits.

    Args:
        sorted_repos: List of ("owner/name", [usernames]) pairs.
        interactive: When True, pause and open each repo in a browser.
        all_stars: Raw (star, username) pairs, used to resolve repo URLs.
        initial_ignored: Set of repos already shown in previous runs.
    """
    # Guard the optional arguments: the original crashed with its own
    # defaults (membership test on None; iterating None in next()).
    if all_stars is None:
        all_stars = []
    if initial_ignored is None:
        initial_ignored = set()

    # Create browser_opens.log if it doesn't exist
    if not os.path.exists('browser_opens.log'):
        with open('browser_opens.log', 'w') as f:
            f.write("# Log of repositories opened in browser\n")
            f.write("# Format: human_timestamp,unix_timestamp,repository_name\n")

    print(f"\n{Fore.CYAN}{'=' * 60}")
    print(f"{Fore.YELLOW}Repository Ranking (Most Popular at Top)")
    print(f"{Fore.CYAN}{'=' * 60}\n")

    for i, (repo, usernames) in enumerate(sorted_repos, 1):
        status = "[PREV]" if repo in initial_ignored else ""
        print(f"{Fore.MAGENTA}{i:3}. {status} {Fore.GREEN}{repo}")
        # Resolve the repo URL from the raw star payloads; default to None
        # instead of raising StopIteration when the lists are out of sync.
        repo_url = next((star['html_url'] for star, _ in all_stars
                         if f"{star['owner']['login']}/{star['name']}" == repo), None)
        print(f"    {Fore.CYAN}URL: {Fore.BLUE}{repo_url}")
        print(f"    {Fore.CYAN}Starred by {Fore.YELLOW}{len(usernames)} {Fore.CYAN}account(s):")
        print(f"    {Fore.YELLOW}{', '.join(usernames)}")
        print()

        if interactive:
            input("Press Enter to continue...")
            # Log before attempting to open browser
            now = datetime.now()
            human_timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
            unix_timestamp = int(now.timestamp())
            with open('browser_opens.log', 'a') as log:
                log.write(f"{human_timestamp},{unix_timestamp},{repo}\n")

            try:
                subprocess.run(['brave', repo_url], check=True)
            except subprocess.CalledProcessError:
                print(f"{Fore.RED}Error: Unable to open Brave browser. Make sure it's installed and accessible from the command line.")
            except FileNotFoundError:
                print(f"{Fore.RED}Error: Brave browser not found. Make sure it's installed and accessible from the command line.")
            add_to_ignored_repos(repo)

            # Check for changes to ignored repos after each repo.
            # NOTE(review): `args` is the module-level namespace created in
            # __main__ — interactive mode only works when run as a script.
            new_ignored = recheck_and_display(all_stars, args, initial_ignored)
            if new_ignored:
                initial_ignored = new_ignored
347 |
class IgnoreFileHandler(FileSystemEventHandler):
    """Watches ignored_repos.txt and reacts only to external edits."""

    def __init__(self):
        # Set to True by the script right before it writes the file, so
        # that self-inflicted modification events can be ignored once.
        self.last_modified_by_script = False

    def on_modified(self, event):
        """Dispatch filesystem modification events for ignored_repos.txt."""
        if not event.src_path.endswith('ignored_repos.txt'):
            return
        if not self.last_modified_by_script:
            self.handle_external_modification()
        self.last_modified_by_script = False

    def handle_external_modification(self):
        """Hook for externally made edits; intentionally a no-op."""
        pass
361 |
def recheck_and_display(all_stars, args, initial_ignored):
    """Recheck ignored repos and redisplay if changed"""
    current_ignored = load_ignored_repos()

    # Count how many repos were added
    added = current_ignored - initial_ignored
    removed = initial_ignored - current_ignored

    # Only reload if more than one repo was added at once
    # (a single addition is the script's own per-repo bookkeeping).
    if len(added) > 1 or removed:
        # Set flag before modifying the file so the watchdog handler
        # (global `file_handler`) ignores the resulting event.
        file_handler.last_modified_by_script = True

        from datetime import datetime
        now = datetime.now().strftime("%H:%M:%S")
        print(f"\n{Fore.YELLOW}[{now}] Multiple changes detected in ignored repositories!")

        if added:
            print(f"{Fore.GREEN}Added to ignore list ({len(added)} repos): {', '.join(added)}")
        if removed:
            print(f"{Fore.RED}Removed from ignore list ({len(removed)} repos): {', '.join(removed)}")

        # Create new ranking with updated ignored repos.
        # NOTE(review): display_ranking calls back into this function, so
        # repeated external edits recurse — confirm depth is acceptable.
        sorted_repos = create_ranking(all_stars, args.final_ranking, current_ignored)
        print("\n" + "=" * 80 + "\n")
        print(f"{Fore.CYAN}Refreshed repository ranking:")
        display_ranking(sorted_repos, interactive=not args.no_interactive, all_stars=all_stars, initial_ignored=current_ignored)

    return current_ignored
391 |
# Create a global file handler instance; display_ranking/recheck_and_display
# use it to distinguish script-made edits of ignored_repos.txt from external ones.
file_handler = IgnoreFileHandler()
394 |
if __name__ == "__main__":
    # Start timing the execution
    start_time = time.time()

    # Set up the file system observer so external edits to
    # ignored_repos.txt are detected while the script runs.
    observer = Observer()
    observer.schedule(file_handler, path='.', recursive=False)
    observer.start()

    parser = argparse.ArgumentParser(description="Fetch GitHub stars for top accounts")
    parser.add_argument("--top-accounts", type=int, default=100, help="Number of top accounts to consider (default: 100)")
    parser.add_argument("--stars-per-account", type=int, default=50, help="Number of newest stars to consider per account (default: 50)")
    parser.add_argument("--final-ranking", type=int, default=100, help="Number of items to show in the final ranking (default: 100)")
    parser.add_argument("--no-interactive", action="store_true", help="Disable interactive mode")
    parser.add_argument("--csv-file", type=str, default='github_following.csv',
                        help="Path to the GitHub following CSV file (default: github_following.csv)")
    parser.add_argument("--parallel", type=int, default=5,
                        help="Number of parallel requests (default: 5)")
    parser.add_argument("--save-top", type=int,
                        help="Save the top N repositories to a file")
    parser.add_argument("--output-file", type=str, default="top_repos.txt",
                        help="Filename to save top repositories (default: top_repos.txt)")
    args = parser.parse_args()

    # Ensure the watchdog observer thread is always shut down, even if
    # any step below raises — previously it was never stopped or joined.
    try:
        config = load_config()
        token = config.get('github_token')

        config_file = 'config.json'

        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}GitHub Stars Analysis")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        # Check and display rate limit info
        remaining, reset_time, used, total = check_rate_limit(token)
        if remaining is not None:
            print(f"{Fore.CYAN}GitHub API Rate Limit Status:")
            print(f"{Fore.GREEN}Remaining: {remaining}/{total} requests")
            print(f"{Fore.YELLOW}Used: {used} requests")
            print(f"{Fore.CYAN}Reset Time: {reset_time}\n")

        initial_ignored = load_ignored_repos()
        if initial_ignored:
            print(f"{Fore.YELLOW}Ignoring {len(initial_ignored)} repositories listed in ignored_repos.txt")

        print(f"{Fore.GREEN}Processing top {Fore.YELLOW}{args.top_accounts} {Fore.GREEN}accounts...")
        print(f"{Fore.GREEN}Considering {Fore.YELLOW}{args.stars_per_account} {Fore.GREEN}newest stars per account...")
        all_stars, total_stars_considered, successful_requests, failed_requests = process_accounts(config_file, args.top_accounts, token, args)

        # These counts will be shown in display_distribution() with ignored repos excluded

        display_distribution(all_stars, initial_ignored)

        sorted_repos = create_ranking(all_stars, args.final_ranking, initial_ignored)

        # Generate timestamp for this run
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Write all repository data before displaying
        all_sorted_repos = create_ranking(all_stars, len(all_stars), initial_ignored) # Get all repos
        write_repo_data(all_sorted_repos, initial_ignored, timestamp)
        print(f"\n{Fore.CYAN}Report saved to:")
        print(f"{Fore.GREEN}  - reports/repo_report_{timestamp}.txt (human readable)")
        print(f"{Fore.GREEN}  - data/repo_data_{timestamp}.json (machine readable)")

        # Save top N repos if requested
        if args.save_top is not None:
            top_n = min(args.save_top, len(sorted_repos))
            with open(args.output_file, 'w') as f:
                for i, (repo, usernames) in enumerate(sorted_repos[:top_n], 1):
                    # Default of None avoids an unhandled StopIteration if a
                    # ranked repo is somehow absent from all_stars.
                    repo_url = next(
                        (star['html_url'] for star, _ in all_stars
                         if f"{star['owner']['login']}/{star['name']}" == repo),
                        None,
                    )
                    f.write(f"{i}. {repo} (Starred by {len(usernames)} users)\n")
                    if repo_url is not None:
                        f.write(f"   URL: {repo_url}\n")
            print(f"\n{Fore.GREEN}Saved top {top_n} repositories to {args.output_file}")

        display_ranking(sorted_repos, interactive=not args.no_interactive, all_stars=all_stars, initial_ignored=initial_ignored)

        # Show final statistics
        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}Request Statistics")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        end_time = time.time()
        elapsed_time = end_time - start_time
        # Guard against division by zero for an instantaneous run.
        stars_per_second = total_stars_considered / elapsed_time if elapsed_time > 0 else 0

        print(f"{Fore.CYAN}Stars processed: {Fore.GREEN}{total_stars_considered}")
        print(f"\n{Fore.CYAN}Speed Statistics:")
        print(f"{Fore.CYAN}Total time: {Fore.GREEN}{elapsed_time:.1f} seconds")
        print(f"{Fore.CYAN}Processing speed: {Fore.GREEN}{stars_per_second:.1f} stars/second")
    finally:
        # Stop the watchdog thread and wait for it so the process exits cleanly.
        observer.stop()
        observer.join()
485 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | tqdm
3 | colorama
4 | python-dotenv
5 | matplotlib
6 | watchdog
7 |
--------------------------------------------------------------------------------