├── .gitignore ├── LICENSE ├── README.md ├── config.json ├── github_api_status.py ├── github_following.py ├── github_stars.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | # Ignore aider files 171 | .aider.chat.history.md 172 | .aider.input.history 173 | .aider.tags.cache.v3/ 174 | 175 | # Ignore the changes patch file 176 | changes.patch 177 | 178 | # Ignore config and data files 179 | config.json 180 | github_following.csv 181 | ignored_repos.txt 182 | .aider* 183 | 184 | github_following_repo_prediction.csv 185 | old_account_csvs/ 186 | 187 | *.csv 188 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Tom Dörr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # 🌟 GitHub Analytics Tools 4 | 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg?style=flat-square)](LICENSE) 6 | [![Python](https://img.shields.io/badge/Python-3.6+-blue.svg?style=flat-square&logo=python&logoColor=white)](https://www.python.org) 7 | [![GitHub API](https://img.shields.io/badge/GitHub-API-green.svg?style=flat-square&logo=github)](https://docs.github.com/en/rest) 8 | 9 | Powerful Python scripts for analyzing GitHub user data, including following relationships and starred repositories. 10 | 11 |

## 🚀 Features

- 📊 Analyze following relationships between GitHub users
- ⭐ Track and rank starred repositories
- 📈 Generate detailed statistics and reports
- 🔄 Real-time data processing
- 📋 CSV export functionality

## 🛠️ Installation

1. Clone the repository:
```bash
git clone https://github.com/tom-doerr/github-analytics-tools.git
cd github-analytics-tools
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Configure your GitHub token:
- Use environment variables:
```bash
export GITHUB_TOKEN=your_github_token_here
```

## 📚 Usage

### Following Analysis

```bash
python github_following.py --username <username> [--count <count>]
```

Options:
- `--username`: Target GitHub username (you are prompted for one if omitted)
- `--count`: Number of following accounts to analyze (default: 100)

### Stars Analysis

```bash
python github_stars.py [--top-accounts <count>] [--stars-per-account <count>] [--final-ranking <count>]
```

Options:
- `--top-accounts`: Number of top accounts to analyze (default: 100)
- `--stars-per-account`: Number of newest stars to consider per account (default: 50)
- `--final-ranking`: Items to show in the final ranking (default: 100)

## 📋 Configuration Files

- `config.json`: Basic settings
- `.env`: Environment variables
- `ignored_repos.txt`: Repositories to exclude

## 📊 Output

- CSV files with following data
- Console-based repository rankings
- Detailed statistics and reports

## 🤝 Contributing

Contributions are welcome! Please feel free to:
- Fork the repository
- Create a feature branch
- Submit a Pull Request

## ⚖️ License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## ⚠️ Disclaimer

This tool is for educational purposes.
Please comply with GitHub's terms of service and API usage limits. 89 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "count": 5 3 | } 4 | -------------------------------------------------------------------------------- /github_api_status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import requests 5 | from datetime import datetime 6 | import time 7 | from dotenv import load_dotenv 8 | 9 | def get_rate_limits(): 10 | """Get GitHub API rate limit information""" 11 | load_dotenv() 12 | github_token = os.getenv('GITHUB_TOKEN') 13 | if not github_token: 14 | print("Error: GITHUB_TOKEN not found in .env file") 15 | return None 16 | 17 | headers = { 18 | 'Authorization': f'token {github_token}', 19 | 'Accept': 'application/vnd.github.v3+json' 20 | } 21 | 22 | try: 23 | # Get rate limit info 24 | response = requests.get('https://api.github.com/rate_limit', headers=headers) 25 | response.raise_for_status() 26 | return response.json() 27 | 28 | except requests.exceptions.RequestException as e: 29 | print(f"Error accessing GitHub API: {e}") 30 | return None 31 | 32 | def format_time_until_reset(reset_timestamp): 33 | """Format the time until rate limit reset""" 34 | now = datetime.now().timestamp() 35 | time_left = reset_timestamp - now 36 | 37 | if time_left <= 0: 38 | return "Reset time has passed" 39 | 40 | minutes, seconds = divmod(int(time_left), 60) 41 | hours, minutes = divmod(minutes, 60) 42 | 43 | parts = [] 44 | if hours > 0: 45 | parts.append(f"{hours}h") 46 | if minutes > 0: 47 | parts.append(f"{minutes}m") 48 | parts.append(f"{seconds}s") 49 | 50 | return " ".join(parts) 51 | 52 | def display_api_status(): 53 | """Display GitHub API status information""" 54 | rate_limits = get_rate_limits() 55 | if not rate_limits: 56 | return 57 | 58 | 
print("\nGitHub API Status:") 59 | print("=" * 50) 60 | 61 | # Integration manifest API limits 62 | integration = rate_limits['resources']['integration_manifest'] 63 | print("\nIntegration Manifest API:") 64 | print(f" Remaining calls: {integration['remaining']}/{integration['limit']}") 65 | print(f" Reset in: {format_time_until_reset(integration['reset'])}") 66 | print(f" Usage: {((integration['limit'] - integration['remaining']) / integration['limit'] * 100):.1f}%") 67 | 68 | # Graphql API limits 69 | graphql = rate_limits['resources']['graphql'] 70 | print("\nGraphQL API:") 71 | print(f" Remaining calls: {graphql['remaining']}/{graphql['limit']}") 72 | print(f" Reset in: {format_time_until_reset(graphql['reset'])}") 73 | print(f" Usage: {((graphql['limit'] - graphql['remaining']) / graphql['limit'] * 100):.1f}%") 74 | 75 | # Search API limits 76 | search = rate_limits['resources']['search'] 77 | print("\nSearch API:") 78 | print(f" Remaining calls: {search['remaining']}/{search['limit']}") 79 | print(f" Reset in: {format_time_until_reset(search['reset'])}") 80 | print(f" Usage: {((search['limit'] - search['remaining']) / search['limit'] * 100):.1f}%") 81 | 82 | # Core API limits 83 | core = rate_limits['resources']['core'] 84 | print("\nCore API:") 85 | print(f" Remaining calls: {core['remaining']}/{core['limit']}") 86 | print(f" Reset in: {format_time_until_reset(core['reset'])}") 87 | print(f" Usage: {((core['limit'] - core['remaining']) / core['limit'] * 100):.1f}%") 88 | 89 | if __name__ == "__main__": 90 | display_api_status() 91 | -------------------------------------------------------------------------------- /github_following.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import requests 4 | import csv 5 | import os 6 | import argparse 7 | import time 8 | from requests.auth import HTTPBasicAuth 9 | from colorama import init, Fore, Style 10 | from dotenv import load_dotenv 11 | 12 | 
# Initialize colorama 13 | init(autoreset=True) 14 | 15 | def load_config(): 16 | load_dotenv() 17 | return { 18 | 'github_token': os.getenv('GITHUB_TOKEN') 19 | } 20 | 21 | def make_github_request(url, params=None, token=None): 22 | max_retries = 5 23 | base_delay = 1 24 | headers = {'Authorization': f'token {token}'} if token else {} 25 | 26 | for attempt in range(max_retries): 27 | try: 28 | response = requests.get(url, params=params, headers=headers) 29 | response.raise_for_status() 30 | return response.json() 31 | except requests.exceptions.HTTPError as e: 32 | if e.response.status_code == 403 and 'rate limit exceeded' in str(e).lower(): 33 | if attempt < max_retries - 1: 34 | delay = base_delay * (2 ** attempt) 35 | print(f"Rate limit exceeded. Retrying in {delay} seconds...") 36 | time.sleep(delay) 37 | else: 38 | print(f"Error: Rate limit exceeded. Max retries reached.") 39 | return None 40 | else: 41 | print(f"HTTP error occurred: {e}") 42 | return None 43 | except requests.RequestException as e: 44 | print(f"Error: Unable to fetch data. 
{e}") 45 | return None 46 | 47 | time.sleep(1) # Add a small delay between requests 48 | 49 | def get_following(username, count=100, token=None): 50 | url = f"https://api.github.com/users/{username}/following" 51 | params = {"per_page": count} 52 | 53 | following = make_github_request(url, params, token) 54 | 55 | if following is None: 56 | return [] 57 | elif not following: 58 | print(f"No following accounts found for {username}") 59 | return [] 60 | 61 | return following 62 | 63 | def get_follower_count(username, token=None): 64 | url = f"https://api.github.com/users/{username}" 65 | 66 | user_data = make_github_request(url, token=token) 67 | 68 | if user_data is None: 69 | return None 70 | 71 | return user_data.get('followers') 72 | 73 | def write_to_csv(username, following, csv_file, token): 74 | file_exists = os.path.isfile(csv_file) 75 | 76 | with open(csv_file, 'a', newline='') as f: 77 | writer = csv.writer(f) 78 | if not file_exists: 79 | writer.writerow(['Account', 'Followers', 'Following']) 80 | 81 | existing_accounts = set() 82 | if file_exists: 83 | with open(csv_file, 'r') as f: 84 | reader = csv.reader(f) 85 | next(reader) # Skip header 86 | existing_accounts = set(row[0] for row in reader) 87 | 88 | for account in following: 89 | if account['login'] not in existing_accounts: 90 | follower_count = get_follower_count(account['login'], token) 91 | if follower_count is not None: 92 | writer.writerow([account['login'], follower_count, username]) 93 | else: 94 | print(f"Skipping {account['login']} due to error fetching follower count") 95 | 96 | def display_following(username, following, token=None): 97 | print(f"\n{Fore.CYAN}{'=' * 40}") 98 | print(f"{Fore.YELLOW}Accounts followed by {Fore.GREEN}{username}{Fore.YELLOW}:") 99 | print(f"{Fore.CYAN}{'=' * 40}\n") 100 | for i, account in enumerate(following, 1): 101 | follower_count = get_follower_count(account['login'], token) 102 | print(f"{Fore.MAGENTA}{i:3}. 
{Fore.GREEN}{account['login']} {Fore.RESET}- {account['html_url']}") 103 | print(f" {Fore.CYAN}Followers: {Fore.YELLOW}{follower_count}") 104 | 105 | if __name__ == "__main__": 106 | parser = argparse.ArgumentParser(description="Fetch GitHub following accounts") 107 | parser.add_argument("--username", help="GitHub username to fetch following accounts for") 108 | parser.add_argument("--count", type=int, default=100, help="Number of following accounts to fetch (default: 100)") 109 | args = parser.parse_args() 110 | 111 | config = load_config() 112 | token = config.get('github_token') 113 | if not token: 114 | print(f"{Fore.RED}Error: GitHub token not found in .env file.") 115 | exit(1) 116 | 117 | username = args.username or input("Enter a GitHub username: ") 118 | count = args.count 119 | 120 | print(f"\n{Fore.CYAN}{'=' * 40}") 121 | print(f"{Fore.YELLOW}GitHub Following Analysis") 122 | print(f"{Fore.CYAN}{'=' * 40}\n") 123 | 124 | following = get_following(username, count, token) 125 | if following: 126 | display_following(username, following, token) 127 | 128 | csv_file = 'github_following.csv' 129 | write_to_csv(username, following, csv_file, token) 130 | print(f"\n{Fore.GREEN}Data has been written to {Fore.YELLOW}{csv_file}") 131 | else: 132 | print(f"{Fore.RED}No data found for user: {Fore.YELLOW}{username}") 133 | 134 | print(f"\n{Fore.CYAN}{'=' * 40}") 135 | print(f"{Fore.YELLOW}Analysis Complete") 136 | print(f"{Fore.CYAN}{'=' * 40}") 137 | -------------------------------------------------------------------------------- /github_stars.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import requests 4 | from requests.adapters import HTTPAdapter 5 | from urllib3.util.retry import Retry 6 | import json 7 | import csv 8 | import argparse 9 | import os 10 | import subprocess 11 | import concurrent.futures 12 | from collections import defaultdict, Counter 13 | from requests.auth import 
HTTPBasicAuth 14 | from tqdm import tqdm 15 | import matplotlib.pyplot as plt 16 | from colorama import init, Fore, Style 17 | from dotenv import load_dotenv 18 | from watchdog.observers import Observer 19 | from watchdog.events import FileSystemEventHandler 20 | import time 21 | from datetime import datetime 22 | import os 23 | import pathlib 24 | import json 25 | 26 | # Initialize colorama 27 | init(autoreset=True) 28 | 29 | # Load environment variables 30 | load_dotenv() 31 | 32 | def load_config(): 33 | with open('config.json', 'r') as f: 34 | config = json.load(f) 35 | config['github_token'] = os.getenv('GITHUB_TOKEN') 36 | return config 37 | 38 | def load_ignored_repos(): 39 | try: 40 | with open('ignored_repos.txt', 'r') as f: 41 | return set(line.strip() for line in f if line.strip() and not line.startswith('#')) 42 | except FileNotFoundError: 43 | print(f"{Fore.YELLOW}Warning: ignored_repos.txt not found. No repositories will be ignored.") 44 | return set() 45 | 46 | def add_to_ignored_repos(repo): 47 | with open('ignored_repos.txt', 'a') as f: 48 | f.write(f"{repo}\n") 49 | 50 | def check_rate_limit(token=None): 51 | headers = {'Authorization': f'token {token}'} if token else {} 52 | response = requests.get('https://api.github.com/rate_limit', headers=headers) 53 | if response.status_code == 200: 54 | limits = response.json()['resources']['core'] 55 | remaining = limits['remaining'] 56 | reset_time = datetime.fromtimestamp(limits['reset']).strftime('%H:%M:%S') 57 | total = limits['limit'] 58 | used = total - remaining 59 | return remaining, reset_time, used, total 60 | return None, None, None, None 61 | 62 | def create_session(): 63 | session = requests.Session() 64 | retries = Retry( 65 | total=5, 66 | backoff_factor=1, 67 | status_forcelist=[429, 500, 502, 503, 504], 68 | allowed_methods=["GET"] 69 | ) 70 | session.mount('https://', HTTPAdapter(max_retries=retries)) 71 | return session 72 | 73 | def get_newest_stars(username, count, token): 74 | # Use 
debug level logging instead of print 75 | if os.getenv('DEBUG'): 76 | tqdm.write(f"{Fore.CYAN}Fetching stars for user: {username}") 77 | url = f"https://api.github.com/users/{username}/starred?timestamp=1" 78 | params = { 79 | "sort": "created", 80 | "direction": "desc", 81 | "per_page": count 82 | } 83 | headers = {'Authorization': f'token {token}'} if token else {} 84 | 85 | session = create_session() 86 | try: 87 | response = session.get(url, params=params, headers=headers, timeout=30) 88 | response.raise_for_status() 89 | except requests.exceptions.HTTPError as e: 90 | if e.response.status_code == 403: 91 | if 'X-RateLimit-Remaining' in e.response.headers: 92 | remaining = e.response.headers['X-RateLimit-Remaining'] 93 | reset_time = time.strftime('%H:%M:%S', time.localtime(int(e.response.headers['X-RateLimit-Reset']))) 94 | print(f"{Fore.RED}Error: Rate limit exceeded for {username}. " 95 | f"Remaining requests: {remaining}. Reset time: {reset_time}") 96 | else: 97 | print(f"{Fore.RED}Error: Rate limit exceeded or authentication required for {username}. " 98 | f"Check your GitHub token or wait a while.") 99 | else: 100 | print(f"{Fore.RED}Error: Unable to fetch data for {username}. HTTP {e.response.status_code}") 101 | return [] 102 | except requests.Timeout: 103 | print(f"{Fore.RED}Error: Request timed out for {username}. The server took too long to respond.") 104 | return [] 105 | except requests.ConnectionError: 106 | print(f"{Fore.RED}Error: Connection failed for {username}. Please check your internet connection.") 107 | return [] 108 | except requests.RequestException as e: 109 | print(f"{Fore.RED}Error: Unable to fetch data for {username}. 
{e}") 110 | return [] 111 | 112 | stars = response.json() 113 | 114 | if not stars: 115 | if os.getenv('DEBUG'): 116 | tqdm.write(f"{Fore.YELLOW}No starred repositories found for {username}") 117 | return [] 118 | 119 | return stars 120 | 121 | def get_top_accounts(csv_file, n): 122 | accounts = [] 123 | with open(csv_file, 'r') as f: 124 | reader = csv.reader(f) 125 | next(reader) # Skip header 126 | for row in reader: 127 | try: 128 | # Try old format (username, follower_count) 129 | accounts.append((row[0], int(row[1]))) 130 | except ValueError: 131 | # New format (username, repo_list) 132 | # Use number of repos as the weight 133 | repo_count = len(row[1].split(',')) 134 | accounts.append((row[0], repo_count)) 135 | 136 | return sorted(accounts, key=lambda x: x[1], reverse=True)[:n] 137 | 138 | def process_account(args): 139 | username, count, token = args 140 | try: 141 | stars = get_newest_stars(username, count, token) 142 | if stars: # If we got any stars back 143 | return [(star, username) for star in stars], len(stars), True, False 144 | else: # If we got an empty list but no exception 145 | return [], 0, False, True 146 | except Exception as e: 147 | print(f"{Fore.RED}Error in process_account for {username}: {str(e)}") 148 | return [], 0, False, True 149 | 150 | 151 | def process_accounts(config_file, top_n, token, args): 152 | count = args.stars_per_account 153 | top_accounts = get_top_accounts(args.csv_file, top_n) 154 | 155 | # Initialize tracking variables 156 | all_stars = [] 157 | total_stars_considered = 0 158 | successful_requests = 0 159 | failed_requests = 0 160 | 161 | print(f"{Fore.CYAN}Starting to process {len(top_accounts)} accounts...\n") 162 | 163 | with tqdm(total=len(top_accounts), 164 | desc="Starting...", 165 | bar_format='{desc:<30}{percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]') as pbar: 166 | 167 | print(f"\n{Fore.CYAN}{'=' * 60}") 168 | print(f"{Fore.YELLOW}Request Progress") 169 | print(f"{Fore.CYAN}{'=' * 
60}\n") 170 | 171 | with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor: 172 | # Prepare arguments for each account 173 | process_args = [(username, count, token) for username, _ in top_accounts] 174 | 175 | # Submit all tasks 176 | future_to_username = {executor.submit(process_account, arg): arg[0] 177 | for arg in process_args} 178 | 179 | # Process completed tasks as they finish 180 | for future in concurrent.futures.as_completed(future_to_username): 181 | username = future_to_username[future] 182 | try: 183 | try: 184 | stars, stars_count, success, failure = future.result() 185 | all_stars.extend(stars) 186 | total_stars_considered += stars_count 187 | if success: 188 | successful_requests += 1 189 | if failure: 190 | failed_requests += 1 191 | except Exception as e: 192 | print(f"{Fore.RED}Error processing results for {username}: {str(e)}") 193 | failed_requests += 1 194 | except Exception as e: 195 | print(f"{Fore.RED}Error processing {username}: {e}") 196 | 197 | display_name = f"{username[:7]}..." 
if len(username) > 10 else f"{username:<10}" 198 | pbar.set_description(f"Processing {display_name}") 199 | pbar.update(1) 200 | 201 | 202 | return all_stars, total_stars_considered, successful_requests, failed_requests 203 | 204 | def write_repo_data(sorted_repos, ignored_repos, timestamp=None): 205 | """Write repository data to timestamped files in both human and machine readable formats""" 206 | if timestamp is None: 207 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 208 | 209 | # Create reports directories if they don't exist 210 | reports_dir = pathlib.Path("reports") 211 | reports_dir.mkdir(exist_ok=True) 212 | 213 | data_dir = pathlib.Path("data") 214 | data_dir.mkdir(exist_ok=True) 215 | 216 | # Create the human-readable report file 217 | report_file = reports_dir / f"repo_report_{timestamp}.txt" 218 | 219 | with open(report_file, "w") as f: 220 | f.write(f"Repository Report - Generated at {timestamp}\n") 221 | f.write("=" * 80 + "\n\n") 222 | 223 | for repo, usernames in sorted_repos: 224 | is_ignored = repo in ignored_repos 225 | f.write(f"Repository: {repo}\n") 226 | f.write(f"Stars: {len(usernames)}\n") 227 | f.write(f"Status: {'Previously Displayed' if is_ignored else 'New'}\n") 228 | f.write("Starred by:\n") 229 | for username in usernames: 230 | f.write(f" - {username}\n") 231 | f.write("\n" + "-" * 40 + "\n\n") 232 | 233 | # Create the machine-readable JSON file 234 | json_file = data_dir / f"repo_data_{timestamp}.json" 235 | 236 | json_data = { 237 | "repositories": [ 238 | { 239 | "name": repo, 240 | "stars_count": len(usernames), 241 | "is_ignored": repo in ignored_repos, 242 | "starred_by": usernames 243 | } 244 | for repo, usernames in sorted_repos 245 | ] 246 | } 247 | 248 | with open(json_file, "w") as f: 249 | json.dump(json_data, f, indent=2) 250 | 251 | def create_ranking(all_stars, top_repos, ignored_repos=None): 252 | if ignored_repos is None: 253 | ignored_repos = set() 254 | repo_counts = defaultdict(list) 255 | for star, 
username in all_stars: 256 | repo_key = f"{star['owner']['login']}/{star['name']}" 257 | if repo_key not in ignored_repos: 258 | repo_counts[repo_key].append(username) 259 | 260 | sorted_repos = sorted(repo_counts.items(), key=lambda x: len(x[1]), reverse=True)[:top_repos] 261 | return sorted_repos 262 | 263 | def display_distribution(all_stars, ignored_repos=None): 264 | if ignored_repos is None: 265 | ignored_repos = set() 266 | 267 | # Only count non-ignored repos 268 | star_counts = Counter() 269 | for star, _ in all_stars: 270 | repo_key = f"{star['owner']['login']}/{star['name']}" 271 | if repo_key not in ignored_repos: 272 | star_counts[star['id']] += 1 273 | 274 | distribution = Counter(star_counts.values()) 275 | 276 | print(f"\n{Fore.CYAN}{'=' * 60}") 277 | print(f"{Fore.YELLOW}Star Distribution (Excluding Ignored Repos)") 278 | print(f"{Fore.CYAN}{'=' * 60}\n") 279 | 280 | total_repos = sum(distribution.values()) 281 | total_stars = sum(stars * count for stars, count in distribution.items()) 282 | 283 | print(f"{Fore.CYAN}Total unique repositories: {Fore.GREEN}{total_repos}") 284 | print(f"{Fore.CYAN}Total stars across repos: {Fore.GREEN}{total_stars}\n") 285 | 286 | # Calculate cumulative counts 287 | sorted_dist = sorted(distribution.items(), reverse=True) 288 | cumulative = 0 289 | 290 | print(f"{Fore.CYAN}Stars Repos Cumulative") 291 | print(f"{Fore.CYAN}{'=' * 25}") 292 | 293 | for stars, count in sorted_dist: 294 | cumulative += count 295 | print(f"{Fore.GREEN}{stars:5d} {Fore.YELLOW}{count:5d} {Fore.CYAN}{cumulative:5d}") 296 | 297 | # Create a bar plot of the distribution 298 | plt.figure(figsize=(10, 6)) 299 | plt.bar(distribution.keys(), distribution.values(), color='skyblue') 300 | plt.title('Distribution of Stars Across Repositories') 301 | plt.xlabel('Number of Stars') 302 | plt.ylabel('Number of Repositories') 303 | plt.savefig('star_distribution.png') 304 | print(f"\n{Fore.CYAN}Distribution plot saved as 'star_distribution.png'") 305 | 306 
def display_ranking(sorted_repos, interactive=False, all_stars=None, initial_ignored=None):
    """Print the repository ranking, optionally stepping through it interactively.

    Args:
        sorted_repos: list of ``(repo_full_name, usernames)`` tuples, most
            popular first.
        interactive: when True, pause on each repo, log it to
            ``browser_opens.log``, open it in the Brave browser, add it to the
            ignore list, and re-check the ignore file for external changes.
        all_stars: list of ``(star_dict, username)`` tuples used to resolve
            each repo's ``html_url``.
        initial_ignored: set of repo names that were already ignored before
            this run; these are tagged ``[PREV]`` in the output.
    """
    # Fix: the original raised TypeError ("in None") whenever the None
    # defaults were actually used; normalise both to empty containers.
    all_stars = all_stars or []
    initial_ignored = initial_ignored or set()

    # Create browser_opens.log if it doesn't exist
    if not os.path.exists('browser_opens.log'):
        with open('browser_opens.log', 'w') as f:
            f.write("# Log of repositories opened in browser\n")
            f.write("# Format: human_timestamp,unix_timestamp,repository_name\n")

    # Fix: build the repo -> URL table once instead of an O(n) generator scan
    # per ranked repo (was O(n*m)); .get() also avoids leaking StopIteration
    # when a repo is missing from all_stars.
    repo_urls = {
        f"{star['owner']['login']}/{star['name']}": star['html_url']
        for star, _ in all_stars
    }

    print(f"\n{Fore.CYAN}{'=' * 60}")
    print(f"{Fore.YELLOW}Repository Ranking (Most Popular at Top)")
    print(f"{Fore.CYAN}{'=' * 60}\n")

    for i, (repo, usernames) in enumerate(sorted_repos, 1):
        status = "[PREV]" if repo in initial_ignored else ""
        print(f"{Fore.MAGENTA}{i:3}. {status} {Fore.GREEN}{repo}")
        repo_url = repo_urls.get(repo)
        print(f"     {Fore.CYAN}URL: {Fore.BLUE}{repo_url}")
        print(f"     {Fore.CYAN}Starred by {Fore.YELLOW}{len(usernames)} {Fore.CYAN}account(s):")
        print(f"     {Fore.YELLOW}{', '.join(usernames)}")
        print()

        if interactive:
            input("Press Enter to continue...")
            # Log before attempting to open the browser so the attempt is
            # recorded even if launching fails.
            now = datetime.now()
            human_timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
            unix_timestamp = int(now.timestamp())
            with open('browser_opens.log', 'a') as log:
                log.write(f"{human_timestamp},{unix_timestamp},{repo}\n")

            try:
                subprocess.run(['brave', repo_url], check=True)
            except subprocess.CalledProcessError:
                print(f"{Fore.RED}Error: Unable to open Brave browser. Make sure it's installed and accessible from the command line.")
            except FileNotFoundError:
                print(f"{Fore.RED}Error: Brave browser not found. Make sure it's installed and accessible from the command line.")
            add_to_ignored_repos(repo)

            # Check for changes to ignored repos after each repo.
            # NOTE(review): relies on the module-level `args` set in __main__;
            # interactive mode therefore only works when run as a script.
            new_ignored = recheck_and_display(all_stars, args, initial_ignored)
            if new_ignored:
                initial_ignored = new_ignored


class IgnoreFileHandler(FileSystemEventHandler):
    """Watchdog handler that notices external edits to ignored_repos.txt."""

    def __init__(self):
        # Set to True just before this script writes the file itself, so the
        # resulting filesystem event is not treated as an external edit.
        self.last_modified_by_script = False

    def on_modified(self, event):
        """Dispatch modification events, consuming the script-write flag."""
        if event.src_path.endswith('ignored_repos.txt'):
            if not self.last_modified_by_script:
                self.handle_external_modification()
            self.last_modified_by_script = False

    def handle_external_modification(self):
        # Called only for modifications not made by this script.
        # Currently a deliberate no-op hook.
        pass


def recheck_and_display(all_stars, args, initial_ignored):
    """Recheck ignored repos and redisplay the ranking if they changed.

    Compares the ignore file on disk against ``initial_ignored`` and, when
    more than one repo was added at once (or any were removed), rebuilds and
    re-displays the ranking.

    Returns:
        The set of currently ignored repos (always, changed or not).
    """
    current_ignored = load_ignored_repos()

    # Count how many repos were added / removed since the last check
    added = current_ignored - initial_ignored
    removed = initial_ignored - current_ignored

    # Only reload if more than one repo was added at once (a single add is the
    # normal interactive flow and needs no refresh)
    if len(added) > 1 or removed:
        # Set the flag before modifying the file so the watchdog handler
        # ignores our own write
        file_handler.last_modified_by_script = True

        # Fix: dropped the redundant local `from datetime import datetime`;
        # datetime is already available at module scope.
        now = datetime.now().strftime("%H:%M:%S")
        print(f"\n{Fore.YELLOW}[{now}] Multiple changes detected in ignored repositories!")

        if added:
            print(f"{Fore.GREEN}Added to ignore list ({len(added)} repos): {', '.join(added)}")
        if removed:
            print(f"{Fore.RED}Removed from ignore list ({len(removed)} repos): {', '.join(removed)}")

        # Create new ranking with updated ignored repos
        sorted_repos = create_ranking(all_stars, args.final_ranking, current_ignored)
        print("\n" + "=" * 80 + "\n")
        print(f"{Fore.CYAN}Refreshed repository ranking:")
        display_ranking(sorted_repos, interactive=not args.no_interactive,
                        all_stars=all_stars, initial_ignored=current_ignored)

    return current_ignored


# Create a global file handler instance shared by recheck_and_display() and
# the observer started in __main__.
file_handler = IgnoreFileHandler()

if __name__ == "__main__":
    # Start timing the execution
    start_time = time.time()

    # Set up the file system observer to watch the working directory for
    # external edits to ignored_repos.txt
    observer = Observer()
    observer.schedule(file_handler, path='.', recursive=False)
    observer.start()

    # Fix: the observer thread was started but never stopped; shut it down
    # cleanly even when an error aborts the run.
    try:
        parser = argparse.ArgumentParser(description="Fetch GitHub stars for top accounts")
        parser.add_argument("--top-accounts", type=int, default=100,
                            help="Number of top accounts to consider (default: 100)")
        parser.add_argument("--stars-per-account", type=int, default=50,
                            help="Number of newest stars to consider per account (default: 50)")
        parser.add_argument("--final-ranking", type=int, default=100,
                            help="Number of items to show in the final ranking (default: 100)")
        parser.add_argument("--no-interactive", action="store_true",
                            help="Disable interactive mode")
        parser.add_argument("--csv-file", type=str, default='github_following.csv',
                            help="Path to the GitHub following CSV file (default: github_following.csv)")
        parser.add_argument("--parallel", type=int, default=5,
                            help="Number of parallel requests (default: 5)")
        parser.add_argument("--save-top", type=int,
                            help="Save the top N repositories to a file")
        parser.add_argument("--output-file", type=str, default="top_repos.txt",
                            help="Filename to save top repositories (default: top_repos.txt)")
        args = parser.parse_args()

        config = load_config()
        token = config.get('github_token')

        config_file = 'config.json'

        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}GitHub Stars Analysis")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        # Check and display rate limit info
        remaining, reset_time, used, total = check_rate_limit(token)
        if remaining is not None:
            print(f"{Fore.CYAN}GitHub API Rate Limit Status:")
            print(f"{Fore.GREEN}Remaining: {remaining}/{total} requests")
            print(f"{Fore.YELLOW}Used: {used} requests")
            print(f"{Fore.CYAN}Reset Time: {reset_time}\n")

        initial_ignored = load_ignored_repos()
        if initial_ignored:
            print(f"{Fore.YELLOW}Ignoring {len(initial_ignored)} repositories listed in ignored_repos.txt")

        print(f"{Fore.GREEN}Processing top {Fore.YELLOW}{args.top_accounts} {Fore.GREEN}accounts...")
        print(f"{Fore.GREEN}Considering {Fore.YELLOW}{args.stars_per_account} {Fore.GREEN}newest stars per account...")
        all_stars, total_stars_considered, successful_requests, failed_requests = process_accounts(
            config_file, args.top_accounts, token, args)

        # These counts will be shown in display_distribution() with ignored repos excluded
        display_distribution(all_stars, initial_ignored)

        sorted_repos = create_ranking(all_stars, args.final_ranking, initial_ignored)

        # Generate timestamp for this run
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Write all repository data before displaying (len(all_stars) is an
        # upper bound on distinct repos, so this ranking includes everything)
        all_sorted_repos = create_ranking(all_stars, len(all_stars), initial_ignored)
        write_repo_data(all_sorted_repos, initial_ignored, timestamp)
        print(f"\n{Fore.CYAN}Report saved to:")
        print(f"{Fore.GREEN}  - reports/repo_report_{timestamp}.txt (human readable)")
        print(f"{Fore.GREEN}  - data/repo_data_{timestamp}.json (machine readable)")

        # Save top N repos if requested
        if args.save_top is not None:
            top_n = min(args.save_top, len(sorted_repos))
            # Fix: resolve URLs once instead of scanning all_stars per repo
            repo_urls = {
                f"{star['owner']['login']}/{star['name']}": star['html_url']
                for star, _ in all_stars
            }
            with open(args.output_file, 'w') as f:
                for i, (repo, usernames) in enumerate(sorted_repos[:top_n], 1):
                    f.write(f"{i}. {repo} (Starred by {len(usernames)} users)\n")
                    f.write(f"   URL: {repo_urls.get(repo)}\n")
            print(f"\n{Fore.GREEN}Saved top {top_n} repositories to {args.output_file}")

        display_ranking(sorted_repos, interactive=not args.no_interactive,
                        all_stars=all_stars, initial_ignored=initial_ignored)

        # Show final statistics
        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}Request Statistics")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        end_time = time.time()
        elapsed_time = end_time - start_time
        stars_per_second = total_stars_considered / elapsed_time if elapsed_time > 0 else 0

        print(f"{Fore.CYAN}Stars processed: {Fore.GREEN}{total_stars_considered}")
        print(f"\n{Fore.CYAN}Speed Statistics:")
        print(f"{Fore.CYAN}Total time: {Fore.GREEN}{elapsed_time:.1f} seconds")
        print(f"{Fore.CYAN}Processing speed: {Fore.GREEN}{stars_per_second:.1f} stars/second")
    finally:
        observer.stop()
        observer.join()