├── .gitignore ├── LICENSE ├── README.md ├── config.json ├── github_api_status.py ├── github_following.py ├── github_stars.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | # Ignore aider files 171 | .aider.chat.history.md 172 | .aider.input.history 173 | .aider.tags.cache.v3/ 174 | 175 | # Ignore the changes patch file 176 | changes.patch 177 | 178 | # Ignore config and data files 179 | config.json 180 | github_following.csv 181 | ignored_repos.txt 182 | .aider* 183 | 184 | github_following_repo_prediction.csv 185 | old_account_csvs/ 186 | 187 | *.csv 188 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Tom Dörr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # 🌟 GitHub Analytics Tools 4 | 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg?style=flat-square)](LICENSE) 6 | [![Python](https://img.shields.io/badge/Python-3.6+-blue.svg?style=flat-square&logo=python&logoColor=white)](https://www.python.org) 7 | [![GitHub API](https://img.shields.io/badge/GitHub-API-green.svg?style=flat-square&logo=github)](https://docs.github.com/en/rest) 8 | 9 | Powerful Python scripts for analyzing GitHub user data, including following relationships and starred repositories. 10 | 11 |

## 🚀 Features

- 📊 Analyze following relationships between GitHub users
- ⭐ Track and rank starred repositories
- 📈 Generate detailed statistics and reports
- 🔄 Real-time data processing
- 📋 CSV export functionality

## 🛠️ Installation

1. Clone the repository:
```bash
git clone https://github.com/tom-doerr/github-analytics-tools.git
cd github-analytics-tools
```

2. Install dependencies:
```bash
pip install -r requirements.txt
```

3. Configure your GitHub token:
- Use environment variables:
```bash
export GITHUB_TOKEN=your_github_token_here
```

## 📚 Usage

### Following Analysis

```bash
python github_following.py --username <username> [--count <count>]
```

Options:
- `--username`: Target GitHub username (you are prompted for one if omitted)
- `--count`: Number of following accounts to analyze (default: 100)

### Stars Analysis

```bash
python github_stars.py [--top-accounts <count>] [--stars-per-account <count>] [--final-ranking <count>]
```

Options:
- `--top-accounts`: Number of top accounts to analyze (default: 100)
- `--stars-per-account`: Number of newest stars to consider per account (default: 50)
- `--final-ranking`: Items to show in the final ranking (default: 100)

## 📋 Configuration Files

- `config.json`: Basic settings
- `.env`: Environment variables
- `ignored_repos.txt`: Repositories to exclude

## 📊 Output

- CSV files with following data
- Console-based repository rankings
- Detailed statistics and reports

## 🤝 Contributing

Contributions are welcome! Please feel free to:
- Fork the repository
- Create a feature branch
- Submit a Pull Request

## ⚖️ License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## ⚠️ Disclaimer

This tool is for educational purposes.
Please comply with GitHub's terms of service and API usage limits. 89 | -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | { 2 | "count": 5 3 | } 4 | -------------------------------------------------------------------------------- /github_api_status.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import requests 5 | from datetime import datetime 6 | import time 7 | from dotenv import load_dotenv 8 | 9 | def get_rate_limits(): 10 | """Get GitHub API rate limit information""" 11 | load_dotenv() 12 | github_token = os.getenv('GITHUB_TOKEN') 13 | if not github_token: 14 | print("Error: GITHUB_TOKEN not found in .env file") 15 | return None 16 | 17 | headers = { 18 | 'Authorization': f'token {github_token}', 19 | 'Accept': 'application/vnd.github.v3+json' 20 | } 21 | 22 | try: 23 | # Get rate limit info 24 | response = requests.get('https://api.github.com/rate_limit', headers=headers) 25 | response.raise_for_status() 26 | return response.json() 27 | 28 | except requests.exceptions.RequestException as e: 29 | print(f"Error accessing GitHub API: {e}") 30 | return None 31 | 32 | def format_time_until_reset(reset_timestamp): 33 | """Format the time until rate limit reset""" 34 | now = datetime.now().timestamp() 35 | time_left = reset_timestamp - now 36 | 37 | if time_left <= 0: 38 | return "Reset time has passed" 39 | 40 | minutes, seconds = divmod(int(time_left), 60) 41 | hours, minutes = divmod(minutes, 60) 42 | 43 | parts = [] 44 | if hours > 0: 45 | parts.append(f"{hours}h") 46 | if minutes > 0: 47 | parts.append(f"{minutes}m") 48 | parts.append(f"{seconds}s") 49 | 50 | return " ".join(parts) 51 | 52 | def display_api_status(): 53 | """Display GitHub API status information""" 54 | rate_limits = get_rate_limits() 55 | if not rate_limits: 56 | return 57 | 58 | 
print("\nGitHub API Status:") 59 | print("=" * 50) 60 | 61 | # Integration manifest API limits 62 | integration = rate_limits['resources']['integration_manifest'] 63 | print("\nIntegration Manifest API:") 64 | print(f" Remaining calls: {integration['remaining']}/{integration['limit']}") 65 | print(f" Reset in: {format_time_until_reset(integration['reset'])}") 66 | print(f" Usage: {((integration['limit'] - integration['remaining']) / integration['limit'] * 100):.1f}%") 67 | 68 | # Graphql API limits 69 | graphql = rate_limits['resources']['graphql'] 70 | print("\nGraphQL API:") 71 | print(f" Remaining calls: {graphql['remaining']}/{graphql['limit']}") 72 | print(f" Reset in: {format_time_until_reset(graphql['reset'])}") 73 | print(f" Usage: {((graphql['limit'] - graphql['remaining']) / graphql['limit'] * 100):.1f}%") 74 | 75 | # Search API limits 76 | search = rate_limits['resources']['search'] 77 | print("\nSearch API:") 78 | print(f" Remaining calls: {search['remaining']}/{search['limit']}") 79 | print(f" Reset in: {format_time_until_reset(search['reset'])}") 80 | print(f" Usage: {((search['limit'] - search['remaining']) / search['limit'] * 100):.1f}%") 81 | 82 | # Core API limits 83 | core = rate_limits['resources']['core'] 84 | print("\nCore API:") 85 | print(f" Remaining calls: {core['remaining']}/{core['limit']}") 86 | print(f" Reset in: {format_time_until_reset(core['reset'])}") 87 | print(f" Usage: {((core['limit'] - core['remaining']) / core['limit'] * 100):.1f}%") 88 | 89 | if __name__ == "__main__": 90 | display_api_status() 91 | -------------------------------------------------------------------------------- /github_following.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import requests 4 | import csv 5 | import os 6 | import argparse 7 | import time 8 | from requests.auth import HTTPBasicAuth 9 | from colorama import init, Fore, Style 10 | from dotenv import load_dotenv 11 | 12 | 
# Initialize colorama 13 | init(autoreset=True) 14 | 15 | def load_config(): 16 | load_dotenv() 17 | return { 18 | 'github_token': os.getenv('GITHUB_TOKEN') 19 | } 20 | 21 | def make_github_request(url, params=None, token=None): 22 | max_retries = 5 23 | base_delay = 1 24 | headers = {'Authorization': f'token {token}'} if token else {} 25 | 26 | for attempt in range(max_retries): 27 | try: 28 | response = requests.get(url, params=params, headers=headers) 29 | response.raise_for_status() 30 | return response.json() 31 | except requests.exceptions.HTTPError as e: 32 | if e.response.status_code == 403 and 'rate limit exceeded' in str(e).lower(): 33 | if attempt < max_retries - 1: 34 | delay = base_delay * (2 ** attempt) 35 | print(f"Rate limit exceeded. Retrying in {delay} seconds...") 36 | time.sleep(delay) 37 | else: 38 | print(f"Error: Rate limit exceeded. Max retries reached.") 39 | return None 40 | else: 41 | print(f"HTTP error occurred: {e}") 42 | return None 43 | except requests.RequestException as e: 44 | print(f"Error: Unable to fetch data. 
{e}") 45 | return None 46 | 47 | time.sleep(1) # Add a small delay between requests 48 | 49 | def get_following(username, count=100, token=None): 50 | url = f"https://api.github.com/users/{username}/following" 51 | params = {"per_page": count} 52 | 53 | following = make_github_request(url, params, token) 54 | 55 | if following is None: 56 | return [] 57 | elif not following: 58 | print(f"No following accounts found for {username}") 59 | return [] 60 | 61 | return following 62 | 63 | def get_follower_count(username, token=None): 64 | url = f"https://api.github.com/users/{username}" 65 | 66 | user_data = make_github_request(url, token=token) 67 | 68 | if user_data is None: 69 | return None 70 | 71 | return user_data.get('followers') 72 | 73 | def write_to_csv(username, following, csv_file, token): 74 | file_exists = os.path.isfile(csv_file) 75 | 76 | with open(csv_file, 'a', newline='') as f: 77 | writer = csv.writer(f) 78 | if not file_exists: 79 | writer.writerow(['Account', 'Followers', 'Following']) 80 | 81 | existing_accounts = set() 82 | if file_exists: 83 | with open(csv_file, 'r') as f: 84 | reader = csv.reader(f) 85 | next(reader) # Skip header 86 | existing_accounts = set(row[0] for row in reader) 87 | 88 | for account in following: 89 | if account['login'] not in existing_accounts: 90 | follower_count = get_follower_count(account['login'], token) 91 | if follower_count is not None: 92 | writer.writerow([account['login'], follower_count, username]) 93 | else: 94 | print(f"Skipping {account['login']} due to error fetching follower count") 95 | 96 | def display_following(username, following, token=None): 97 | print(f"\n{Fore.CYAN}{'=' * 40}") 98 | print(f"{Fore.YELLOW}Accounts followed by {Fore.GREEN}{username}{Fore.YELLOW}:") 99 | print(f"{Fore.CYAN}{'=' * 40}\n") 100 | for i, account in enumerate(following, 1): 101 | follower_count = get_follower_count(account['login'], token) 102 | print(f"{Fore.MAGENTA}{i:3}. 
{Fore.GREEN}{account['login']} {Fore.RESET}- {account['html_url']}") 103 | print(f" {Fore.CYAN}Followers: {Fore.YELLOW}{follower_count}") 104 | 105 | if __name__ == "__main__": 106 | parser = argparse.ArgumentParser(description="Fetch GitHub following accounts") 107 | parser.add_argument("--username", help="GitHub username to fetch following accounts for") 108 | parser.add_argument("--count", type=int, default=100, help="Number of following accounts to fetch (default: 100)") 109 | args = parser.parse_args() 110 | 111 | config = load_config() 112 | token = config.get('github_token') 113 | if not token: 114 | print(f"{Fore.RED}Error: GitHub token not found in .env file.") 115 | exit(1) 116 | 117 | username = args.username or input("Enter a GitHub username: ") 118 | count = args.count 119 | 120 | print(f"\n{Fore.CYAN}{'=' * 40}") 121 | print(f"{Fore.YELLOW}GitHub Following Analysis") 122 | print(f"{Fore.CYAN}{'=' * 40}\n") 123 | 124 | following = get_following(username, count, token) 125 | if following: 126 | display_following(username, following, token) 127 | 128 | csv_file = 'github_following.csv' 129 | write_to_csv(username, following, csv_file, token) 130 | print(f"\n{Fore.GREEN}Data has been written to {Fore.YELLOW}{csv_file}") 131 | else: 132 | print(f"{Fore.RED}No data found for user: {Fore.YELLOW}{username}") 133 | 134 | print(f"\n{Fore.CYAN}{'=' * 40}") 135 | print(f"{Fore.YELLOW}Analysis Complete") 136 | print(f"{Fore.CYAN}{'=' * 40}") 137 | -------------------------------------------------------------------------------- /github_stars.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import requests 4 | from requests.adapters import HTTPAdapter 5 | from urllib3.util.retry import Retry 6 | import json 7 | import csv 8 | import argparse 9 | import os 10 | import subprocess 11 | import concurrent.futures 12 | from collections import defaultdict, Counter 13 | from requests.auth import 
HTTPBasicAuth 14 | from tqdm import tqdm 15 | import matplotlib.pyplot as plt 16 | from colorama import init, Fore, Style 17 | from dotenv import load_dotenv 18 | from watchdog.observers import Observer 19 | from watchdog.events import FileSystemEventHandler 20 | import time 21 | from datetime import datetime 22 | import os 23 | import pathlib 24 | import json 25 | 26 | # Initialize colorama 27 | init(autoreset=True) 28 | 29 | # Load environment variables 30 | load_dotenv() 31 | 32 | def load_config(): 33 | with open('config.json', 'r') as f: 34 | config = json.load(f) 35 | config['github_token'] = os.getenv('GITHUB_TOKEN') 36 | return config 37 | 38 | def load_ignored_repos(): 39 | try: 40 | with open('ignored_repos.txt', 'r') as f: 41 | return set(line.strip() for line in f if line.strip() and not line.startswith('#')) 42 | except FileNotFoundError: 43 | print(f"{Fore.YELLOW}Warning: ignored_repos.txt not found. No repositories will be ignored.") 44 | return set() 45 | 46 | def add_to_ignored_repos(repo): 47 | with open('ignored_repos.txt', 'a') as f: 48 | f.write(f"{repo}\n") 49 | 50 | def check_rate_limit(token=None): 51 | headers = {'Authorization': f'token {token}'} if token else {} 52 | response = requests.get('https://api.github.com/rate_limit', headers=headers) 53 | if response.status_code == 200: 54 | limits = response.json()['resources']['core'] 55 | remaining = limits['remaining'] 56 | reset_time = datetime.fromtimestamp(limits['reset']).strftime('%H:%M:%S') 57 | total = limits['limit'] 58 | used = total - remaining 59 | return remaining, reset_time, used, total 60 | return None, None, None, None 61 | 62 | def create_session(): 63 | session = requests.Session() 64 | retries = Retry( 65 | total=5, 66 | backoff_factor=1, 67 | status_forcelist=[429, 500, 502, 503, 504], 68 | allowed_methods=["GET"] 69 | ) 70 | session.mount('https://', HTTPAdapter(max_retries=retries)) 71 | return session 72 | 73 | def get_newest_stars(username, count, token): 74 | # Use 
debug level logging instead of print 75 | if os.getenv('DEBUG'): 76 | tqdm.write(f"{Fore.CYAN}Fetching stars for user: {username}") 77 | url = f"https://api.github.com/users/{username}/starred?timestamp=1" 78 | params = { 79 | "sort": "created", 80 | "direction": "desc", 81 | "per_page": count 82 | } 83 | headers = {'Authorization': f'token {token}'} if token else {} 84 | 85 | session = create_session() 86 | try: 87 | response = session.get(url, params=params, headers=headers, timeout=30) 88 | response.raise_for_status() 89 | except requests.exceptions.HTTPError as e: 90 | if e.response.status_code == 403: 91 | if 'X-RateLimit-Remaining' in e.response.headers: 92 | remaining = e.response.headers['X-RateLimit-Remaining'] 93 | reset_time = time.strftime('%H:%M:%S', time.localtime(int(e.response.headers['X-RateLimit-Reset']))) 94 | print(f"{Fore.RED}Error: Rate limit exceeded for {username}. " 95 | f"Remaining requests: {remaining}. Reset time: {reset_time}") 96 | else: 97 | print(f"{Fore.RED}Error: Rate limit exceeded or authentication required for {username}. " 98 | f"Check your GitHub token or wait a while.") 99 | else: 100 | print(f"{Fore.RED}Error: Unable to fetch data for {username}. HTTP {e.response.status_code}") 101 | return [] 102 | except requests.Timeout: 103 | print(f"{Fore.RED}Error: Request timed out for {username}. The server took too long to respond.") 104 | return [] 105 | except requests.ConnectionError: 106 | print(f"{Fore.RED}Error: Connection failed for {username}. Please check your internet connection.") 107 | return [] 108 | except requests.RequestException as e: 109 | print(f"{Fore.RED}Error: Unable to fetch data for {username}. 
{e}") 110 | return [] 111 | 112 | stars = response.json() 113 | 114 | if not stars: 115 | if os.getenv('DEBUG'): 116 | tqdm.write(f"{Fore.YELLOW}No starred repositories found for {username}") 117 | return [] 118 | 119 | return stars 120 | 121 | def get_top_accounts(csv_file, n): 122 | accounts = [] 123 | with open(csv_file, 'r') as f: 124 | reader = csv.reader(f) 125 | next(reader) # Skip header 126 | for row in reader: 127 | try: 128 | # Try old format (username, follower_count) 129 | accounts.append((row[0], int(row[1]))) 130 | except ValueError: 131 | # New format (username, repo_list) 132 | # Use number of repos as the weight 133 | repo_count = len(row[1].split(',')) 134 | accounts.append((row[0], repo_count)) 135 | 136 | return sorted(accounts, key=lambda x: x[1], reverse=True)[:n] 137 | 138 | def process_account(args): 139 | username, count, token = args 140 | try: 141 | stars = get_newest_stars(username, count, token) 142 | if stars: # If we got any stars back 143 | return [(star, username) for star in stars], len(stars), True, False 144 | else: # If we got an empty list but no exception 145 | return [], 0, False, True 146 | except Exception as e: 147 | print(f"{Fore.RED}Error in process_account for {username}: {str(e)}") 148 | return [], 0, False, True 149 | 150 | 151 | def process_accounts(config_file, top_n, token, args): 152 | count = args.stars_per_account 153 | top_accounts = get_top_accounts(args.csv_file, top_n) 154 | 155 | # Initialize tracking variables 156 | all_stars = [] 157 | total_stars_considered = 0 158 | successful_requests = 0 159 | failed_requests = 0 160 | 161 | print(f"{Fore.CYAN}Starting to process {len(top_accounts)} accounts...\n") 162 | 163 | with tqdm(total=len(top_accounts), 164 | desc="Starting...", 165 | bar_format='{desc:<30}{percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]') as pbar: 166 | 167 | print(f"\n{Fore.CYAN}{'=' * 60}") 168 | print(f"{Fore.YELLOW}Request Progress") 169 | print(f"{Fore.CYAN}{'=' * 
60}\n") 170 | 171 | with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as executor: 172 | # Prepare arguments for each account 173 | process_args = [(username, count, token) for username, _ in top_accounts] 174 | 175 | # Submit all tasks 176 | future_to_username = {executor.submit(process_account, arg): arg[0] 177 | for arg in process_args} 178 | 179 | # Process completed tasks as they finish 180 | for future in concurrent.futures.as_completed(future_to_username): 181 | username = future_to_username[future] 182 | try: 183 | try: 184 | stars, stars_count, success, failure = future.result() 185 | all_stars.extend(stars) 186 | total_stars_considered += stars_count 187 | if success: 188 | successful_requests += 1 189 | if failure: 190 | failed_requests += 1 191 | except Exception as e: 192 | print(f"{Fore.RED}Error processing results for {username}: {str(e)}") 193 | failed_requests += 1 194 | except Exception as e: 195 | print(f"{Fore.RED}Error processing {username}: {e}") 196 | 197 | display_name = f"{username[:7]}..." 
if len(username) > 10 else f"{username:<10}" 198 | pbar.set_description(f"Processing {display_name}") 199 | pbar.update(1) 200 | 201 | 202 | return all_stars, total_stars_considered, successful_requests, failed_requests 203 | 204 | def write_repo_data(sorted_repos, ignored_repos, timestamp=None): 205 | """Write repository data to timestamped files in both human and machine readable formats""" 206 | if timestamp is None: 207 | timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") 208 | 209 | # Create reports directories if they don't exist 210 | reports_dir = pathlib.Path("reports") 211 | reports_dir.mkdir(exist_ok=True) 212 | 213 | data_dir = pathlib.Path("data") 214 | data_dir.mkdir(exist_ok=True) 215 | 216 | # Create the human-readable report file 217 | report_file = reports_dir / f"repo_report_{timestamp}.txt" 218 | 219 | with open(report_file, "w") as f: 220 | f.write(f"Repository Report - Generated at {timestamp}\n") 221 | f.write("=" * 80 + "\n\n") 222 | 223 | for repo, usernames in sorted_repos: 224 | is_ignored = repo in ignored_repos 225 | f.write(f"Repository: {repo}\n") 226 | f.write(f"Stars: {len(usernames)}\n") 227 | f.write(f"Status: {'Previously Displayed' if is_ignored else 'New'}\n") 228 | f.write("Starred by:\n") 229 | for username in usernames: 230 | f.write(f" - {username}\n") 231 | f.write("\n" + "-" * 40 + "\n\n") 232 | 233 | # Create the machine-readable JSON file 234 | json_file = data_dir / f"repo_data_{timestamp}.json" 235 | 236 | json_data = { 237 | "repositories": [ 238 | { 239 | "name": repo, 240 | "stars_count": len(usernames), 241 | "is_ignored": repo in ignored_repos, 242 | "starred_by": usernames 243 | } 244 | for repo, usernames in sorted_repos 245 | ] 246 | } 247 | 248 | with open(json_file, "w") as f: 249 | json.dump(json_data, f, indent=2) 250 | 251 | def create_ranking(all_stars, top_repos, ignored_repos=None): 252 | if ignored_repos is None: 253 | ignored_repos = set() 254 | repo_counts = defaultdict(list) 255 | for star, 
username in all_stars: 256 | repo_key = f"{star['owner']['login']}/{star['name']}" 257 | if repo_key not in ignored_repos: 258 | repo_counts[repo_key].append(username) 259 | 260 | sorted_repos = sorted(repo_counts.items(), key=lambda x: len(x[1]), reverse=True)[:top_repos] 261 | return sorted_repos 262 | 263 | def display_distribution(all_stars, ignored_repos=None): 264 | if ignored_repos is None: 265 | ignored_repos = set() 266 | 267 | # Only count non-ignored repos 268 | star_counts = Counter() 269 | for star, _ in all_stars: 270 | repo_key = f"{star['owner']['login']}/{star['name']}" 271 | if repo_key not in ignored_repos: 272 | star_counts[star['id']] += 1 273 | 274 | distribution = Counter(star_counts.values()) 275 | 276 | print(f"\n{Fore.CYAN}{'=' * 60}") 277 | print(f"{Fore.YELLOW}Star Distribution (Excluding Ignored Repos)") 278 | print(f"{Fore.CYAN}{'=' * 60}\n") 279 | 280 | total_repos = sum(distribution.values()) 281 | total_stars = sum(stars * count for stars, count in distribution.items()) 282 | 283 | print(f"{Fore.CYAN}Total unique repositories: {Fore.GREEN}{total_repos}") 284 | print(f"{Fore.CYAN}Total stars across repos: {Fore.GREEN}{total_stars}\n") 285 | 286 | # Calculate cumulative counts 287 | sorted_dist = sorted(distribution.items(), reverse=True) 288 | cumulative = 0 289 | 290 | print(f"{Fore.CYAN}Stars Repos Cumulative") 291 | print(f"{Fore.CYAN}{'=' * 25}") 292 | 293 | for stars, count in sorted_dist: 294 | cumulative += count 295 | print(f"{Fore.GREEN}{stars:5d} {Fore.YELLOW}{count:5d} {Fore.CYAN}{cumulative:5d}") 296 | 297 | # Create a bar plot of the distribution 298 | plt.figure(figsize=(10, 6)) 299 | plt.bar(distribution.keys(), distribution.values(), color='skyblue') 300 | plt.title('Distribution of Stars Across Repositories') 301 | plt.xlabel('Number of Stars') 302 | plt.ylabel('Number of Repositories') 303 | plt.savefig('star_distribution.png') 304 | print(f"\n{Fore.CYAN}Distribution plot saved as 'star_distribution.png'") 305 | 306 
def display_ranking(sorted_repos, interactive=False, all_stars=None, initial_ignored=None):
    """Print the repository ranking, optionally stepping through it interactively.

    Args:
        sorted_repos: list of ``(repo_full_name, usernames)`` tuples, most
            popular first.
        interactive: when True, pause on each repo, log it to
            ``browser_opens.log``, open it in the Brave browser, add it to the
            ignore list, and re-check the ignore file for external changes.
        all_stars: list of ``(star_dict, username)`` tuples used to resolve
            each repo's ``html_url``.
        initial_ignored: set of repo names that were already ignored before
            this run; these are tagged ``[PREV]`` in the output.
    """
    # Fix: the original raised TypeError ("in None") whenever the None
    # defaults were actually used; normalise both to empty containers.
    all_stars = all_stars or []
    initial_ignored = initial_ignored or set()

    # Create browser_opens.log if it doesn't exist
    if not os.path.exists('browser_opens.log'):
        with open('browser_opens.log', 'w') as f:
            f.write("# Log of repositories opened in browser\n")
            f.write("# Format: human_timestamp,unix_timestamp,repository_name\n")

    # Fix: build the repo -> URL table once instead of an O(n) generator scan
    # per ranked repo (was O(n*m)); .get() also avoids leaking StopIteration
    # when a repo is missing from all_stars.
    repo_urls = {
        f"{star['owner']['login']}/{star['name']}": star['html_url']
        for star, _ in all_stars
    }

    print(f"\n{Fore.CYAN}{'=' * 60}")
    print(f"{Fore.YELLOW}Repository Ranking (Most Popular at Top)")
    print(f"{Fore.CYAN}{'=' * 60}\n")

    for i, (repo, usernames) in enumerate(sorted_repos, 1):
        status = "[PREV]" if repo in initial_ignored else ""
        print(f"{Fore.MAGENTA}{i:3}. {status} {Fore.GREEN}{repo}")
        repo_url = repo_urls.get(repo)
        print(f"     {Fore.CYAN}URL: {Fore.BLUE}{repo_url}")
        print(f"     {Fore.CYAN}Starred by {Fore.YELLOW}{len(usernames)} {Fore.CYAN}account(s):")
        print(f"     {Fore.YELLOW}{', '.join(usernames)}")
        print()

        if interactive:
            input("Press Enter to continue...")
            # Log before attempting to open the browser so the attempt is
            # recorded even if launching fails.
            now = datetime.now()
            human_timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
            unix_timestamp = int(now.timestamp())
            with open('browser_opens.log', 'a') as log:
                log.write(f"{human_timestamp},{unix_timestamp},{repo}\n")

            try:
                subprocess.run(['brave', repo_url], check=True)
            except subprocess.CalledProcessError:
                print(f"{Fore.RED}Error: Unable to open Brave browser. Make sure it's installed and accessible from the command line.")
            except FileNotFoundError:
                print(f"{Fore.RED}Error: Brave browser not found. Make sure it's installed and accessible from the command line.")
            add_to_ignored_repos(repo)

            # Check for changes to ignored repos after each repo.
            # NOTE(review): relies on the module-level `args` set in __main__;
            # interactive mode therefore only works when run as a script.
            new_ignored = recheck_and_display(all_stars, args, initial_ignored)
            if new_ignored:
                initial_ignored = new_ignored


class IgnoreFileHandler(FileSystemEventHandler):
    """Watchdog handler that notices external edits to ignored_repos.txt."""

    def __init__(self):
        # Set to True just before this script writes the file itself, so the
        # resulting filesystem event is not treated as an external edit.
        self.last_modified_by_script = False

    def on_modified(self, event):
        """Dispatch modification events, consuming the script-write flag."""
        if event.src_path.endswith('ignored_repos.txt'):
            if not self.last_modified_by_script:
                self.handle_external_modification()
            self.last_modified_by_script = False

    def handle_external_modification(self):
        # Called only for modifications not made by this script.
        # Currently a deliberate no-op hook.
        pass


def recheck_and_display(all_stars, args, initial_ignored):
    """Recheck ignored repos and redisplay the ranking if they changed.

    Compares the ignore file on disk against ``initial_ignored`` and, when
    more than one repo was added at once (or any were removed), rebuilds and
    re-displays the ranking.

    Returns:
        The set of currently ignored repos (always, changed or not).
    """
    current_ignored = load_ignored_repos()

    # Count how many repos were added / removed since the last check
    added = current_ignored - initial_ignored
    removed = initial_ignored - current_ignored

    # Only reload if more than one repo was added at once (a single add is the
    # normal interactive flow and needs no refresh)
    if len(added) > 1 or removed:
        # Set the flag before modifying the file so the watchdog handler
        # ignores our own write
        file_handler.last_modified_by_script = True

        # Fix: dropped the redundant local `from datetime import datetime`;
        # datetime is already available at module scope.
        now = datetime.now().strftime("%H:%M:%S")
        print(f"\n{Fore.YELLOW}[{now}] Multiple changes detected in ignored repositories!")

        if added:
            print(f"{Fore.GREEN}Added to ignore list ({len(added)} repos): {', '.join(added)}")
        if removed:
            print(f"{Fore.RED}Removed from ignore list ({len(removed)} repos): {', '.join(removed)}")

        # Create new ranking with updated ignored repos
        sorted_repos = create_ranking(all_stars, args.final_ranking, current_ignored)
        print("\n" + "=" * 80 + "\n")
        print(f"{Fore.CYAN}Refreshed repository ranking:")
        display_ranking(sorted_repos, interactive=not args.no_interactive,
                        all_stars=all_stars, initial_ignored=current_ignored)

    return current_ignored


# Create a global file handler instance shared by recheck_and_display() and
# the observer started in __main__.
file_handler = IgnoreFileHandler()

if __name__ == "__main__":
    # Start timing the execution
    start_time = time.time()

    # Set up the file system observer to watch the working directory for
    # external edits to ignored_repos.txt
    observer = Observer()
    observer.schedule(file_handler, path='.', recursive=False)
    observer.start()

    # Fix: the observer thread was started but never stopped; shut it down
    # cleanly even when an error aborts the run.
    try:
        parser = argparse.ArgumentParser(description="Fetch GitHub stars for top accounts")
        parser.add_argument("--top-accounts", type=int, default=100,
                            help="Number of top accounts to consider (default: 100)")
        parser.add_argument("--stars-per-account", type=int, default=50,
                            help="Number of newest stars to consider per account (default: 50)")
        parser.add_argument("--final-ranking", type=int, default=100,
                            help="Number of items to show in the final ranking (default: 100)")
        parser.add_argument("--no-interactive", action="store_true",
                            help="Disable interactive mode")
        parser.add_argument("--csv-file", type=str, default='github_following.csv',
                            help="Path to the GitHub following CSV file (default: github_following.csv)")
        parser.add_argument("--parallel", type=int, default=5,
                            help="Number of parallel requests (default: 5)")
        parser.add_argument("--save-top", type=int,
                            help="Save the top N repositories to a file")
        parser.add_argument("--output-file", type=str, default="top_repos.txt",
                            help="Filename to save top repositories (default: top_repos.txt)")
        args = parser.parse_args()

        config = load_config()
        token = config.get('github_token')

        config_file = 'config.json'

        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}GitHub Stars Analysis")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        # Check and display rate limit info
        remaining, reset_time, used, total = check_rate_limit(token)
        if remaining is not None:
            print(f"{Fore.CYAN}GitHub API Rate Limit Status:")
            print(f"{Fore.GREEN}Remaining: {remaining}/{total} requests")
            print(f"{Fore.YELLOW}Used: {used} requests")
            print(f"{Fore.CYAN}Reset Time: {reset_time}\n")

        initial_ignored = load_ignored_repos()
        if initial_ignored:
            print(f"{Fore.YELLOW}Ignoring {len(initial_ignored)} repositories listed in ignored_repos.txt")

        print(f"{Fore.GREEN}Processing top {Fore.YELLOW}{args.top_accounts} {Fore.GREEN}accounts...")
        print(f"{Fore.GREEN}Considering {Fore.YELLOW}{args.stars_per_account} {Fore.GREEN}newest stars per account...")
        all_stars, total_stars_considered, successful_requests, failed_requests = process_accounts(
            config_file, args.top_accounts, token, args)

        # These counts will be shown in display_distribution() with ignored repos excluded
        display_distribution(all_stars, initial_ignored)

        sorted_repos = create_ranking(all_stars, args.final_ranking, initial_ignored)

        # Generate timestamp for this run
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Write all repository data before displaying (len(all_stars) is an
        # upper bound on distinct repos, so this ranking includes everything)
        all_sorted_repos = create_ranking(all_stars, len(all_stars), initial_ignored)
        write_repo_data(all_sorted_repos, initial_ignored, timestamp)
        print(f"\n{Fore.CYAN}Report saved to:")
        print(f"{Fore.GREEN}  - reports/repo_report_{timestamp}.txt (human readable)")
        print(f"{Fore.GREEN}  - data/repo_data_{timestamp}.json (machine readable)")

        # Save top N repos if requested
        if args.save_top is not None:
            top_n = min(args.save_top, len(sorted_repos))
            # Fix: resolve URLs once instead of scanning all_stars per repo
            repo_urls = {
                f"{star['owner']['login']}/{star['name']}": star['html_url']
                for star, _ in all_stars
            }
            with open(args.output_file, 'w') as f:
                for i, (repo, usernames) in enumerate(sorted_repos[:top_n], 1):
                    f.write(f"{i}. {repo} (Starred by {len(usernames)} users)\n")
                    f.write(f"   URL: {repo_urls.get(repo)}\n")
            print(f"\n{Fore.GREEN}Saved top {top_n} repositories to {args.output_file}")

        display_ranking(sorted_repos, interactive=not args.no_interactive,
                        all_stars=all_stars, initial_ignored=initial_ignored)

        # Show final statistics
        print(f"\n{Fore.CYAN}{'=' * 60}")
        print(f"{Fore.YELLOW}Request Statistics")
        print(f"{Fore.CYAN}{'=' * 60}\n")

        end_time = time.time()
        elapsed_time = end_time - start_time
        stars_per_second = total_stars_considered / elapsed_time if elapsed_time > 0 else 0

        print(f"{Fore.CYAN}Stars processed: {Fore.GREEN}{total_stars_considered}")
        print(f"\n{Fore.CYAN}Speed Statistics:")
        print(f"{Fore.CYAN}Total time: {Fore.GREEN}{elapsed_time:.1f} seconds")
        print(f"{Fore.CYAN}Processing speed: {Fore.GREEN}{stars_per_second:.1f} stars/second")
    finally:
        observer.stop()
        observer.join()