├── .flake8 ├── .github └── workflows │ └── pylint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── .ruff.toml ├── .vscode └── settings.json ├── CITATION.cff ├── LICENSE ├── README.md ├── github.db ├── pics ├── db.png ├── demo.png ├── demo2.png └── warning1.png ├── requirements.txt └── src ├── configs.py ├── main.py ├── manager.py └── utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E501, E203 3 | max-line-length = 200 4 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.12"] 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | cookies.pkl 7 | .progress.txt 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before 
PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | cover/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | .pybuilder/ 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | # For a library or package, you might want to ignore these files since the code is 89 | # intended to run in multiple environments; otherwise, check them in: 90 | # .python-version 91 | 92 | # pipenv 93 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 94 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 95 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 96 | # install all needed dependencies. 97 | #Pipfile.lock 98 | 99 | # poetry 100 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 101 | # This is especially recommended for binary packages to ensure reproducibility, and is more 102 | # commonly ignored for libraries. 103 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 104 | #poetry.lock 105 | 106 | # pdm 107 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
108 | #pdm.lock 109 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 110 | # in version control. 111 | # https://pdm.fming.dev/#use-with-ide 112 | .pdm.toml 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | #.idea/ 163 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/PyCQA/isort 5 | rev: 5.13.2 6 | hooks: 7 | - id: isort 8 | args: [--profile, black] 9 | 10 | # Using this mirror lets us use mypyc-compiled black, which is about 2x faster 11 | - repo: https://github.com/psf/black-pre-commit-mirror 12 | rev: 24.4.2 13 | hooks: 14 | - id: 15 | black 16 | # It is recommended to specify the latest version of Python 17 | # supported by your project here, or alternatively use 18 | # pre-commit's default_language_version, see 19 | # https://pre-commit.com/#top_level-default_language_version 20 | language_version: python3.12 21 | args: ["--line-length", "200", "--exclude", "migrations/"] 22 | 23 | - repo: https://github.com/astral-sh/ruff-pre-commit 24 | rev: v0.0.285 25 | hooks: 26 | - id: ruff 27 | alias: autoformat 28 | args: [--fix] 29 | 30 | - repo: https://github.com/pycqa/flake8 31 | rev: 7.1.0 32 | hooks: 33 | - id: flake8 34 | exclude: ^tests/(data|examples)/ 35 | 36 | - repo: https://github.com/pre-commit/mirrors-mypy 37 | rev: v1.10.1 38 | hooks: 39 | - id: mypy 40 | args: [--ignore-missing-imports, --no-namespace-packages] 41 | 42 | - repo: https://github.com/pre-commit/pre-commit-hooks 43 | rev: v3.2.0 44 | hooks: 45 | - id: trailing-whitespace 46 | - id: end-of-file-fixer 47 | - id: check-yaml 48 | - id: check-added-large-files 49 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [FORMAT] 2 | max-line-length=200 3 | 4 | [MASTER] 5 | extension-pkg-whitelist=tqdm,selenium,rich,openai 6 | 7 | [TYPECHECK] 8 | 
ignored-modules=tqdm,selenium,rich,openai 9 | -------------------------------------------------------------------------------- /.ruff.toml: -------------------------------------------------------------------------------- 1 | # Exclude a variety of commonly ignored directories. 2 | exclude = [ 3 | ".bzr", 4 | ".direnv", 5 | ".eggs", 6 | ".git", 7 | ".git-rewrite", 8 | ".hg", 9 | ".ipynb_checkpoints", 10 | ".mypy_cache", 11 | ".nox", 12 | ".pants.d", 13 | ".pyenv", 14 | ".pytest_cache", 15 | ".pytype", 16 | ".ruff_cache", 17 | ".svn", 18 | ".tox", 19 | ".venv", 20 | ".vscode", 21 | "__pypackages__", 22 | "_build", 23 | "buck-out", 24 | "build", 25 | "dist", 26 | "node_modules", 27 | "site-packages", 28 | "venv", 29 | ] 30 | 31 | line-length = 200 32 | target-version = "py312" 33 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "black-formatter.args": [ 3 | "--line-length", 4 | "200" 5 | ] 6 | } -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 
3 | authors: 4 | - family-names: "Hou" 5 | given-names: "Junyi" 6 | orcid: "https://orcid.org/0009-0003-0443-456X" 7 | 8 | title: "ChatGPT-API-Leakage" 9 | version: 1.5 10 | # doi: 10.5281/zenodo.1234 11 | date-released: 2024-02-21 12 | url: "https://github.com/Junyi-99/ChatGPT-API-Leakage" 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Junyi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ChatGPT-API-Scanner 2 | 3 | This tool scans GitHub for available OpenAI API Keys. 
4 | 5 | ![Result Demo 1](pics/demo.png) 6 | 7 | > [!NOTE] 8 | > As of `August 21, 2024`, GitHub has enabled push protection to prevent API key leakage, which could significantly impact this repository. 9 | 10 | > [!NOTE] 11 | > As of `March 11, 2024`, secret scanning and push protection will be enabled by default for all new user-owned public repositories that you create. 12 | > Check this announcement [here](https://docs.github.com/en/code-security/getting-started/quickstart-for-securing-your-repository). 13 | 14 | > [!WARNING] 15 | > **⚠️ DISCLAIMER** 16 | > 17 | > THIS PROJECT IS ONLY FOR ***SECURITY RESEARCH*** AND REMINDS OTHERS TO PROTECT THEIR PROPERTY, DO NOT USE IT ILLEGALLY!! 18 | > 19 | > The project authors are not responsible for any consequences resulting from misuse. 20 | 21 | ## Keeping Your API Key Safe 22 | 23 | It's important to keep it safe to prevent unauthorized access. Here are some useful resources: 24 | 25 | - [Best Practices for API Key Safety](https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety) 26 | 27 | - [My API is getting leaked.. need advice!](https://community.openai.com/t/my-api-is-getting-leaked-need-advice/280564) 28 | 29 | - [My OpenAI API Key Leaked! What Should I Do?](https://www.gitguardian.com/remediation/openai-key) 30 | 31 | ## Prerequisites 32 | 33 | This project has been tested and works perfectly on macOS, Windows and WSL2 (see [Run Linux GUI apps on the Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/tutorials/gui-apps)) 34 | 35 | Ensure you have the following installed on your system: 36 | 37 | - Google Chrome 38 | - Python3 39 | 40 | ## Installation 41 | 42 | 1. Clone the repository: 43 | 44 | ```bash 45 | git clone https://github.com/Junyi-99/ChatGPT-API-Leakage 46 | 47 | cd ChatGPT-API-Leakage 48 | ``` 49 | 50 | 2. Install required pypi packages 51 | 52 | ```bash 53 | pip install selenium tqdm openai rich 54 | ``` 55 | 56 | ## Usage 57 | 58 | 1. 
Run the main script: 59 | 60 | ```bash 61 | python3 src/main.py 62 | ``` 63 | 64 | 2. You will be prompted to log in to your GitHub account in the browser. Please do so. 65 | 66 | That's it! The script will now scan GitHub for available OpenAI API Keys. 67 | 68 | ## Command Line Arguments 69 | 70 | The script supports several command line arguments for customization: 71 | 72 | | Parameter | Description | Default | 73 | |-----------|-------------|---------| 74 | | `--from-iter` | Start scanning from a specific iteration | `None` | 75 | | `--debug` | Enable debug mode for detailed logging | `False` | 76 | | `-ceko, --check-existed-keys-only` | Only check existing keys in the database | `False` | 77 | | `-k, --keywords` | Specify a list of search keywords (accepted, but not yet used when building search queries) | Default keyword list | 78 | | `-l, --languages` | Specify a list of programming languages to search | Default language list | 79 | 80 | Examples: 81 | 82 | ```bash 83 | # Start scanning from iteration 100 84 | python3 src/main.py --from-iter 100 85 | 86 | # Only check existing keys 87 | python3 src/main.py --check-existed-keys-only 88 | 89 | # Use custom keywords and languages 90 | python3 src/main.py -k "openai" "chatgpt" -l python javascript 91 | ``` 92 | 93 | ## Results 94 | 95 | The results are stored in the `github.db` SQLite database, which is created in the same directory as the script. 96 | 97 | You can view the contents of this database using any SQLite database browser of your choice. 98 | 99 |
100 | Running Demo 103 |

104 | Running Demo 105 |

106 |
107 | 108 |
109 | Result in DB 112 |

113 | Result stored in SQLite (different API Key status) 114 |

115 |
116 | 117 | ## FAQ 118 | 119 | **Q: Why are you using Selenium instead of the GitHub Search API?** 120 | 121 | A: The official GitHub search API does not support regex search. Only web-based search does. 122 | 123 | **Q: Why are you limiting the programming language in the search instead of searching all languages?** 124 | 125 | A: The web-based search only provides the first 5 pages of results. There are many API keys available. By limiting the language, we can break down the search results and obtain more keys. 126 | 127 | **Q: Why don't you use multithreading?** 128 | 129 | A: Both GitHub search and the OpenAI API are rate-limited, so multithreading the search would not significantly increase efficiency. Only the validation of found keys runs in a small thread pool. 130 | 131 | **Q: Why is the API Key provided in your repository not working?** 132 | 133 | A: The screenshots in this repo demonstrate the tool's ability to scan for available API keys. However, these keys may expire within hours or days. Please use the tool to scan for your own keys instead of relying on the provided examples. 134 | 135 | **Q: What is push protection?** 136 | 137 | A: A GitHub feature that prevents API key leakage by blocking pushes that contain detected secrets. See the picture below. 138 | 139 |

140 | GitHub Push Protection 141 |

142 | -------------------------------------------------------------------------------- /github.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Junyi-99/ChatGPT-API-Scanner/d9e08670d30f7dd850c9cbef56ab211f2189c4ce/github.db -------------------------------------------------------------------------------- /pics/db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Junyi-99/ChatGPT-API-Scanner/d9e08670d30f7dd850c9cbef56ab211f2189c4ce/pics/db.png -------------------------------------------------------------------------------- /pics/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Junyi-99/ChatGPT-API-Scanner/d9e08670d30f7dd850c9cbef56ab211f2189c4ce/pics/demo.png -------------------------------------------------------------------------------- /pics/demo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Junyi-99/ChatGPT-API-Scanner/d9e08670d30f7dd850c9cbef56ab211f2189c4ce/pics/demo2.png -------------------------------------------------------------------------------- /pics/warning1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Junyi-99/ChatGPT-API-Scanner/d9e08670d30f7dd850c9cbef56ab211f2189c4ce/pics/warning1.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core dependencies 2 | selenium>=4.0.0 3 | tqdm>=4.65.0 4 | openai>=1.0.0 5 | rich>=13.0.0 6 | 7 | # Development dependencies 8 | pylint>=3.0.0 9 | flake8>=7.0.0 10 | ruff>=0.2.0 -------------------------------------------------------------------------------- /src/configs.py: 
"""
This module is used to store the configurations.
"""

import re

# Keywords are not enabled by current version.
KEYWORDS = [
    "CoT",
    "DPO",
    "RLHF",
    "agent",
    "ai model",
    "aios",
    "api key",
    "apikey",
    "artificial intelligence",
    "chain of thought",
    "chatbot",
    "chatgpt",
    "competitor analysis",
    "content strategy",
    "conversational AI",
    "data analysis",
    "deep learning",
    "direct preference optimization",
    "experiment",
    "gpt",
    "gpt-3",
    "gpt-4",
    "gpt4",
    "key",
    "keyword clustering",
    "keyword research",
    "lab",
    "language model experimentation",
    "large language model",
    "llama.cpp",
    "llm",
    "long-tail keywords",
    "machine learning",
    "multi-agent",
    "multi-agent systems",
    "natural language processing",
    "openai",
    "personalized AI",
    "project",
    "rag",
    "reinforcement learning from human feedback",
    "retrieval-augmented generation",
    "search intent",
    "semantic search",
    "thoughts",
    "virtual assistant",
    "实验",
    "密钥",
    "测试",
    "语言模型",
]

# Values for GitHub's `language:` search qualifier. They are interpolated into
# the search URL verbatim, hence the pre-encoded "C%2B%2B" and the quoted
# '"Jupyter Notebook"'.
LANGUAGES = [
    "Dotenv",
    "Text",
    "JavaScript",
    "Python",
    "TypeScript",
    "Dockerfile",
    "Markdown",
    '"Jupyter Notebook"',
    "Shell",
    "Java",
    "Go",
    "C%2B%2B",
    "PHP",
]

# Groups of `path:` qualifiers; each group is AND-ed with every regex query.
PATHS = [
    "path:.xml OR path:.json OR path:.properties OR path:.sql OR path:.txt OR path:.log OR path:.tmp OR path:.backup OR path:.bak OR path:.enc",
    "path:.yml OR path:.yaml OR path:.toml OR path:.ini OR path:.config OR path:.conf OR path:.cfg OR path:.env OR path:.envrc OR path:.prod",
    "path:.secret OR path:.private OR path:*.key",
]

# Each entry is (regex, have_many_results, result_too_long):
#   regex             - compiled pattern for one API-key format
#   have_many_results - the scanner issues one search per language for this pattern
#   result_too_long   - the scanner does not try to match this pattern against
#                       search-result snippets; the file page is opened instead
# NOTE(review): `regex.pattern` is interpolated into search URLs by the scanner.
# A bare "+" in a query string decodes as a space, so the URL builder should
# percent-encode patterns that contain "+" - confirm against the caller.
REGEX_LIST = [
    # Named Project API Key (no matter normal or restricted) still valid until Dec 2, 2024
    (re.compile(r"sk-proj-[A-Za-z0-9-_]{74}T3BlbkFJ[A-Za-z0-9-_]{73}A"), True, True),
    # Old Project API Key
    (re.compile(r"sk-proj-[A-Za-z0-9-_]{58}T3BlbkFJ[A-Za-z0-9-_]{58}"), True, True),
    # Service Account Key
    # The quantifier must be a real "+": the previous r"\+" demanded a literal
    # plus sign before the marker, so no genuine key could ever match.
    (re.compile(r"sk-svcacct-[A-Za-z0-9-_]+T3BlbkFJ[A-Za-z0-9-_]+"), False, False),
    (re.compile(r"sk-proj-[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}"), True, False),
    # Old key format (deprecated by OpenAI)
    (re.compile(r"sk-[a-zA-Z0-9]{48}"), True, False),
]
| self.dbmgr = DatabaseManager(self.db_file) 41 | 42 | self.keywords = keywords 43 | self.languages = languages 44 | self.candidate_urls = [] 45 | for regex, too_many_results, _ in REGEX_LIST: 46 | # Add the paths to the search query 47 | for path in PATHS: 48 | self.candidate_urls.append(f"https://github.com/search?q=(/{regex.pattern}/)+AND+({path})&type=code&ref=advsearch") 49 | 50 | for language in self.languages: 51 | if too_many_results: # if the regex is too many results, then we need to add AND condition 52 | self.candidate_urls.append(f"https://github.com/search?q=(/{regex.pattern}/)+language:{language}&type=code&ref=advsearch") 53 | else: # if the regex is not too many results, then we just need the regex 54 | self.candidate_urls.append(f"https://github.com/search?q=(/{regex.pattern}/)&type=code&ref=advsearch") 55 | 56 | def login_to_github(self): 57 | """ 58 | Login to GitHub 59 | """ 60 | rich.print("🌍 Opening Chrome ...") 61 | 62 | options = webdriver.ChromeOptions() 63 | options.add_argument("--ignore-certificate-errors") 64 | options.add_argument("--ignore-ssl-errors") 65 | 66 | self.driver = webdriver.Chrome(options=options) 67 | self.driver.implicitly_wait(3) 68 | 69 | self.cookies = CookieManager(self.driver) 70 | 71 | cookie_exists = os.path.exists("cookies.pkl") 72 | self.driver.get("https://github.com/login") 73 | 74 | if not cookie_exists: 75 | rich.print("🤗 No cookies found, please login to GitHub first") 76 | input("Press Enter after you logged in: ") 77 | self.cookies.save() 78 | else: 79 | rich.print("🍪 Cookies found, loading cookies") 80 | self.cookies.load() 81 | 82 | self.cookies.verify_user_login() 83 | 84 | def _expand_all_code(self): 85 | """ 86 | Expand all the code in the current page 87 | """ 88 | elements = self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'more match')]") 89 | for element in elements: 90 | element.click() 91 | 92 | def _find_urls_and_apis(self) -> tuple[list[str], list[str]]: 93 | """ 94 | Find 
all the urls and apis in the current page 95 | """ 96 | apis_found = [] 97 | urls_need_expand = [] 98 | 99 | codes = self.driver.find_elements(by=By.CLASS_NAME, value="code-list") # type: ignore 100 | for element in codes: 101 | apis = [] 102 | # Check all regex for each code block 103 | for regex, _, too_long in REGEX_LIST[2:]: 104 | if not too_long: 105 | apis.extend(regex.findall(element.text)) 106 | 107 | if len(apis) == 0: 108 | # Need to show full code. (because the api key is too long) 109 | # get the tag 110 | a_tag = element.find_element(by=By.XPATH, value=".//a") 111 | urls_need_expand.append(a_tag.get_attribute("href")) 112 | apis_found.extend(apis) 113 | 114 | return apis_found, urls_need_expand 115 | 116 | def _process_url(self, url: str): 117 | """ 118 | Process a search query url 119 | """ 120 | if self.driver is None: 121 | raise ValueError("Driver is not initialized") 122 | 123 | self.driver.get(url) 124 | 125 | while True: # Loop until all the pages are processed 126 | # If current webpage is reached the rate limit, then wait for 30 seconds 127 | if self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'You have exceeded a secondary rate limit')]"): 128 | for _ in tqdm(range(30), desc="⏳ Rate limit reached, waiting ..."): 129 | time.sleep(1) 130 | self.driver.refresh() 131 | continue 132 | 133 | self._expand_all_code() 134 | 135 | apis_found, urls_need_expand = self._find_urls_and_apis() 136 | rich.print(f" 🌕 There are {len(urls_need_expand)} urls waiting to be expanded") 137 | 138 | try: 139 | next_buttons = self.driver.find_elements(by=By.XPATH, value="//a[@aria-label='Next Page']") 140 | rich.print("🔍 Clicking next page") 141 | WebDriverWait(self.driver, 5).until(EC.presence_of_element_located((By.XPATH, "//a[@aria-label='Next Page']"))) 142 | next_buttons = self.driver.find_elements(by=By.XPATH, value="//a[@aria-label='Next Page']") 143 | next_buttons[0].click() 144 | except Exception: # pylint: disable=broad-except 145 | 
rich.print("⚪️ No more pages") 146 | break 147 | 148 | # Handle the expand_urls 149 | for u in tqdm(urls_need_expand, desc="🔍 Expanding URLs ..."): 150 | if self.driver is None: 151 | raise ValueError("Driver is not initialized") 152 | 153 | with self.dbmgr as mgr: 154 | if mgr.get_url(u): 155 | rich.print(f" 🔑 skipping url '{u[:10]}...{u[-10:]}'") 156 | continue 157 | 158 | self.driver.get(u) 159 | time.sleep(3) # TODO: find a better way to wait for the page to load # pylint: disable=fixme 160 | 161 | retry = 0 162 | while retry <= 3: 163 | matches = [] 164 | for regex, _, _ in REGEX_LIST: 165 | matches.extend(regex.findall(self.driver.page_source)) 166 | matches = list(set(matches)) 167 | 168 | if len(matches) == 0: 169 | rich.print(f" ⚪️ No matches found in the expanded page, retrying [{retry}/3]...") 170 | retry += 1 171 | time.sleep(3) 172 | continue 173 | 174 | with self.dbmgr as mgr: 175 | new_apis = [api for api in matches if not mgr.key_exists(api)] 176 | new_apis = list(set(new_apis)) 177 | apis_found.extend(new_apis) 178 | rich.print(f" 🔬 Found {len(matches)} matches in the expanded page, adding them to the list") 179 | for match in matches: 180 | rich.print(f" '{match[:10]}...{match[-10:]}'") 181 | 182 | with self.dbmgr as mgr: 183 | mgr.insert_url(url) 184 | break 185 | 186 | self.check_api_keys_and_save(apis_found) 187 | 188 | def check_api_keys_and_save(self, keys: list[str]): 189 | """ 190 | Check a list of API keys 191 | """ 192 | with self.dbmgr as mgr: 193 | unique_keys = list(set(keys)) 194 | unique_keys = [api for api in unique_keys if not mgr.key_exists(api)] 195 | 196 | with ThreadPoolExecutor(max_workers=10) as executor: 197 | results = list(executor.map(check_key, unique_keys)) 198 | with self.dbmgr as mgr: 199 | for idx, result in enumerate(results): 200 | mgr.insert(unique_keys[idx], result) 201 | 202 | def search(self, from_iter: int | None = None): 203 | """ 204 | Search for API keys, and save the results to the database 205 | """ 206 | 
progress = ProgressManager() 207 | total = len(self.candidate_urls) 208 | pbar = tqdm( 209 | enumerate(self.candidate_urls), 210 | total=total, 211 | desc="🔍 Searching ...", 212 | ) 213 | if from_iter is None: 214 | from_iter = progress.load(total=total) 215 | 216 | for idx, url in enumerate(self.candidate_urls): 217 | if idx < from_iter: 218 | pbar.update() 219 | time.sleep(0.05) # let tqdm print the bar 220 | log.debug("⚪️ Skip %s", url) 221 | continue 222 | self._process_url(url) 223 | progress.save(idx, total) 224 | log.debug("🔍 Finished %s", url) 225 | pbar.update() 226 | pbar.close() 227 | 228 | def deduplication(self): 229 | """ 230 | Deduplicate the database 231 | """ 232 | with self.dbmgr as mgr: 233 | mgr.deduplicate() 234 | 235 | def update_existed_keys(self): 236 | """ 237 | Update previously checked API keys in the database with their current status 238 | """ 239 | with self.dbmgr as mgr: 240 | rich.print("🔄 Updating existed keys") 241 | keys = mgr.all_keys() 242 | for key in tqdm(keys, desc="🔄 Updating existed keys ..."): 243 | result = check_key(key[0]) 244 | mgr.delete(key[0]) 245 | mgr.insert(key[0], result) 246 | 247 | def update_iq_keys(self): 248 | """ 249 | Update insuffcient quota keys 250 | """ 251 | with self.dbmgr as mgr: 252 | rich.print("🔄 Updating insuffcient quota keys") 253 | keys = mgr.all_iq_keys() 254 | for key in tqdm(keys, desc="🔄 Updating insuffcient quota keys ..."): 255 | result = check_key(key[0]) 256 | mgr.delete(key[0]) 257 | mgr.insert(key[0], result) 258 | 259 | def all_available_keys(self) -> list: 260 | """ 261 | Get all available keys 262 | """ 263 | with self.dbmgr as mgr: 264 | return mgr.all_keys() 265 | 266 | def __del__(self): 267 | if hasattr(self, "driver") and self.driver is not None: 268 | self.driver.quit() 269 | 270 | 271 | def main(from_iter: int | None = None, check_existed_keys_only: bool = False, keywords: list | None = None, languages: list | None = None, check_insuffcient_quota: bool = False): 272 | """ 
273 | Main function to scan GitHub for available OpenAI API Keys 274 | """ 275 | keywords = KEYWORDS.copy() if keywords is None else keywords 276 | languages = LANGUAGES.copy() if languages is None else languages 277 | 278 | leakage = APIKeyLeakageScanner("github.db", keywords, languages) 279 | 280 | if not check_existed_keys_only: 281 | leakage.login_to_github() 282 | leakage.search(from_iter=from_iter) 283 | 284 | if check_insuffcient_quota: 285 | leakage.update_iq_keys() 286 | 287 | leakage.update_existed_keys() 288 | leakage.deduplication() 289 | keys = leakage.all_available_keys() 290 | 291 | rich.print(f"🔑 [bold green]Available keys ({len(keys)}):[/bold green]") 292 | for key in keys: 293 | rich.print(f"[bold green]{key[0]}[/bold green]") 294 | 295 | 296 | if __name__ == "__main__": 297 | parser = argparse.ArgumentParser() 298 | parser.add_argument("--from-iter", type=int, default=None, help="Start from the specific iteration") 299 | parser.add_argument( 300 | "--debug", 301 | action="store_true", 302 | default=False, 303 | help="Enable debug mode, otherwise INFO mode. 
Default is False (INFO mode)", 304 | ) 305 | parser.add_argument( 306 | "-ceko", 307 | "--check-existed-keys-only", 308 | action="store_true", 309 | default=False, 310 | help="Only check existed keys", 311 | ) 312 | parser.add_argument( 313 | "-ciq", 314 | "--check-insuffcient-quota", 315 | action="store_true", 316 | default=False, 317 | help="Check and update status of the insuffcient quota keys", 318 | ) 319 | parser.add_argument( 320 | "-k", 321 | "--keywords", 322 | nargs="+", 323 | default=KEYWORDS, 324 | help="Keywords to search", 325 | ) 326 | parser.add_argument( 327 | "-l", 328 | "--languages", 329 | nargs="+", 330 | default=LANGUAGES, 331 | help="Languages to search", 332 | ) 333 | args = parser.parse_args() 334 | 335 | if args.debug: 336 | logging.getLogger().setLevel(logging.DEBUG) 337 | 338 | main( 339 | from_iter=args.from_iter, 340 | check_existed_keys_only=args.check_existed_keys_only, 341 | keywords=args.keywords, 342 | languages=args.languages, 343 | check_insuffcient_quota=args.check_insuffcient_quota, 344 | ) 345 | -------------------------------------------------------------------------------- /src/manager.py: -------------------------------------------------------------------------------- 1 | """ 2 | Progress and Cookie Management Module 3 | 4 | This module provides functionality for managing application progress, cookies, 5 | and database operations. 
class ProgressManager:
    """
    Manages and persists progress information for long-running operations.

    The progress file holds one record: ``<iteration>/<total>/<unix timestamp>``.

    Attributes:
        progress_file (str): Path to the progress file

    Methods:
        save: Saves current progress
        load: Loads saved progress
    """

    # Saved progress older than this many seconds is ignored by `load`.
    MAX_AGE_SECONDS = 3600

    def __init__(self, progress_file=".progress.txt"):
        """
        Initialize the ProgressManager with a specified progress file.

        Args:
            progress_file (str): The file where progress data is stored.
        """
        self.progress_file = progress_file

    def save(self, from_iter: int, total: int):
        """
        Saves the current progress to a file.

        Args:
            from_iter (int): The current iteration progress.
            total (int): The total number of iterations.
        """
        with open(self.progress_file, "w", encoding="utf-8") as file:
            file.write(f"{from_iter}/{total}/{time.time()}")

    def load(self, total: int) -> int:
        """
        Loads the previously saved progress if available and valid.

        Saved progress is offered to the user only when it is recent enough
        and was recorded for the same total; otherwise the run starts over.

        Args:
            total (int): The total number of iterations for the current process.

        Returns:
            int: The iteration number to continue from (0 to start from scratch).
        """
        try:
            with open(self.progress_file, "r", encoding="utf-8") as file:
                last_, totl_, tmst_ = file.read().strip().split("/")
            last, totl, saved_at = int(last_), int(totl_), float(tmst_)
        except FileNotFoundError:
            return 0
        except ValueError:
            # Empty, truncated or otherwise corrupt record (e.g. an interrupted
            # write): treat it as "no progress" instead of aborting the scan.
            return 0

        if time.time() - saved_at < self.MAX_AGE_SECONDS and totl == total:
            action = input(f"🔍 Progress found, do you want to continue from the last progress ({last}/{totl})? [yes] | no: ").strip().lower()
            if action in {"yes", "y", ""}:
                return last

        return 0
125 | """ 126 | try: 127 | with open("cookies.pkl", "rb") as file: 128 | cookies = pickle.load(file) 129 | for cookie in cookies: 130 | try: 131 | self.driver.add_cookie(cookie) 132 | except UnableToSetCookieException: 133 | logger.debug("🟡 Warning, unable to set a cookie %s", cookie) 134 | except (EOFError, pickle.UnpicklingError): 135 | if os.path.exists("cookies.pkl"): 136 | os.remove("cookies.pkl") 137 | logger.error("🔴 Error, unable to load cookies, invalid cookies has been removed, please restart.") 138 | 139 | def verify_user_login(self): 140 | """ 141 | Test if the user is really logged in by navigating to GitHub and checking login status. 142 | """ 143 | logger.info("🤗 Redirecting ...") 144 | self.driver.get("https://github.com/") 145 | 146 | if self.driver.find_elements(by=By.XPATH, value="//*[contains(text(), 'Sign in')]"): 147 | if os.path.exists("cookies.pkl"): 148 | os.remove("cookies.pkl") 149 | logger.error("🔴 Error, you are not logged in, please restart and try again.") 150 | sys.exit(1) 151 | return True 152 | 153 | 154 | class DatabaseManager: 155 | """ 156 | This class is used to manage the database, including creating tables and handling data interactions. 157 | """ 158 | 159 | def __init__(self, db_filename: str): 160 | """ 161 | Initialize the DatabaseManager with the specified database filename. 162 | 163 | Args: 164 | db_filename (str): Path to the SQLite database file. 165 | """ 166 | self.db_filename = db_filename 167 | self.con = None 168 | self.cur = None 169 | 170 | def __enter__(self): 171 | """ 172 | Enter the runtime context related to this object, initializing the database if needed. 
173 | """ 174 | if not os.path.exists(self.db_filename): 175 | logging.info("Creating database github.db") 176 | 177 | self.con = sqlite3.connect(self.db_filename) 178 | self.cur = self.con.cursor() 179 | 180 | self.cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='APIKeys'") 181 | if self.cur.fetchone() is None: 182 | logging.info("Creating table APIKeys") 183 | self.cur.execute("CREATE TABLE APIKeys(apiKey, status, lastChecked)") 184 | 185 | self.cur.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='URLs'") 186 | if self.cur.fetchone() is None: 187 | logging.info("Creating table URLs") 188 | self.cur.execute("CREATE TABLE URLs(url, key)") 189 | 190 | return self 191 | 192 | def __exit__(self, exc_type, exc_value, traceback): 193 | """ 194 | Exit the runtime context and close the database connection. 195 | """ 196 | if self.con: 197 | self.con.close() 198 | 199 | def all_iq_keys(self) -> list: 200 | """ 201 | Get all keys with the status 'insufficient_quota'. 202 | 203 | Returns: 204 | list: A list of tuples containing API keys. 205 | """ 206 | if self.cur is None: 207 | raise ValueError("Cursor is not initialized") 208 | self.cur.execute("SELECT apiKey FROM APIKeys WHERE status='insufficient_quota'") 209 | return self.cur.fetchall() 210 | 211 | def all_keys(self) -> list: 212 | """ 213 | Get all keys with the status 'yes'. 214 | 215 | Returns: 216 | list: A list of tuples containing API keys. 217 | """ 218 | if self.cur is None: 219 | raise ValueError("Cursor is not initialized") 220 | self.cur.execute("SELECT apiKey FROM APIKeys WHERE status='yes'") 221 | return self.cur.fetchall() 222 | 223 | def deduplicate(self) -> None: 224 | """ 225 | Deduplicate the 'APIKeys' table by retaining only the latest record for each key. 
226 | """ 227 | if self.con is None: 228 | raise ValueError("Connection is not initialized") 229 | if self.cur is None: 230 | raise ValueError("Cursor is not initialized") 231 | self.cur.execute("CREATE TABLE temp_table as SELECT apiKey, status, MAX(lastChecked) as lastChecked FROM APIKeys GROUP BY apiKey;") 232 | self.cur.execute("DROP TABLE APIKeys;") 233 | self.cur.execute("ALTER TABLE temp_table RENAME TO APIKeys;") 234 | self.con.commit() 235 | 236 | def delete(self, api_key: str) -> None: 237 | """ 238 | Delete a specific API key from the database. 239 | 240 | Args: 241 | api_key (str): The unique API key to remove. 242 | """ 243 | if self.con is None: 244 | raise ValueError("Connection is not initialized") 245 | if self.cur is None: 246 | raise ValueError("Cursor is not initialized") 247 | self.cur.execute("DELETE FROM APIKeys WHERE apiKey=?", (api_key,)) 248 | self.con.commit() 249 | 250 | def insert(self, api_key: str, status: str): 251 | """ 252 | Insert a new API key and status into the database. 253 | 254 | Args: 255 | api_key (str): The API key to insert. 256 | status (str): The status of the API key. 257 | """ 258 | if self.con is None: 259 | raise ValueError("Connection is not initialized") 260 | if self.cur is None: 261 | raise ValueError("Cursor is not initialized") 262 | today = date.today() 263 | self.cur.execute("INSERT INTO APIKeys(apiKey, status, lastChecked) VALUES(?, ?, ?)", (api_key, status, today)) 264 | self.con.commit() 265 | 266 | def key_exists(self, api_key: str) -> bool: 267 | """ 268 | Check if a given API key exists in the database. 269 | 270 | Args: 271 | api_key (str): The API key to search for. 272 | 273 | Returns: 274 | bool: True if the API key exists, False otherwise. 
275 | """ 276 | if self.cur is None: 277 | raise ValueError("Cursor is not initialized") 278 | self.cur.execute("SELECT apiKey FROM APIKeys WHERE apiKey=?", (api_key,)) 279 | return self.cur.fetchone() is not None 280 | 281 | def insert_url(self, url: str) -> None: 282 | """ 283 | Insert a new URL into the 'URLs' table. 284 | 285 | Args: 286 | url (str): The URL to add. 287 | """ 288 | if self.con is None: 289 | raise ValueError("Connection is not initialized") 290 | if self.cur is None: 291 | raise ValueError("Cursor is not initialized") 292 | self.cur.execute("INSERT INTO URLs(url, key) VALUES(?, ?)", (url, 1)) 293 | self.con.commit() 294 | 295 | def get_url(self, url: str) -> str | None: 296 | """ 297 | Retrieve the 'key' associated with the given URL. 298 | 299 | Args: 300 | url (str): The URL to look up. 301 | 302 | Returns: 303 | str | None: The key if it exists, None if not. 304 | """ 305 | if self.cur is None: 306 | raise ValueError("Cursor is not initialized") 307 | self.cur.execute("SELECT key FROM URLs WHERE url=?", (url,)) 308 | fetch = self.cur.fetchone() 309 | return fetch[0] if fetch else None 310 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides a function to check the validity of an OpenAI API key by making a test request 3 | to a chosen model using the OpenAI client. It captures various exceptions and prints error details. 4 | """ 5 | 6 | import rich 7 | from openai import APIStatusError, AuthenticationError, OpenAI, RateLimitError 8 | 9 | 10 | def check_key(key, model="gpt-4o-mini") -> str | None: 11 | """ 12 | Check if the API key is valid. 
13 | """ 14 | try: 15 | client = OpenAI(api_key=key) 16 | 17 | completion = client.chat.completions.create( 18 | model=model, 19 | messages=[ 20 | { 21 | "role": "system", 22 | "content": "You are a yeser, you only output lowercase yes.", 23 | }, 24 | {"role": "user", "content": "yes or no? say yes"}, 25 | ], 26 | ) 27 | result = completion.choices[0].message.content 28 | rich.print(f"🔑 [bold green]available key[/bold green]: [orange_red1]'{key}'[/orange_red1] ({result})\n") 29 | return "yes" 30 | except AuthenticationError as e: 31 | rich.print(f"[deep_sky_blue1]{e.body['code']} ({e.status_code})[/deep_sky_blue1]: '{key[:10]}...{key[-10:]}'") # type: ignore 32 | return e.body["code"] # type: ignore 33 | except RateLimitError as e: 34 | rich.print(f"[deep_sky_blue1]{e.body['code']} ({e.status_code})[/deep_sky_blue1]: '{key[:10]}...{key[-10:]}'") # type: ignore 35 | return e.body["code"] # type: ignore 36 | except APIStatusError as e: 37 | rich.print(f"[bold red]{e.body['code']} ({e.status_code})[/bold red]: '{key[:10]}...{key[-10:]}'") # type: ignore 38 | return e.body["code"] # type: ignore 39 | except Exception as e: # pylint: disable=broad-except 40 | rich.print(f"[bold red]{e}[/bold red]: '{key[:10]}...{key[-10:]}'") # type: ignore 41 | return "Unknown Error" 42 | 43 | 44 | if __name__ == "__main__": 45 | check_key("sk-proj-12345") 46 | --------------------------------------------------------------------------------