├── .gitignore
├── LICENSE
├── README.md
├── config.py
├── docker-compose.yml
├── files
│   ├── lists.txt
│   └── youtube_channels.txt
└── main.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Moein Erfanian

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Writeup Finder

*A writeup every day*

**Writeup Finder** is an automated script that fetches security-related writeups from multiple sources: YouTube channels, Medium RSS feeds, and Pentester.land. It records every item it has seen in a PostgreSQL database, so each writeup is reported only once, and it can optionally announce new items through a Discord webhook.

## Features

- **YouTube Videos**: Retrieve and process video data from specified YouTube channels.
- **Medium Writeups**: Fetch and process writeups from Medium using RSS feeds.
- **Pentester.land Writeups**: Collect writeups from Pentester.land's JSON feed.
- **PostgreSQL Database**: Store data in a PostgreSQL database for efficient management.
- **Discord Notifications**: Optionally send notifications about new content to a Discord webhook.

## Setup

### Prerequisites

- Python 3.x
- PostgreSQL
- Docker (used to run PostgreSQL)
- Required Python packages: `requests`, `psycopg2`, `pwntools` (used for logging)

### Installation

1. **Clone the Repository**

   ```bash
   git clone https://github.com/moeinerfanian/writeup-finder.git
   cd writeup-finder
   ```
2. **Set Up PostgreSQL with Docker** (a quick connectivity check is shown after this list)

   ```bash
   docker compose up -d
   ```

3. **Install Python Dependencies**
   ```bash
   pip3 install requests psycopg2-binary pwntools
   ```

4. **Configure the Script**
   - Edit `config.py` to include your PostgreSQL database credentials and Discord webhook URL (see the example after this list).

5. **Create Database Tables**
   - Initialize the database schema with:
   ```bash
   python3 main.py db
   ```
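For reference, a filled-in `config.py` for the Docker setup above might look like the sketch below. Every value is a placeholder (the database credentials mirror the defaults in `docker-compose.yml`); substitute your own webhook URL and credentials:

```python
# Example config.py: placeholder values only, substitute your own.
WEB_HOOK = 'https://discord.com/api/webhooks/<id>/<token>'  # your Discord webhook URL

# Present in the default config; main.py does not currently reference it.
GIF_URL = ''

DB_CONFIG = {
    'dbname': 'dbname',      # POSTGRES_DB in docker-compose.yml
    'user': 'username',      # POSTGRES_USER in docker-compose.yml
    'password': 'password',  # POSTGRES_PASSWORD in docker-compose.yml
    'host': 'localhost',
    'port': '5432'
}
```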
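To confirm the container from step 2 is up and accepting connections, you can run a quick check (again using the placeholder credentials from `docker-compose.yml`):

```bash
docker exec -it postgres_db psql -U username -d dbname -c '\conninfo'
```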
## Usage

Run the steps in this order:

1. **Create the database tables** (first run only): `python3 main.py db`
2. **Run normally** (fetch new items and send Discord notifications): `python3 main.py`
3. **Run without Discord notifications** using the `nodiscord` flag: `python3 main.py nodiscord`

## Improvements

- **PostgreSQL Integration**: Enhanced database handling and connectivity.
- **RSS Feed Handling**: Improved fetching and parsing of YouTube and Medium RSS feeds.
- **Error Handling**: Better management of rate limits and parsing errors.

## Scheduling with Crontab

To run the script automatically every 5 hours, add the following line to your crontab. The script reads `files/lists.txt` and `files/youtube_channels.txt` with relative paths, so change into the repository first:

```bash
0 */5 * * * cd /path/to/your/repo && /usr/bin/python3 main.py
```
- Replace `/path/to/your/repo` with the path to your cloned repository.

## Author

Moein Erfanian (Electro0ne)

![Screenshot from 2024-02-24 17-06-40](https://github.com/moeinerfanian/writeup-finder/assets/122752399/f505f5a6-3176-4c19-8766-5eeecd8950eb)

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
WEB_HOOK = ''
GIF_URL = ''

DB_CONFIG = {
    'dbname': '',
    'user': '',
    'password': '',
    'host': 'localhost',
    'port': '5432'
}

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3.1'

services:
  postgres:
    image: postgres:13
    container_name: postgres_db
    environment:
      POSTGRES_USER: username
      POSTGRES_PASSWORD: password
      POSTGRES_DB: dbname
    ports:
      - "5432:5432"
    volumes:
      - pg_data:/var/lib/postgresql/data

volumes:
  pg_data:
--------------------------------------------------------------------------------
/files/lists.txt:
--------------------------------------------------------------------------------
https://medium.com/feed/tag/bug-bounty-writeup
https://medium.com/feed/tag/cybersecurity
https://medium.com/feed/tag/application-security
https://medium.com/feed/tag/hacking
https://medium.com/feed/tag/infosec
https://medium.com/feed/tag/ctf
https://medium.com/feed/tag/penetration-testing
https://medium.com/feed/tag/writeup
https://medium.com/feed/tag/tryhackme
https://medium.com/feed/tag/vulnhub
https://medium.com/feed/tag/security
https://medium.com/feed/tag/bug-bounty
https://medium.com/feed/tag/bug-hunter
https://medium.com/feed/tag/info-sec-writeup
https://medium.com/feed/tag/hackthebox-writeup
https://medium.com/feed/tag/ethical-hacking
https://medium.com/feed/tag/api-security
https://medium.com/feed/tag/hackerone
https://medium.com/feed/tag/authentication
https://medium.com/feed/tag/vulnerability
https://medium.com/feed/tag/recon
https://surya-dev.medium.com/feed
https://infosecwriteups.com/feed
https://medium.com/feed/@securitylit
https://medium.com/feed/@tomnomnom
https://medium.com/feed/@cappriciosec
https://medium.com/feed/@302Found
https://medium.com/feed/@newp_th
https://medium.com/feed/@pdelteil
https://ruvlol.medium.com/feed
https://medium.com/@know.0nix/feed
https://medium.com/@bugh4nter/feed
https://seqrity.medium.com/feed
https://vickieli.medium.com/feed
https://medium.com/feed/intigriti
https://medium.com/@intideceukelaire/feed
https://medium.com/@projectdiscovery/feed
https://jonathandata1.medium.com/feed
https://medium.com/@Hacker0x01/feed
https://medium.com/feed/pentesternepal
https://0xjin.medium.com/feed
https://medium.com/@infosecwriteups/feed
https://medium.com/@jhaddix/feed
https://medium.com/@NahamSec/feed
https://orwaatyat.medium.com/feed
https://zseano.medium.com/feed
https://d0nut.medium.com/feed
https://medium.com/feed/towards-aws
https://medium.com/@stackzero/feed

--------------------------------------------------------------------------------
/files/youtube_channels.txt:
--------------------------------------------------------------------------------
Voorivex, UCz4A6ALhUVHuiXzoJrIGc1Q
Nahamsec, UCCZDt7MuC3Hzs6IH4xODLBw

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os
import sys
import time
import requests
import psycopg2
import pwn  # pwntools; used here only for its logger (pwn.log)
import xml.etree.ElementTree as ET
from datetime import datetime
from config import *

# Database connection setup
def get_db_connection():
    return psycopg2.connect(**DB_CONFIG)

def is_link_processed(conn, table_name, link):
    """Return True if this link has already been stored in the given table."""
    with conn.cursor() as cursor:
        cursor.execute(f"SELECT 1 FROM {table_name} WHERE link = %s", (link,))
        return cursor.fetchone() is not None

def save_processed_link(conn, table_name, link, title, pub_date):
    with conn.cursor() as cursor:
        cursor.execute(
            f"INSERT INTO {table_name} (link, title, pub_date) VALUES (%s, %s, %s)",
            (link, title, pub_date)
        )
    conn.commit()

def fetch_rss_data(url):
    """Fetch a YouTube Atom feed and return its entries as dicts."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        # pwn.log.error() raises an exception in pwntools, so use warning for recoverable failures
        pwn.log.warning(f"Failed to fetch data from {url}. Status code: {response.status_code}")
        return []

    try:
        root = ET.fromstring(response.content.decode('utf-8'))
    except ET.ParseError as e:
        pwn.log.warning(f"Failed to parse XML: {e}")
        return []

    # YouTube publishes channel feeds in the Atom namespace
    namespace = {'atom': 'http://www.w3.org/2005/Atom'}

    items = []
    for item in root.findall(".//atom:entry", namespace):
        title_element = item.find("atom:title", namespace)
        link_element = item.find("atom:link", namespace)
        pub_date_element = item.find("atom:published", namespace)

        title = title_element.text if title_element is not None else 'No Title'
        link = link_element.attrib['href'] if link_element is not None else 'No Link'
        pub_date = pub_date_element.text if pub_date_element is not None else 'No Date'

        items.append({"title": title, "link": link, "pub_date": pub_date})
        pwn.log.info(f"Video found: {title} - {link}")

    return items
def parse_medium_feed(content, url):
    """Parse a Medium RSS 2.0 feed body and return its <item> entries as dicts."""
    items = []
    try:
        root = ET.fromstring(content)
        channel = root.find('channel')
        if channel is not None:
            for item in channel.findall('item'):
                title = item.find('title').text if item.find('title') is not None else 'No Title'
                link = item.find('link').text if item.find('link') is not None else 'No Link'
                pub_date = item.find('pubDate').text if item.find('pubDate') is not None else str(datetime.now())
                items.append({"title": title, "link": link, "pub_date": pub_date})
    except ET.ParseError as e:
        pwn.log.warning(f"Error parsing RSS feed from {url}: {e}")
    return items

def fetch_medium_writeups(file_path):
    items = []
    headers = {'User-Agent': 'curl/7.81.0', 'accept': '*/*'}

    with open(file_path, 'r') as file:
        urls = [line.strip() for line in file if line.strip()]

    for url in urls:
        try:
            response = requests.get(url, headers=headers)

            if response.status_code == 429:
                # Rate limited: honor the Retry-After header (default to 60 seconds), then retry once
                retry_after = int(response.headers.get('Retry-After', 60))
                pwn.log.info(f"Rate limit exceeded for {url}. Waiting {retry_after} seconds before retrying...")
                time.sleep(retry_after)
                response = requests.get(url, headers=headers)

            if response.status_code == 200:
                items.extend(parse_medium_feed(response.content, url))
            elif response.status_code == 404:
                pwn.log.info(f"URL {url} returned 404. Skipping...")
            else:
                pwn.log.warning(f"Failed to fetch Medium writeup from {url}. Status code: {response.status_code}")
        except Exception as e:
            pwn.log.warning(f"Error fetching Medium writeup from {url}: {e}")

    pwn.log.info(f"Fetched {len(items)} Medium items")
    return items
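# For reference, fetch_pentesterland_writeups() below expects pentester.land's
# writeups.json to look roughly like this. This is an illustrative sketch
# inferred from the fields the function accesses, not an official schema:
#
# {
#   "data": [
#     {
#       "Links": [{"Title": "Some writeup", "Link": "https://example.com/post"}],
#       "Authors": ["author1"],
#       "Programs": ["Some program"],
#       "Bugs": ["XSS"],
#       "PublicationDate": "2024-02-24",
#       "AddedDate": "2024-02-24"
#     }
#   ]
# }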
def fetch_pentesterland_writeups():
    pentesterland_url = 'https://pentester.land/writeups.json'
    response = requests.get(pentesterland_url)
    if response.status_code == 200:
        data = response.json()
        writeups = []
        for selected_item in data['data']:
            # Each entry can carry several links; emit one writeup per link
            for item in selected_item['Links']:
                writeups.append({
                    "title": item['Title'],
                    "link": item['Link'],
                    "pub_date": selected_item['PublicationDate'],
                    "authors": selected_item['Authors'],
                    "programs": selected_item['Programs'],
                    "bugs": selected_item['Bugs'],
                    "added_date": selected_item['AddedDate']
                })
        return writeups
    return []

def send_to_discord(webhook_url, title, link, pub_date, extra_fields=None):
    embed = {
        "title": title,
        "description": f"[{title}]({link})",
        "color": 16777215,
        "fields": [{"name": "Published on", "value": pub_date}]
    }

    if extra_fields:
        embed['fields'].extend(extra_fields)

    payload = {"content": None, "embeds": [embed]}
    headers = {"Content-Type": "application/json"}
    response = requests.post(webhook_url, json=payload, headers=headers)

    # Discord webhooks return 204 No Content on success
    if response.status_code == 204:
        pwn.log.info(f"New item sent to Discord: {title}")
    else:
        pwn.log.warning(f"Failed to send item to Discord. Status code: {response.status_code}")

def setup_database():
    conn = get_db_connection()
    with conn.cursor() as cursor:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS youtube_videos (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS medium_writeups (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS pentesterland_writeups (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP,
                authors TEXT[],
                programs TEXT[],
                bugs TEXT[],
                added_date TIMESTAMP
            );
        """)
    conn.commit()
    conn.close()

def main():
    # Handle CLI flags before opening a connection: 'db' sets up the schema,
    # 'nodiscord' skips webhook notifications
    nodiscord = len(sys.argv) > 1 and sys.argv[1] == 'nodiscord'
    if len(sys.argv) > 1 and sys.argv[1] == 'db':
        setup_database()
        pwn.log.info("Database setup completed.")
        return

    conn = get_db_connection()

    youtube_file = 'files/youtube_channels.txt'
    medium_file = 'files/lists.txt'

    if os.path.exists(youtube_file):
        with open(youtube_file, 'r') as file:
            youtube_channels = file.readlines()

        for line in youtube_channels:
            if not line.strip():
                continue
            # Lines look like "Channel Name, channel_id"; strip both parts so the
            # channel id is not polluted by the space after the comma
            channel_name, channel_id = [part.strip() for part in line.split(',')]
            pwn.log.info(f"Checking YouTube channel: {channel_name}")
            rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
            for video in fetch_rss_data(rss_url):
                video_link = video['link']
                video_title = video['title']
                pub_date = video['pub_date']

                if not is_link_processed(conn, 'youtube_videos', video_link):
                    if not nodiscord:
                        send_to_discord(WEB_HOOK, video_title, video_link, pub_date)
                    save_processed_link(conn, 'youtube_videos', video_link, video_title, pub_date)
                    pwn.log.info(f"New YouTube video found: {video_title}")

    medium_writeups = fetch_medium_writeups(medium_file)
    for writeup in medium_writeups:
        writeup_link = writeup['link']
        writeup_title = writeup['title']
        pub_date = writeup['pub_date']

        if not is_link_processed(conn, 'medium_writeups', writeup_link):
            if not nodiscord:
                send_to_discord(WEB_HOOK, writeup_title, writeup_link, pub_date)
            save_processed_link(conn, 'medium_writeups', writeup_link, writeup_title, pub_date)
            pwn.log.info(f"New Medium writeup found: {writeup_title}")

    pentesterland_writeups = fetch_pentesterland_writeups()
    for writeup in pentesterland_writeups:
        writeup_link = writeup['link']
        writeup_title = writeup['title']
        pub_date = writeup['pub_date']

        if not is_link_processed(conn, 'pentesterland_writeups', writeup_link):
            if not nodiscord:
                send_to_discord(WEB_HOOK, writeup_title, writeup_link, pub_date, [
                    {"name": "Authors", "value": ", ".join(writeup['authors'])},
                    {"name": "Programs", "value": ", ".join(writeup['programs'])},
                    {"name": "Bugs", "value": ", ".join(writeup['bugs'])},
                    {"name": "Added Date", "value": writeup['added_date']}
                ])
            save_processed_link(conn, 'pentesterland_writeups', writeup_link, writeup_title, pub_date)
            pwn.log.info(f"New Pentester.land writeup found: {writeup_title}")

    conn.close()

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------