├── .gitignore
├── LICENSE
├── README.md
├── config.py
├── docker-compose.yml
├── files
│   ├── lists.txt
│   └── youtube_channels.txt
└── main.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024 Moein Erfanian

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Writeup Finder

*A writeup every day*

**Writeup Finder** is an automated script that fetches security-related writeups from multiple sources: YouTube channels, Medium RSS feeds, and Pentester.land. It records every item it has seen in a PostgreSQL database, so each writeup is reported only once, and it can optionally announce new items through a Discord webhook.

## Features

- **YouTube Videos**: Retrieve and process video data from specified YouTube channels.
- **Medium Writeups**: Fetch and process writeups from Medium using RSS feeds.
- **Pentester.land Writeups**: Collect writeups from Pentester.land's JSON feed.
- **PostgreSQL Database**: Store data in a PostgreSQL database for efficient management.
- **Discord Notifications**: Optionally send notifications about new content to a Discord webhook.

## Setup

### Prerequisites

- Python 3.x
- PostgreSQL
- Docker (used to run PostgreSQL)
- Required Python packages: `requests`, `psycopg2`, `pwntools` (used for logging)

### Installation

1. **Clone the Repository**

   ```bash
   git clone https://github.com/moeinerfanian/writeup-finder.git
   cd writeup-finder
   ```
2. **Set Up PostgreSQL with Docker** (a quick connectivity check is shown after this list)

   ```bash
   docker compose up -d
   ```

3. **Install Python Dependencies**
   ```bash
   pip3 install requests psycopg2-binary pwntools
   ```

4. **Configure the Script**
   - Edit `config.py` to include your PostgreSQL database credentials and Discord webhook URL (see the example after this list).

5. **Create Database Tables**
   - Initialize the database schema with:
   ```bash
   python3 main.py db
   ```
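For reference, a filled-in `config.py` for the Docker setup above might look like the sketch below. Every value is a placeholder (the database credentials mirror the defaults in `docker-compose.yml`); substitute your own webhook URL and credentials:

```python
# Example config.py: placeholder values only, substitute your own.
WEB_HOOK = 'https://discord.com/api/webhooks/<id>/<token>'  # your Discord webhook URL

# Present in the default config; main.py does not currently reference it.
GIF_URL = ''

DB_CONFIG = {
    'dbname': 'dbname',      # POSTGRES_DB in docker-compose.yml
    'user': 'username',      # POSTGRES_USER in docker-compose.yml
    'password': 'password',  # POSTGRES_PASSWORD in docker-compose.yml
    'host': 'localhost',
    'port': '5432'
}
```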
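To confirm the container from step 2 is up and accepting connections, you can run a quick check (again using the placeholder credentials from `docker-compose.yml`):

```bash
docker exec -it postgres_db psql -U username -d dbname -c '\conninfo'
```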
## Usage

Run the steps in this order:

1. **Create the database tables** (first run only): `python3 main.py db`
2. **Run normally** (fetch new items and send Discord notifications): `python3 main.py`
3. **Run without Discord notifications** using the `nodiscord` flag: `python3 main.py nodiscord`

## Improvements

- **PostgreSQL Integration**: Enhanced database handling and connectivity.
- **RSS Feed Handling**: Improved fetching and parsing of YouTube and Medium RSS feeds.
- **Error Handling**: Better management of rate limits and parsing errors.

## Scheduling with Crontab

To run the script automatically every 5 hours, add the following line to your crontab. The script reads `files/lists.txt` and `files/youtube_channels.txt` with relative paths, so change into the repository first:

```bash
0 */5 * * * cd /path/to/your/repo && /usr/bin/python3 main.py
```
- Replace `/path/to/your/repo` with the path to your cloned repository.

## Author

Moein Erfanian (Electro0ne)

![Screenshot from 2024-02-24 17-06-40](https://github.com/moeinerfanian/writeup-finder/assets/122752399/f505f5a6-3176-4c19-8766-5eeecd8950eb)

--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
WEB_HOOK = ''
GIF_URL = ''

DB_CONFIG = {
    'dbname': '',
    'user': '',
    'password': '',
    'host': 'localhost',
    'port': '5432'
}

--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: '3.1'

services:
  postgres:
    image: postgres:13
    container_name: postgres_db
    environment:
      POSTGRES_USER: username
      POSTGRES_PASSWORD: password
      POSTGRES_DB: dbname
    ports:
      - "5432:5432"
    volumes:
      - pg_data:/var/lib/postgresql/data

volumes:
  pg_data:
--------------------------------------------------------------------------------
/files/lists.txt:
--------------------------------------------------------------------------------
https://medium.com/feed/tag/bug-bounty-writeup
https://medium.com/feed/tag/cybersecurity
https://medium.com/feed/tag/application-security
https://medium.com/feed/tag/hacking
https://medium.com/feed/tag/infosec
https://medium.com/feed/tag/ctf
https://medium.com/feed/tag/penetration-testing
https://medium.com/feed/tag/writeup
https://medium.com/feed/tag/tryhackme
https://medium.com/feed/tag/vulnhub
https://medium.com/feed/tag/security
https://medium.com/feed/tag/bug-bounty
https://medium.com/feed/tag/bug-hunter
https://medium.com/feed/tag/info-sec-writeup
https://medium.com/feed/tag/hackthebox-writeup
https://medium.com/feed/tag/ethical-hacking
https://medium.com/feed/tag/api-security
https://medium.com/feed/tag/hackerone
https://medium.com/feed/tag/authentication
https://medium.com/feed/tag/vulnerability
https://medium.com/feed/tag/recon
https://surya-dev.medium.com/feed
https://infosecwriteups.com/feed
https://medium.com/feed/@securitylit
https://medium.com/feed/@tomnomnom
https://medium.com/feed/@cappriciosec
https://medium.com/feed/@302Found
https://medium.com/feed/@newp_th
https://medium.com/feed/@pdelteil
https://ruvlol.medium.com/feed
https://medium.com/@know.0nix/feed
https://medium.com/@bugh4nter/feed
https://seqrity.medium.com/feed
https://vickieli.medium.com/feed
https://medium.com/feed/intigriti
https://medium.com/@intideceukelaire/feed
https://medium.com/@projectdiscovery/feed
https://jonathandata1.medium.com/feed
https://medium.com/@Hacker0x01/feed
https://medium.com/feed/pentesternepal
https://0xjin.medium.com/feed
https://medium.com/@infosecwriteups/feed
https://medium.com/@jhaddix/feed
https://medium.com/@NahamSec/feed
https://orwaatyat.medium.com/feed
https://zseano.medium.com/feed
https://d0nut.medium.com/feed
https://medium.com/feed/towards-aws
https://medium.com/@stackzero/feed

--------------------------------------------------------------------------------
/files/youtube_channels.txt:
--------------------------------------------------------------------------------
Voorivex, UCz4A6ALhUVHuiXzoJrIGc1Q
Nahamsec, UCCZDt7MuC3Hzs6IH4xODLBw

--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os
import sys
import time
import requests
import psycopg2
import pwn  # pwntools; used here only for its logger (pwn.log)
import xml.etree.ElementTree as ET
from datetime import datetime
from config import *

# Database connection setup
def get_db_connection():
    return psycopg2.connect(**DB_CONFIG)

def is_link_processed(conn, table_name, link):
    """Return True if this link has already been stored in the given table."""
    with conn.cursor() as cursor:
        cursor.execute(f"SELECT 1 FROM {table_name} WHERE link = %s", (link,))
        return cursor.fetchone() is not None

def save_processed_link(conn, table_name, link, title, pub_date):
    with conn.cursor() as cursor:
        cursor.execute(
            f"INSERT INTO {table_name} (link, title, pub_date) VALUES (%s, %s, %s)",
            (link, title, pub_date)
        )
    conn.commit()

def fetch_rss_data(url):
    """Fetch a YouTube Atom feed and return its entries as dicts."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
    response = requests.get(url, headers=headers)

    if response.status_code != 200:
        # pwn.log.error() raises an exception in pwntools, so use warning for recoverable failures
        pwn.log.warning(f"Failed to fetch data from {url}. Status code: {response.status_code}")
        return []

    try:
        root = ET.fromstring(response.content.decode('utf-8'))
    except ET.ParseError as e:
        pwn.log.warning(f"Failed to parse XML: {e}")
        return []

    # YouTube publishes channel feeds in the Atom namespace
    namespace = {'atom': 'http://www.w3.org/2005/Atom'}

    items = []
    for item in root.findall(".//atom:entry", namespace):
        title_element = item.find("atom:title", namespace)
        link_element = item.find("atom:link", namespace)
        pub_date_element = item.find("atom:published", namespace)

        title = title_element.text if title_element is not None else 'No Title'
        link = link_element.attrib['href'] if link_element is not None else 'No Link'
        pub_date = pub_date_element.text if pub_date_element is not None else 'No Date'

        items.append({"title": title, "link": link, "pub_date": pub_date})
        pwn.log.info(f"Video found: {title} - {link}")

    return items
def parse_medium_feed(content, url):
    """Parse a Medium RSS 2.0 feed body and return its <item> entries as dicts."""
    items = []
    try:
        root = ET.fromstring(content)
        channel = root.find('channel')
        if channel is not None:
            for item in channel.findall('item'):
                title = item.find('title').text if item.find('title') is not None else 'No Title'
                link = item.find('link').text if item.find('link') is not None else 'No Link'
                pub_date = item.find('pubDate').text if item.find('pubDate') is not None else str(datetime.now())
                items.append({"title": title, "link": link, "pub_date": pub_date})
    except ET.ParseError as e:
        pwn.log.warning(f"Error parsing RSS feed from {url}: {e}")
    return items

def fetch_medium_writeups(file_path):
    items = []
    headers = {'User-Agent': 'curl/7.81.0', 'accept': '*/*'}

    with open(file_path, 'r') as file:
        urls = [line.strip() for line in file if line.strip()]

    for url in urls:
        try:
            response = requests.get(url, headers=headers)

            if response.status_code == 429:
                # Rate limited: honor the Retry-After header (default to 60 seconds), then retry once
                retry_after = int(response.headers.get('Retry-After', 60))
                pwn.log.info(f"Rate limit exceeded for {url}. Waiting {retry_after} seconds before retrying...")
                time.sleep(retry_after)
                response = requests.get(url, headers=headers)

            if response.status_code == 200:
                items.extend(parse_medium_feed(response.content, url))
            elif response.status_code == 404:
                pwn.log.info(f"URL {url} returned 404. Skipping...")
            else:
                pwn.log.warning(f"Failed to fetch Medium writeup from {url}. Status code: {response.status_code}")
        except Exception as e:
            pwn.log.warning(f"Error fetching Medium writeup from {url}: {e}")

    pwn.log.info(f"Fetched {len(items)} Medium items")
    return items
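# For reference, fetch_pentesterland_writeups() below expects pentester.land's
# writeups.json to look roughly like this. This is an illustrative sketch
# inferred from the fields the function accesses, not an official schema:
#
# {
#   "data": [
#     {
#       "Links": [{"Title": "Some writeup", "Link": "https://example.com/post"}],
#       "Authors": ["author1"],
#       "Programs": ["Some program"],
#       "Bugs": ["XSS"],
#       "PublicationDate": "2024-02-24",
#       "AddedDate": "2024-02-24"
#     }
#   ]
# }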
def fetch_pentesterland_writeups():
    pentesterland_url = 'https://pentester.land/writeups.json'
    response = requests.get(pentesterland_url)
    if response.status_code == 200:
        data = response.json()
        writeups = []
        for selected_item in data['data']:
            # Each entry can carry several links; emit one writeup per link
            for item in selected_item['Links']:
                writeups.append({
                    "title": item['Title'],
                    "link": item['Link'],
                    "pub_date": selected_item['PublicationDate'],
                    "authors": selected_item['Authors'],
                    "programs": selected_item['Programs'],
                    "bugs": selected_item['Bugs'],
                    "added_date": selected_item['AddedDate']
                })
        return writeups
    return []

def send_to_discord(webhook_url, title, link, pub_date, extra_fields=None):
    embed = {
        "title": title,
        "description": f"[{title}]({link})",
        "color": 16777215,
        "fields": [{"name": "Published on", "value": pub_date}]
    }

    if extra_fields:
        embed['fields'].extend(extra_fields)

    payload = {"content": None, "embeds": [embed]}
    headers = {"Content-Type": "application/json"}
    response = requests.post(webhook_url, json=payload, headers=headers)

    # Discord webhooks return 204 No Content on success
    if response.status_code == 204:
        pwn.log.info(f"New item sent to Discord: {title}")
    else:
        pwn.log.warning(f"Failed to send item to Discord. Status code: {response.status_code}")

def setup_database():
    conn = get_db_connection()
    with conn.cursor() as cursor:
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS youtube_videos (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS medium_writeups (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP
            );
        """)
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS pentesterland_writeups (
                id SERIAL PRIMARY KEY,
                link TEXT UNIQUE NOT NULL,
                title TEXT,
                pub_date TIMESTAMP,
                authors TEXT[],
                programs TEXT[],
                bugs TEXT[],
                added_date TIMESTAMP
            );
        """)
    conn.commit()
    conn.close()

def main():
    # Handle CLI flags before opening a connection: 'db' sets up the schema,
    # 'nodiscord' skips webhook notifications
    nodiscord = len(sys.argv) > 1 and sys.argv[1] == 'nodiscord'
    if len(sys.argv) > 1 and sys.argv[1] == 'db':
        setup_database()
        pwn.log.info("Database setup completed.")
        return

    conn = get_db_connection()

    youtube_file = 'files/youtube_channels.txt'
    medium_file = 'files/lists.txt'

    if os.path.exists(youtube_file):
        with open(youtube_file, 'r') as file:
            youtube_channels = file.readlines()

        for line in youtube_channels:
            if not line.strip():
                continue
            # Lines look like "Channel Name, channel_id"; strip both parts so the
            # channel id is not polluted by the space after the comma
            channel_name, channel_id = [part.strip() for part in line.split(',')]
            pwn.log.info(f"Checking YouTube channel: {channel_name}")
            rss_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
            for video in fetch_rss_data(rss_url):
                video_link = video['link']
                video_title = video['title']
                pub_date = video['pub_date']

                if not is_link_processed(conn, 'youtube_videos', video_link):
                    if not nodiscord:
                        send_to_discord(WEB_HOOK, video_title, video_link, pub_date)
                    save_processed_link(conn, 'youtube_videos', video_link, video_title, pub_date)
                    pwn.log.info(f"New YouTube video found: {video_title}")

    medium_writeups = fetch_medium_writeups(medium_file)
    for writeup in medium_writeups:
        writeup_link = writeup['link']
        writeup_title = writeup['title']
        pub_date = writeup['pub_date']

        if not is_link_processed(conn, 'medium_writeups', writeup_link):
            if not nodiscord:
                send_to_discord(WEB_HOOK, writeup_title, writeup_link, pub_date)
            save_processed_link(conn, 'medium_writeups', writeup_link, writeup_title, pub_date)
            pwn.log.info(f"New Medium writeup found: {writeup_title}")

    pentesterland_writeups = fetch_pentesterland_writeups()
    for writeup in pentesterland_writeups:
        writeup_link = writeup['link']
        writeup_title = writeup['title']
        pub_date = writeup['pub_date']

        if not is_link_processed(conn, 'pentesterland_writeups', writeup_link):
            if not nodiscord:
                send_to_discord(WEB_HOOK, writeup_title, writeup_link, pub_date, [
                    {"name": "Authors", "value": ", ".join(writeup['authors'])},
                    {"name": "Programs", "value": ", ".join(writeup['programs'])},
                    {"name": "Bugs", "value": ", ".join(writeup['bugs'])},
                    {"name": "Added Date", "value": writeup['added_date']}
                ])
            save_processed_link(conn, 'pentesterland_writeups', writeup_link, writeup_title, pub_date)
            pwn.log.info(f"New Pentester.land writeup found: {writeup_title}")

    conn.close()

if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------