├── .gitignore
├── assets
    └── imgs
    │   └── medium.jpg
├── external.py
├── requirements.txt
├── LICENSE.txt
├── README.md
├── medium.py
└── functions.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *_blogs/
2 | __pycache__/
3 | 


--------------------------------------------------------------------------------
/assets/imgs/medium.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/0xAmmar/Medium-Miner/HEAD/assets/imgs/medium.jpg


--------------------------------------------------------------------------------
/external.py:
--------------------------------------------------------------------------------
class bcolors:
    """ANSI escape sequences used to colour and style terminal output.

    Wrap text as ``f"{bcolors.FAIL}message{bcolors.ENDC}"``; ``ENDC`` resets
    every attribute set by the other codes.
    """

    # Bright foreground colours.
    OKGREEN = "\033[92m"
    OKCYAN = "\033[96m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes.
    BOLD = "\033[1m"
    ENDC = "\033[0m"
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4==4.12.2
 2 | bs4==0.0.1
 3 | certifi==2023.7.22
 4 | charset-normalizer==3.2.0
 5 | click==8.1.6
 6 | cssselect==1.2.0
 7 | feedfinder2==0.0.4
 8 | filelock==3.12.2
 9 | google==3.0.0
10 | idna==3.4
11 | jieba3k==0.35.1
12 | joblib==1.3.2
13 | lxml==4.9.3
14 | markdownify==0.11.6
15 | nltk==3.8.1
16 | Pillow==10.0.0
17 | python-dateutil==2.8.2
18 | PyYAML==6.0.1
19 | regex==2023.8.8
20 | requests==2.31.0
21 | requests-file==1.5.1
22 | sgmllib3k==1.0.0
23 | six==1.16.0
24 | soupsieve==2.4.1
25 | tinysegmenter==0.3
26 | tldextract==3.4.4
27 | tqdm==4.66.1
28 | urllib3==2.0.4
29 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Medium Miner
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Medium Miner
 2 | 
 3 | > A Medium Scraper That You Need!
 4 | 
 5 | <img src="https://raw.githubusercontent.com/xLe0x/Medium-Miner/main/assets/imgs/medium.jpg" />
 6 | 
 7 | 
 8 | ## 🚩 Table of Contents
 9 | 
10 | - Why Should I Use It?
11 | - Install
12 | - How To Use
13 | - Preview
14 | 
15 | ### Why Should I Use It?
16 | 1. It's Free.
17 | 2. Reading Medium Blogs Offline.
18 | 3. You Can Read On Your Favourite Markdown Reader (for me, it's Obsidian).
19 | 4. Easy To Use.
20 | 5. It's Free.
21 | 
22 | ### Install:
23 | ```bash
24 | git clone https://github.com/xLe0x/Medium-Miner.git
25 | ```
26 | ```bash
27 | cd Medium-Miner
28 | ```
29 | **Install required Dependencies:**
30 | ```bash
31 | pip3 install -r requirements.txt
32 | ```
33 | 
34 | **Show Commands:**
35 | ```bash
36 | python3 medium.py -h
37 | ```
38 | 
39 | ### How To Use:
40 | 
41 | **To search for a specific topic (word) on medium**
42 | ```bash
43 | python3 medium.py -w "how to lose?" -c 20
44 | ```
45 | **To list and download all of an author's blogs**
46 | ```bash
47 | python3 medium.py -a ammarmosaber
48 | ```
49 | 
50 | ### Preview
51 | 
52 | https://github.com/xLe0x/Medium-Miner/assets/112324284/fe885e86-d69a-4369-9b63-3ab861d3ffd7
53 | 
54 | This program is licensed under the [MIT](https://github.com/xLe0x/Medium-Miner/blob/main/LICENSE.txt) © [xLe0x](https://github.com/xle0x).
55 | 


--------------------------------------------------------------------------------
/medium.py:
--------------------------------------------------------------------------------
 1 | from functions import (
 2 |     searchGoogle,
 3 |     SearchAuthorMedium,
 4 | )
 5 | from external import bcolors
 6 | import argparse,sys
 7 | 
 8 | 
 9 | parser = argparse.ArgumentParser(description="Medium Miner.")
10 | 
11 | word_group = parser.add_mutually_exclusive_group()
12 | word_group.add_argument(
13 |     "-w",
14 |     "--word",
15 |     type=str,
16 |     help="Search by word in google example: python3 medium.py -w xss",
17 | )
18 | 
19 | parser.add_argument(
20 |     "-c",
21 |     "--count",
22 |     type=int,
23 |     help="How many output you want? example: python3 medium.py -w idor -c 10",
24 | )
25 | 
26 | parser.add_argument(
27 |     "-a",
28 |     "--author",
29 |     type=str,
30 |     help="Search by author in medium example: python3 medium.py -a ammarmosaber",
31 | )
32 | 
33 | parser.add_argument("-v", action="store_true", help="Prints the program version")
34 | 
35 | args = parser.parse_args()
36 | 
37 | 
38 | try:
39 |     if args.word:
40 |         if not args.count:
41 |             args.count = 10
42 |         searchGoogle(str(args.word), args.count)
43 | 
44 |     elif args.author:
45 |         SearchAuthorMedium(args.author)
46 | 
47 |     elif args.v:
48 |         print("1.0 Beta")
49 | 
50 |     else:
51 |         print()
52 |         parser.print_help()
53 |         print()
54 | 
55 | except ValueError:
56 |     print(f"{bcolors.FAIL}Username is wrong or there is no blogs!{bcolors.ENDC}")
57 | except:
58 |     print(f"{bcolors.FAIL}Something went wrong please try again!{bcolors.ENDC}")
59 | 


--------------------------------------------------------------------------------
/functions.py:
--------------------------------------------------------------------------------
  1 | import requests, markdownify, os
  2 | from re import match
  3 | from googlesearch import search
  4 | from external import bcolors
  5 | from bs4 import BeautifulSoup
  6 | 
  7 | 
  8 | def save_MD_files(blog, i):
  9 |     print(f"{bcolors.OKCYAN}Started to download {i}!{bcolors.ENDC}")
 10 |     try:
 11 |         with open(
 12 |             f'{i} - {blog[int(i)]["title"]}.md',
 13 |             "x",
 14 |         ) as mdfile:
 15 |             mdfile.write(markdownify.markdownify(blog[int(i)]["content"]))
 16 | 
 17 |     except FileExistsError:
 18 |         print(f"#{i} file is already downloaded")
 19 | 
 20 | 
 21 | def download_MD_files(blog, user_choice, isNumber, isComma, isAll):
 22 |     if isComma:
 23 |         if user_choice.split(",")[-1] == "":
 24 |             print("you suck")
 25 |         elif "," in user_choice:
 26 |             for i in user_choice.split(","):
 27 |                 save_MD_files(blog, user_choice)
 28 | 
 29 |     if isNumber:
 30 |         save_MD_files(blog, user_choice)
 31 | 
 32 |     if isAll:
 33 |         for i, _ in enumerate(blog):
 34 |             save_MD_files(blog, int(i))
 35 | 
 36 | 
 37 | def getResults(url):
 38 |     blog_list = []
 39 |     r = requests.get(url)
 40 |     soup = BeautifulSoup(r.content, features="xml")
 41 |     blogs = soup.findAll("item")
 42 |     for a in blogs:
 43 |         title = a.find("title").text
 44 |         link = a.find("link").text
 45 |         content = a.find("content:encoded")
 46 |         content = content.text if content is not None else ""
 47 |         blog = {"title": title, "link": link, "content": content}
 48 |         blog_list.append(blog)
 49 |     return blog_list
 50 | 
 51 | 
 52 | def listBlogs(blogs):
 53 |     if blogs:
 54 |         print()
 55 |         print("Here is a list of all the blogs:")
 56 |         for i, blog in enumerate(blogs):
 57 |             if blog["content"] == "":
 58 |                 print(
 59 |                     f"{bcolors.BOLD}{i} - {blog['title']}{bcolors.WARNING} (Member-Only Story){bcolors.ENDC}{bcolors.ENDC}"
 60 |                 )
 61 |             else:
 62 |                 print(f"{bcolors.BOLD}{i} - {blog['title']}{bcolors.ENDC}")
 63 | 
 64 |     else:
 65 |         raise ValueError()
 66 | 
 67 | 
 68 | def checkUserInput(blogs):
 69 |     print()
 70 |     user_choice = input(
 71 |         f"{bcolors.OKGREEN}(A number,numbers (comma separated) or 'all' to downlaod): {bcolors.ENDC}"
 72 |     )
 73 |     print()
 74 |     if user_choice.isnumeric():
 75 |         download_MD_files(blogs, user_choice, isNumber=True, isAll=False, isComma=False)
 76 |     elif "," in user_choice:
 77 |         download_MD_files(blogs, user_choice, isComma=True, isNumber=False, isAll=False)
 78 |     elif user_choice == "all":
 79 |         download_MD_files(blogs, user_choice, isAll=True, isComma=False, isNumber=False)
 80 |     else:
 81 |         raise Exception()
 82 | 
 83 | 
 84 | def checkForDir(author):
 85 |     if getResults(f"https://medium.com/@{author}/feed"):
 86 |         if not os.path.exists(f"{author}_blogs"):
 87 |             os.mkdir(f"{author}_blogs")
 88 | 
 89 |         os.chdir(f"{author}_blogs")
 90 | 
 91 | 
 92 | def searchGoogle(userInput, count):
 93 |     def extractUsername(url):
 94 |         try:
 95 |             return match(r"https:\/\/(.*)\/@(.*)/", url).group(2)
 96 |         except:
 97 |             return Exception()
 98 | 
 99 |     query = f"site:medium.com {userInput}"
100 | 
101 |     for result in search(query, num=count, stop=count, pause=1):
102 |         if "@" in result:
103 |             print()
104 |             print(f"{bcolors.BOLD}The Author: {extractUsername(result)} {bcolors.ENDC}")
105 |             print(f"{bcolors.OKGREEN}{result} {bcolors.ENDC}")
106 | 
107 | 
def SearchAuthorMedium(author):
    """List an author's posts and download the ones the user picks.

    Args:
        author: Medium username, without the leading "@".

    Raises:
        ValueError: Propagated from ``listBlogs`` when the feed has no posts.
    """
    feed_url = f"https://medium.com/@{author}/feed"
    posts = getResults(feed_url)

    listBlogs(posts)
    checkForDir(author)
    checkUserInput(posts)
113 | 


--------------------------------------------------------------------------------