├── LICENSE
├── README.md
├── docs_publish_step_1.jpg
├── docs_publish_step_2.PNG
├── docs_publish_step_3.PNG
├── dtm.py
└── requirements.txt


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 LordGhostX
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Docs To MD CLI
 2 | 
 3 | Welcome to the codebase for the Docs To MD CLI.
 4 | 
 5 | A Command Line Interface (CLI) tool that converts Google Docs files to Markdown.
 6 | 
 7 | ## Usage
 8 | 
 9 | ### Publish your Google Docs file to the web
10 | 
11 | * STEP 1: Select `Publish to the web` tab found under the file menu as seen in the image below:
12 | 
13 | ![publish_step_1](docs_publish_step_1.jpg)
14 | 
15 | 
16 | * STEP 2: Click on the "Publish" button:
17 | 
18 | ![publish_step_2](docs_publish_step_2.PNG)
19 | 
20 | 
21 | * STEP 3: Copy the Google Docs publish link under the link tab:
22 | 
23 | ![publish_step_3](docs_publish_step_3.PNG)
24 | 
25 | 
26 | ### Convert Google Docs to Markdown
27 | 
28 | ```bash
29 | $ python dtm.py <docs URL>
30 | $ python dtm.py <docs URL> <local|docs>
31 | ```
32 | 
33 | ### Convert Google Docs to Markdown and Save Images Locally
34 | 
35 | ```bash
36 | $ python dtm.py <docs URL> local
37 | ```
38 | 
39 | ### Convert Google Docs to Markdown and Keep Images on Google Servers
40 | 
41 | ```bash
42 | $ python dtm.py <docs URL> docs
43 | ```
44 | 
45 | ## Author
46 | 
47 | * [LordGhostX](https://twitter.com/LordGhostX) - Everyone's Friendly Neighbourhood Ghost
48 | 
49 | ## License
50 | 
51 | * MIT
52 | 


--------------------------------------------------------------------------------
/docs_publish_step_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_1.jpg


--------------------------------------------------------------------------------
/docs_publish_step_2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_2.PNG


--------------------------------------------------------------------------------
/docs_publish_step_3.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_3.PNG


--------------------------------------------------------------------------------
/dtm.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import codecs
  4 | import secrets
  5 | import wget
  6 | import requests
  7 | from bs4 import BeautifulSoup
  8 | from markdownify import markdownify
  9 | 
 10 | 
 11 | def show_help():
 12 |     print("python dtm.py <docs URL>")
 13 |     print("python dtm.py <docs URL> <local|docs|devto>")
 14 | 
 15 | 
 16 | def get_title(soup):
 17 |     r_title = soup.find("title").text.strip()
 18 |     title = "".join([i if i.isalnum() else "-" for i in r_title.lower()])
 19 |     try:
 20 |         os.mkdir(title)
 21 |     except:
 22 |         pass
 23 |     return r_title, title
 24 | 
 25 | 
 26 | def clean_href(html):
 27 |     def parse_href(href):
 28 |         return href.split("?q=")[1].split("&sa")[0]
 29 | 
 30 |     for i in html.find_all("a"):
 31 |         try:
 32 |             html = BeautifulSoup(str(html).replace(i["href"].replace(
 33 |                 "&", "&amp;"), parse_href(i["href"])), "html.parser")
 34 |         except:
 35 |             pass
 36 | 
 37 |     return str(html)
 38 | 
 39 | 
 40 | def download_images(title, html):
 41 |     html = BeautifulSoup(html, "html.parser")
 42 |     for i in html.find_all("img"):
 43 |         image_name = secrets.token_urlsafe(15).lower() + ".png"
 44 |         image_path = os.path.join(title, image_name)
 45 |         wget.download(i["src"], image_path)
 46 |         html = str(html).replace(i["src"], image_name)
 47 |     return str(html)
 48 | 
 49 | 
 50 | def upload_devto(title, html):
 51 |     html = BeautifulSoup(html, "html.parser")
 52 |     for i in html.find_all("img"):
 53 |         image_path = os.path.join(title, "image.png")
 54 |         wget.download(i["src"], image_path)
 55 |         image_url = requests.post(
 56 |             url="https://dev.to/image_uploads",
 57 |             files={"image[]": open(image_path, "rb")},
 58 |             headers={
 59 |                 "Cookie": "_Devto_Forem_Session=6f0e677ee8a6b618dc5938599647973d",
 60 |                 "X-CSRF-Token": "4QHd9XxW/f4naFxb5eU6On6BaFLvOTB6swIftLEmYlZgmtnHewRkirUgRG/tv8zhAV3ZS1/qWdGQmgLIPdwuaw=="
 61 |             }
 62 |         ).json()["links"][0]
 63 |         os.remove(image_path)
 64 |         html = str(html).replace(i["src"], image_url)
 65 |     return str(html)
 66 | 
 67 | 
 68 | def main(docs_url, method):
 69 |     r = requests.get(docs_url)
 70 | 
 71 |     soup = BeautifulSoup(r.text, "html.parser")
 72 |     r_title, title = get_title(soup)
 73 |     html = soup.find("div", {"id": "contents"}).find("div")
 74 | 
 75 |     html = clean_href(html)
 76 |     if method == "local":
 77 |         html = download_images(title, html)
 78 |     if method == "devto":
 79 |         html = upload_devto(title, html)
 80 | 
 81 |     markdown = markdownify(html, heading_style="ATX").replace("\_", "_")
 82 |     with codecs.open(os.path.join(title, "index.md"), "w", "utf-8") as md:
 83 |         md.write(markdown)
 84 | 
 85 |     print(f"\nSuccessfully converted '{r_title}' from Docs to MarkDown")
 86 | 
 87 | 
 88 | if __name__ == "__main__":
 89 |     if len(sys.argv) < 2:
 90 |         show_help()
 91 |         exit()
 92 |     else:
 93 |         if len(sys.argv) == 2:
 94 |             method = "local"
 95 |         else:
 96 |             method = sys.argv[2].lower()
 97 |         if method in ["local", "docs", "devto"]:
 98 |             main(sys.argv[1], method)
 99 |         else:
100 |             show_help()
101 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests
2 | markdownify
3 | wget
4 | bs4
5 | 


--------------------------------------------------------------------------------