├── LICENSE ├── README.md ├── docs_publish_step_1.jpg ├── docs_publish_step_2.PNG ├── docs_publish_step_3.PNG ├── dtm.py └── requirements.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 LordGhostX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Docs To MD CLI 2 | 3 | Welcome to the codebase for the Docs TO MD CLI. 
4 | 5 | A Command Line Interface (CLI) tool that converts Google Docs files to Markdown 6 | 7 | ## Usage 8 | 9 | ### Publish your Google Docs file to the web 10 | 11 | * STEP 1: Select the `Publish to the web` option found under the File menu, as shown in the image below: 12 | 13 | ![publish_step_1](docs_publish_step_1.jpg) 14 | 15 | 16 | * STEP 2: Click on the "Publish" button: 17 | 18 | ![publish_step_2](docs_publish_step_2.PNG) 19 | 20 | 21 | * STEP 3: Copy the Google Docs publish link under the Link tab: 22 | 23 | ![publish_step_3](docs_publish_step_3.PNG) 24 | 25 | 26 | ### Convert Google Docs to Markdown 27 | 28 | ```bash 29 | $ python dtm.py <docs_url> 30 | $ python dtm.py <docs_url> <method> 31 | ``` 32 | 33 | ### Convert Google Docs to Markdown and Save Images Locally 34 | 35 | ```bash 36 | $ python dtm.py <docs_url> local 37 | ``` 38 | 39 | ### Convert Google Docs to Markdown and Keep Images on Google Servers 40 | 41 | ```bash 42 | $ python dtm.py <docs_url> docs 43 | ``` 44 | 45 | ## Author 46 | 47 | * [LordGhostX](https://twitter.com/LordGhostX) - Everyone's Friendly Neighbourhood Ghost 48 | 49 | ## License 50 | 51 | * MIT 52 | -------------------------------------------------------------------------------- /docs_publish_step_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_1.jpg -------------------------------------------------------------------------------- /docs_publish_step_2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_2.PNG -------------------------------------------------------------------------------- /docs_publish_step_3.PNG: --------------------------------------------------------------------------------
# NOTE: in the flattened repository dump, these lines also carried the tail of
# the previous entry; it is preserved here so that nothing is lost:
#   /docs_publish_step_3.PNG:
#   https://raw.githubusercontent.com/LordGhostX/docs-to-md-cli/4d638fdc8edf3a42f763ec6006bb53debedae4f2/docs_publish_step_3.PNG
#
# /dtm.py:
"""Convert a published Google Docs page to Markdown.

Usage::

    python dtm.py <docs_url>
    python dtm.py <docs_url> <method>

``method`` is one of:

* ``local`` (default) - download every image next to the generated Markdown.
* ``docs``  - leave the image URLs exactly as they appear in the page.
* ``devto`` - re-upload every image to dev.to and link to the uploaded copy.

The result is written to ``<slug>/index.md`` where ``<slug>`` is derived from
the page title (see ``get_title``).
"""
import os
import sys
import codecs
import secrets
import wget
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify


# Methods accepted on the command line.
METHODS = ("local", "docs", "devto")

# Seconds to wait for any single HTTP request before giving up; without a
# timeout `requests` may block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

DEVTO_UPLOAD_URL = "https://dev.to/image_uploads"

# SECURITY NOTE(review): a session cookie and CSRF token were committed to the
# source. They are kept as fallbacks so existing behavior is unchanged, but
# they should be rotated and supplied through the environment instead.
DEVTO_SESSION = os.environ.get(
    "DEVTO_SESSION", "6f0e677ee8a6b618dc5938599647973d"
)
DEVTO_CSRF_TOKEN = os.environ.get(
    "DEVTO_CSRF_TOKEN",
    "4QHd9XxW/f4naFxb5eU6On6BaFLvOTB6swIftLEmYlZgmtnHewRkirUgRG/tv8zhAV3ZS1/qWdGQmgLIPdwuaw==",
)


def show_help():
    """Print the supported command-line forms."""
    print("python dtm.py <docs_url>")
    print("python dtm.py <docs_url> <" + "|".join(METHODS) + ">")


def get_title(soup):
    """Return ``(raw_title, slug)`` for the page and create the slug directory.

    The slug is the lower-cased title with every non-alphanumeric character
    replaced by ``-``; it is used as the output directory name.
    """
    r_title = soup.find("title").text.strip()
    title = "".join(ch if ch.isalnum() else "-" for ch in r_title.lower())
    try:
        os.mkdir(title)
    except OSError:
        # Best effort, as before: the directory usually already exists from a
        # previous run. Any real problem surfaces when the output is written.
        pass
    return r_title, title


def clean_href(html):
    """Return ``html`` as a string with redirect links unwrapped.

    Every ``<a href="...?q=TARGET&sa=...">`` has its ``href`` replaced by
    ``TARGET``. Links without a ``?q=`` part are left untouched. The argument
    itself is not modified; a re-parsed copy is edited instead.
    """
    soup = BeautifulSoup(str(html), "html.parser")
    for link in soup.find_all("a", href=True):
        href = link["href"]
        if "?q=" not in href:
            continue
        # Assign the attribute directly rather than string-replacing in the
        # serialized document: the serialized form escapes "&" as "&amp;", so
        # a textual replace of the raw href silently misses such links.
        link["href"] = href.split("?q=")[1].split("&sa")[0]
    return str(soup)


def download_images(title, html):
    """Download every image into directory ``title`` and point ``src`` at it.

    Each image is stored under a random ``.png`` file name. Returns the
    rewritten HTML as a string.
    """
    soup = BeautifulSoup(html, "html.parser")
    for img in soup.find_all("img", src=True):
        image_name = secrets.token_urlsafe(15).lower() + ".png"
        wget.download(img["src"], os.path.join(title, image_name))
        img["src"] = image_name
    return str(soup)


def upload_devto(title, html):
    """Re-host every image on dev.to and point ``src`` at the uploaded copy.

    Each image is downloaded to a temporary ``image.png`` inside directory
    ``title``, posted to the dev.to upload endpoint, and removed again.
    Returns the rewritten HTML as a string.

    Raises:
        requests.HTTPError: if dev.to rejects an upload.
    """
    soup = BeautifulSoup(html, "html.parser")
    image_path = os.path.join(title, "image.png")
    for img in soup.find_all("img", src=True):
        wget.download(img["src"], image_path)
        try:
            # Close the handle before removing the file; an open handle leaks
            # and makes the removal fail on Windows.
            with open(image_path, "rb") as image_file:
                response = requests.post(
                    url=DEVTO_UPLOAD_URL,
                    files={"image[]": image_file},
                    headers={
                        "Cookie": "_Devto_Forem_Session=" + DEVTO_SESSION,
                        "X-CSRF-Token": DEVTO_CSRF_TOKEN,
                    },
                    timeout=REQUEST_TIMEOUT,
                )
        finally:
            os.remove(image_path)
        response.raise_for_status()
        img["src"] = response.json()["links"][0]
    return str(soup)


def main(docs_url, method):
    """Fetch ``docs_url``, convert its contents to Markdown and save it.

    Args:
        docs_url: URL of the published Google Docs page.
        method: ``"local"``, ``"docs"`` or ``"devto"``; controls what happens
            to images (``"docs"`` leaves them untouched).

    Raises:
        requests.HTTPError: if the page cannot be fetched.
    """
    r = requests.get(docs_url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    r_title, title = get_title(soup)
    # The document body is the first <div> inside <div id="contents">.
    html = soup.find("div", {"id": "contents"}).find("div")

    html = clean_href(html)
    if method == "local":
        html = download_images(title, html)
    elif method == "devto":
        html = upload_devto(title, html)

    # markdownify escapes underscores; undo that so they appear verbatim.
    markdown = markdownify(html, heading_style="ATX").replace("\\_", "_")
    with codecs.open(os.path.join(title, "index.md"), "w", "utf-8") as md:
        md.write(markdown)

    print(f"\nSuccessfully converted '{r_title}' from Docs to MarkDown")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        show_help()
        sys.exit()
    method = sys.argv[2].lower() if len(sys.argv) > 2 else "local"
    if method in METHODS:
        main(sys.argv[1], method)
    else:
        show_help()

# /requirements.txt (trailing entry of the same dump, preserved):
#   requests
#   markdownify
#   wget
#   bs4