├── .gitignore
├── README.md
├── endless-library.py
├── example_config.json
├── integration_tests
├── feral_gods_book.html
├── generative_ai_book.html
└── test_download_book.py
├── pyproject.toml
├── requirements.txt
├── send-book.py
└── src
├── anna_list.py
├── book.py
├── constants.py
├── goodreads_list.py
├── io_utils.py
├── menu.py
├── scaper.py
└── searcher.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | config.json
3 | downloads
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # endless-library
2 |
3 | endless-library is a tool to easily download and send books to a Kindle, featuring a search menu, Goodreads list imports, and an intuitive CLI menu.
4 |
5 | ## Features
6 |
7 | - Download (and send) books from Goodreads link
8 | - Profile lists such as 'Want to Read', 'Read' (e.g. https://www.goodreads.com/review/list/12345678?shelf=to-read)
9 | - **NOTE: Account must be public**
10 | - Listopia lists (e.g. https://www.goodreads.com/list/show/1.Best_Books_Ever)
11 | - Book series (e.g. https://www.goodreads.com/series/73758-the-hunger-games)
12 | - Download (and send) books by searching
13 |
14 | ## Installation
15 | 1. Extract all files into a folder (can delete the .gitignore)
16 | 2. Open a terminal in the folder
17 | 3. Install the required dependencies using
18 | ```bash
19 | pip install -r requirements.txt
20 | ```
21 | 4. Rename `example_config.json` to `config.json`, and populate with the proper information
22 | - "mode"
23 | - "download" to only download books
24 | - **NOTE: if using "download" mode, you do not need to configure the email settings**
25 | - "kindle" to download books, and email to Kindle
26 | - "email_sender"
27 | - the email address being used to send the emails
28 | - **NOTE: this email address must be under your [approved email addresses](https://www.amazon.com/gp/help/customer/display.html?nodeId=GX9XLEVV8G4DB28H)**
29 | - "email_receiver"
30 | - the email address of the Kindle device you want to send e-books to
31 | - **NOTE: this email address can be found [here](https://www.amazon.com/sendtokindle/email)**
32 | - "email_password"
33 | - the app password of the Gmail account under "email_sender"
34 | - **NOTE: an app password can be generated [here](https://support.google.com/accounts/answer/185833?hl=en)**
35 |
36 | ## Usage
37 |
38 | Run `endless-library.py` by double clicking, or by opening a terminal in the root folder and typing
39 |
40 | ```bash
41 | python endless-library.py
42 | ```
43 |
44 | ## Screenshots
45 |
46 |
47 |
48 | ## Known Issues / Shortcomings
49 | - Some .epub files will be rejected when sent to Kindle. I haven't determined what causes some .epubs to be unsupported, but when an error occurs, you will receive an email titled "There was a problem with the document(s) you sent to Kindle." This is a limitation of the Kindle's .epub processing rather than a shortcoming of this program.
50 | - Email sending only supports Gmail accounts.
51 |
52 | ## Future Features
53 | - Email support outside of Gmail accounts.
54 | - Documented testing
55 |
56 | ## Send an EPUB to Kindle
57 |
58 | If you only want to send an epub to your Kindle, you can use the `send-book.py` script.
59 |
60 | First, configure email settings in `config.json` as described above.
61 |
62 | ```bash
63 | python send-book.py "path/to/your/book.epub"
64 | ```
65 |
--------------------------------------------------------------------------------
/endless-library.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from src.menu import Menu
3 | from src.io_utils import IOUtils
4 |
if __name__ == "__main__":
    # Top-level menu loop: keep prompting until the user chooses to exit.
    while True:
        print("===== Endless Library =====")
        print("[1] Search Mode")
        print("[2] List Mode")
        print("[3] Exit")

        # Tolerate stray whitespace around the typed choice.
        choice = input("Enter your choice [1/2/3]: ").strip()

        if choice == "1":
            Menu.book_search_menu()
        elif choice == "2":
            Menu.goodreads_menu()
        elif choice == "3":
            print("Exiting the program. Goodbye!")
            # Exit with status 0. Passing a string to sys.exit() echoes it to
            # stderr and reports status 1, i.e. a failure, for a normal exit.
            sys.exit(0)
        else:
            print("Invalid choice. Please enter 1, 2, or 3.")
--------------------------------------------------------------------------------
/example_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "mode" : "download",
3 | "email_sender" : "example@domain.com",
4 | "email_receiver" : "example@kindle.com",
5 | "email_password" : "aaaa bbbb cccc dddd"
6 | }
--------------------------------------------------------------------------------
/integration_tests/feral_gods_book.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/integration_tests/generative_ai_book.html:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/integration_tests/test_download_book.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from bs4 import BeautifulSoup
4 |
5 | from src.io_utils import IOUtils
6 | from src.book import Book
7 |
def tests_download_book():
    """Download the book described by the saved search-result fixture.

    The fixture HTML is parsed into a ``Book`` and handed to
    ``IOUtils.download_book``; the test passes when the file appears at
    ``book.filepath``. The downloaded file is removed afterwards, even when
    an assertion fails, so a failed run cannot poison the next one.
    """
    with open("integration_tests/feral_gods_book.html", "r", encoding="utf-8") as file:
        # Name the parser explicitly (the same one src/io_utils.py uses) so the
        # result does not depend on which optional parsers are installed.
        soup = BeautifulSoup(file.read(), "html.parser")
    book = Book(soup, "anna")
    io_utils = IOUtils()
    assert not os.path.exists(book.filepath)
    try:
        io_utils.download_book(book)
        assert os.path.exists(book.filepath)
    finally:
        if os.path.exists(book.filepath):
            os.remove(book.filepath)
17 |
18 |
def tests_book_dead_libgenli_link():
    """Download a book whose fixture is named for a dead Libgen.li link.

    Same flow as the plain download test, but driven by
    ``generative_ai_book.html``. The downloaded file is removed afterwards,
    even when an assertion fails.
    """
    with open("integration_tests/generative_ai_book.html", "r", encoding="utf-8") as file:
        # Name the parser explicitly (the same one src/io_utils.py uses) so the
        # result does not depend on which optional parsers are installed.
        soup = BeautifulSoup(file.read(), "html.parser")
    book = Book(soup, "anna")
    io_utils = IOUtils()
    assert not os.path.exists(book.filepath)
    try:
        io_utils.download_book(book)
        assert os.path.exists(book.filepath)
    finally:
        if os.path.exists(book.filepath):
            os.remove(book.filepath)
28 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.pytest.ini_options]
2 | pythonpath = [
3 | "."
4 | ]
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bbrown430/endless-library/7d008ee2ce9a793f55fd407f2ab3916548cfeeb3/requirements.txt
--------------------------------------------------------------------------------
/send-book.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import re
4 | import sys
5 |
6 | from src.io_utils import IOUtils
7 |
class Book:
    """Minimal book record for a local EPUB, shaped for ``IOUtils.send_email``.

    Attributes:
        title: sanitised file name without the ``.epub`` extension.
        attachment_name: ``title`` plus ``.epub``; at most 123 characters.
        filepath: the path exactly as given by the caller.
    """

    EXTENSION = ".epub"
    # Upper bound on the attachment file name, extension included.
    MAX_ATTACHMENT_LENGTH = 123

    def __init__(self, filepath):
        restricted_characters = r'[\/:*?"<>|]'
        filename = re.sub(restricted_characters, '', os.path.basename(filepath))
        # Strip only a trailing extension. Replacing every ".epub" occurrence
        # would also mangle names such as "notes.epub.draft.epub".
        if filename.endswith(self.EXTENSION):
            filename = filename[:-len(self.EXTENSION)]
        # Truncate after removing the extension, so a long name can never be
        # cut in the middle of ".epub" and leave a stray ".ep" in the title.
        title = filename[:self.MAX_ATTACHMENT_LENGTH - len(self.EXTENSION)]
        self.title = title
        self.attachment_name = title + self.EXTENSION
        self.filepath = filepath
15 |
def main():
    """Parse the command line and email the given EPUB to the Kindle.

    Exits with status 1 when the argument does not end in ``.epub`` or does
    not point to an existing file. Otherwise the file is wrapped in a
    ``Book`` and handed to ``IOUtils.send_email``.
    """
    parser = argparse.ArgumentParser(description='Send book to Kindle')
    parser.add_argument('file', type=str, help='File to send')
    args = parser.parse_args()
    if not args.file.endswith('.epub'):
        print('File must be an EPUB')
        sys.exit(1)
    # Fail early with a clear message instead of letting send_email report a
    # generic "unexpected error" when it tries to open the attachment.
    if not os.path.isfile(args.file):
        print(f'File not found: {args.file}')
        sys.exit(1)
    book = Book(args.file)
    IOUtils().send_email(book)
25 |
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
28 |
--------------------------------------------------------------------------------
/src/anna_list.py:
--------------------------------------------------------------------------------
1 | from src.scaper import Scraper
2 | from src.io_utils import IOUtils
3 | from src.book import Book
4 |
class AnnaList(Scraper):
    """Searches Anna's Archive and turns the result page into ``Book`` objects."""

    def __init__(self):
        super().__init__()
        self.base_url = "https://annas-archive.org/search?index=&q="

    def search_formatter(self, search_term):
        """Return the full search URL for ``search_term``.

        The term is URL-encoded (spaces become ``+``) so characters such as
        ``&`` or ``#`` in a title cannot break the query string.
        """
        from urllib.parse import quote_plus

        url_ending = "&ext=epub&src=lgrs&sort=&lang=en"  # TODO file type and language preference
        return self.base_url + quote_plus(search_term) + url_ending

    def list_processor(self, book_list):
        """Filter out unusable results and return the rest sorted by file size.

        A book is dropped when it has no title, an unknown genre, a title or
        author containing a "summary"-style term, or a size above 10 (the
        size string is parsed by cutting its last two characters).
        """
        bad_terms = ("summary", "conversation starters", "summaries")

        def size_of(book):
            # Drops the two-character unit suffix. Presumably always "MB";
            # TODO confirm against the result page.
            return float(book.size[:-2])

        def is_wanted(book):
            if book.title == "no_title" or book.genre == "unknown":
                return False
            title = book.title.lower()
            if any(term in title for term in bad_terms):
                return False
            author = book.author.lower()
            if any(term in author for term in bad_terms):
                return False
            return size_of(book) <= 10

        return sorted((book for book in book_list if is_wanted(book)), key=size_of)

    def scrape(self, search_term):
        """Return the processed list of books found for ``search_term``.

        Returns an empty list when the page cannot be fetched or holds no
        results, so callers can treat a falsy result as "nothing found".
        """
        url = self.search_formatter(search_term)
        soup = IOUtils.cook_soup(url)
        if soup is None:
            # cook_soup gave up after its retries; report "no results".
            return []

        books_html = soup.find_all('div', class_="h-[110px] flex flex-col justify-center")
        if books_html:
            # The final matching container is discarded, as before.
            # NOTE(review): presumably it is not a real search hit; confirm.
            books_html.pop()

        scope = 10  # only the first results are considered
        books = [Book(book_html, "anna") for book_html in books_html[:scope]]
        return self.list_processor(books)
--------------------------------------------------------------------------------
/src/book.py:
--------------------------------------------------------------------------------
1 | import re
2 |
class Book:
    """Book metadata parsed from one search-result or list-entry HTML fragment.

    Attributes set for every source: ``title``, ``author``, ``filename``,
    ``attachment_name`` and ``filepath``. Results from ``"anna"`` also get
    ``md5``, ``language``, ``size`` and ``genre``.
    """

    # Characters removed from titles, authors and list names before they are
    # used in file paths.
    RESTRICTED_CHARACTERS = r'[\/:*?"<>|]'

    def __init__(self, book_html, website):
        self.parse_html(book_html, website)

    def set_directory(self, list_name):
        """Point ``filepath`` at ``downloads/<sanitised list name>/<filename>``."""
        formatted_list_name = re.sub(self.RESTRICTED_CHARACTERS, '', list_name)
        self.filepath = f"downloads/{formatted_list_name}/{self.filename}"

    def filepath_prep(self, title, list_name):
        """Sanitise title and author and derive the file names and path.

        ``title`` falls back to ``"no_title"`` when ``None`` and is cut at the
        first colon. A ``None`` author falls back to ``"no_author"`` so the
        sanitising step and later string operations cannot fail on it.
        """
        if title is None:
            title = "no_title"
        # Keep only the part before the first colon, dropping any subtitle.
        title = title.split(":")[0]
        author = self.author if self.author is not None else "no_author"

        self.title = re.sub(self.RESTRICTED_CHARACTERS, '', title)
        self.author = re.sub(self.RESTRICTED_CHARACTERS, '', author)
        self.filename = self.title + " - " + self.author + ".epub"
        self.attachment_name = self.title + ".epub"
        if list_name is not None:
            self.set_directory(list_name)
        else:
            self.filepath = f"downloads/{self.filename}"

    # parses html to determine book metadata
    def parse_html(self, book_html, website):
        """Extract metadata from ``book_html`` according to ``website``.

        ``website`` is one of ``"anna"``, ``"profile"``, ``"listopia"`` or
        ``"series"``; any other value raises ``ValueError``.
        """
        if website == "anna":
            title = book_html.find('h3').string
            link = book_html.find('a')["href"]
            self.md5 = link.split("/")[2]
            author = book_html.find("div", class_="max-lg:line-clamp-[2] lg:truncate leading-[1.2] lg:leading-[1.35] max-lg:text-sm italic").string
            metadata = book_html.find("div", class_="line-clamp-[2] leading-[1.2] text-[10px] lg:text-xs text-gray-500").string
            split_metadata = metadata.split(",")
            # There may be more than one comma separated language option
            epub_index = _get_epub_index(split_metadata)
            # If there is more than one language then this value will not be accurate
            self.language = split_metadata[0]
            size_and_genre = split_metadata[epub_index + 2:epub_index + 4]
            self.size = size_and_genre[0].strip() if size_and_genre else "unknown"
            try:
                self.genre = size_and_genre[1].split("(")[1].split(")")[0]
            except IndexError:
                # "unknown" is the sentinel AnnaList.list_processor filters on.
                self.genre = "unknown"
        elif website == "profile":
            title = book_html.select_one('td.field.title a[title]').text.strip()
            # Only the first line of a multi-line title is kept.
            title = title.split("\n")[0]
            author = book_html.select_one('td.field.author a[href]').text
        elif website == "listopia":
            title = book_html.find('span', {'itemprop': 'name'}).text
            author = book_html.find('span', {'itemprop': 'author'}).text.replace("\n", "")
            # Drop any trailing " (...)" suffix from title and author.
            if "(" in title:
                title = title.split(" (")[0]
            if "(" in author:
                author = author.split(" (")[0]
        elif website == "series":
            title = book_html.find('span', {'itemprop': 'name'}).text
            author = book_html.find('span', {'itemprop': 'author'}).text.replace("\n", "")
        else:
            raise ValueError(f"Unsupported website: {website!r}")

        # Author clean-up applied to every source.
        # NOTE(review): nesting was reconstructed from a flattened listing;
        # confirm these two steps are not meant for the series branch only.
        if author is not None:
            if ", " in author:
                # "Last, First" -> "First Last"
                author_split = author.split(", ")
                author = author_split[1] + " " + author_split[0]
            if " Jr." in author:
                author = author.replace(" Jr.", "")
        self.author = author
        self.filepath_prep(title, None)

    def update_metadata(self, abs_book, list_name):
        """Adopt title and author from ``abs_book`` and rebuild the paths under ``list_name``."""
        title = abs_book.title
        self.author = abs_book.author
        self.filepath_prep(title, list_name)

    # returns a string formatted "'book' by 'author'"
    def string(self):
        return f"{self.title} by {self.author}"
84 |
85 |
def _get_epub_index(split_metadata):
    """Return the index of the first metadata field that contains ``"epub"``.

    Raises:
        ValueError: when no field mentions "epub", including when the list
            is empty (previously an ``IndexError``). ``ValueError`` is an
            ``Exception`` subclass, so handlers for the old bare
            ``Exception`` still catch it.
    """
    for index, field in enumerate(split_metadata):
        if "epub" in field:
            return index
    raise ValueError("Non-epub book found")
95 |
--------------------------------------------------------------------------------
/src/constants.py:
--------------------------------------------------------------------------------
# When True, src/io_utils.py prints extra diagnostics (book genre, mirror URL, download link).
DEBUG = False
2 |
--------------------------------------------------------------------------------
/src/goodreads_list.py:
--------------------------------------------------------------------------------
1 | from src.scaper import Scraper
2 | from src.book import Book
3 | from src.io_utils import IOUtils
4 | import validators
5 | import math
6 |
class GoodreadsList(Scraper):
    """Scrapes Goodreads profile shelves, Listopia lists and series pages.

    After a successful ``scrape`` the instance exposes ``list_name``; profile
    and Listopia scrapes also set ``book_count`` and ``user_count``.
    """

    def __init__(self):
        super().__init__()

    @staticmethod
    def _with_params(list_url, params):
        """Append the query fragment ``params`` to ``list_url``.

        Uses ``?`` when the URL has no query string yet and ``&`` otherwise,
        so Listopia URLs, which normally carry no query, paginate correctly.
        """
        separator = "&" if "?" in list_url else "?"
        return f"{list_url}{separator}{params}"

    def link_checker(self, list_url):
        """Classify ``list_url``.

        Returns:
            ``(kind, url)`` where ``kind`` is ``"profile"``, ``"listopia"`` or
            ``"series"``, or ``(None, False)`` for anything else, including
            valid URLs that match none of the three patterns.
        """
        if validators.url(list_url):
            if "/review/" in list_url:
                return "profile", self._with_params(list_url, "page=1&per_page=10")
            if "/list/show" in list_url:
                return "listopia", list_url
            if "/series/" in list_url:
                return "series", list_url
        return None, False

    def how_many_books(self):
        """Ask how many books to download and store the answer in ``user_count``.

        Keeps prompting until a number between 1 and ``book_count`` is given.
        """
        while True:
            user_input = IOUtils.input_menu(f"This list contains {self.book_count} books. How many would you like to download?: ")
            if user_input is None:
                # "back" is not supported at this prompt; ask again.
                continue
            try:
                user_count = int(user_input)
            except ValueError:
                print("Invalid input. Please enter a numeric value.")
                continue
            if 1 <= user_count <= self.book_count:
                self.user_count = user_count
                return
            print("Invalid input. Please enter a number within the valid range.")

    # scrapes a list of books from a goodreads list, given the list url
    def scrape(self, list_url):
        """Return the ``Book`` objects found at ``list_url``.

        Returns ``None`` when the URL is not a supported Goodreads link, the
        page cannot be fetched, or the expected list header is missing.
        """
        list_type, url = self.link_checker(list_url)
        if not url:
            return None

        soup = IOUtils.cook_soup(url)
        if soup is None:
            return None

        if list_type == "profile":
            return self._scrape_profile(list_url, soup)
        if list_type == "listopia":
            return self._scrape_listopia(list_url, soup)
        return self._scrape_series(soup)

    @staticmethod
    def _profile_rows(soup):
        """Return the shelf table rows of a profile page (empty if absent)."""
        book_table = soup.find("tbody", {"id": "booksBody"})
        return book_table.find_all("tr") if book_table is not None else []

    @staticmethod
    def _listopia_rows(soup):
        """Return every table row of a Listopia page."""
        return soup.find_all("tr")

    def _scrape_pages(self, list_url, soup, website, page_size, find_rows, extra_params=""):
        """Collect ``user_count`` books, starting with the already-fetched first page.

        Further pages are requested only while more books are needed.
        """
        books = []
        pages_needed = math.ceil(self.user_count / page_size)
        for page in range(1, pages_needed + 1):
            if page > 1:
                soup = IOUtils.cook_soup(self._with_params(list_url, f"page={page}{extra_params}"))
                if soup is None:
                    break
            rows = find_rows(soup)
            if page * page_size > self.user_count:
                # Last page: keep only the remainder the user asked for.
                rows = rows[:self.user_count % page_size]
            for book_html in rows:
                goodreads_book = Book(book_html, website)
                goodreads_book.set_directory(self.list_name)
                books.append(goodreads_book)
        return books

    def _scrape_profile(self, list_url, soup):
        """Scrape a profile shelf, fetched ten books per page."""
        # determine number of books on shelf
        shelf_header = soup.find('span', class_='h1Shelf')
        count_span = shelf_header.find('span', class_='greyText') if shelf_header is not None else None
        if count_span is None:
            # No shelf header on the page; report failure to the caller.
            return None
        count_text = count_span.text
        self.book_count = int(count_text.replace('(', '').replace(')', '').replace(',', ''))
        # get list name: page text up to the first " ("
        self.list_name = soup.text.replace("\n", "").split(" (")[0]

        self.how_many_books()
        return self._scrape_pages(list_url, soup, "profile", 10, self._profile_rows, "&per_page=10")

    def _scrape_listopia(self, list_url, soup):
        """Scrape a Listopia list, paged in steps of 100 books."""
        # get book count
        book_count_container = soup.find("div", class_="stacked")
        title_container = soup.find("h1", class_="gr-h1 gr-h1--serif")
        if book_count_container is None or title_container is None:
            return None
        book_string = book_count_container.text.strip().split(' books')[0].strip()
        self.book_count = int(book_string.replace(",", ""))
        # get list title
        self.list_name = title_container.text.strip()

        self.how_many_books()
        return self._scrape_pages(list_url, soup, "listopia", 100, self._listopia_rows)

    def _scrape_series(self, soup):
        """Scrape the whole-numbered, non-zero entries of a series page."""
        # get book count
        subtitle = soup.find("div", class_="responsiveSeriesHeader__subtitle u-paddingBottomSmall")
        if subtitle is None:
            return None
        book_count = int(subtitle.text.split(" ")[0])

        # get list name: page text up to the first " by"
        self.list_name = soup.text.replace("\n", "").split(" by")[0]

        # get books
        goodreads_books = []
        main_series_count = 0
        for book_html in soup.find_all("div", class_="listWithDividers__item"):
            if main_series_count >= book_count:
                break
            heading = book_html.find("h3")
            if heading is None:
                continue
            try:
                entry_number = float(heading.text.split("Book ")[1])
            except (IndexError, ValueError):
                # Labels that are not a plain "Book <number>" are skipped
                # instead of aborting the whole scrape.
                continue
            # determine if main series entry: whole number, and not book 0
            if entry_number % 1 == 0 and entry_number != 0:
                main_series_count += 1
                goodreads_book = Book(book_html, "series")
                goodreads_book.set_directory(self.list_name)
                goodreads_books.append(goodreads_book)

        return goodreads_books
141 |
--------------------------------------------------------------------------------
/src/io_utils.py:
--------------------------------------------------------------------------------
1 | import ssl
2 | import sys
3 | import json
4 | import smtplib
5 | from email.mime.multipart import MIMEMultipart
6 | from email.mime.base import MIMEBase
7 | from email import encoders
8 | import os
9 | import time
10 | import requests
11 | import urllib.request
12 | import urllib.parse
13 | from bs4 import BeautifulSoup
14 |
15 | from src.constants import DEBUG
16 |
class IOUtils:
    """Console input, HTTP fetching, book download and Kindle email helpers."""

    # Seconds to wait on any single network operation before giving up.
    REQUEST_TIMEOUT = 30

    # an adaptable input menu with back and exit functionality
    @staticmethod
    def input_menu(input_message):
        """Prompt with ``input_message`` and return the typed text.

        Typing "exit" (any case) ends the program with status 0; typing
        "back" returns ``None`` so the caller can step back a menu.
        """
        user_input = input(input_message)
        command = user_input.lower()
        if command == "exit":
            # A user-requested exit is not an error, so exit with status 0.
            print("Goodbye!")
            sys.exit(0)
        if command == "back":
            return None
        return user_input

    @staticmethod
    def duplicate_checker(filename):
        """Return True when a file named ``filename`` exists anywhere under ``downloads``."""
        directory = "downloads"
        # Walk through all directories and subdirectories
        return any(filename in files for _path, _subdirs, files in os.walk(directory))

    # returns HTML from a website into a parseable format
    @staticmethod
    def cook_soup(url, cdn=None):
        """Fetch ``url`` and return it parsed as ``BeautifulSoup``.

        Retries up to five times on any non-200 status (waiting 2 s on 500,
        10 s on 429, 1 s otherwise) and returns ``None`` once all five fail.
        ``cdn`` is accepted for compatibility and is not used. Network
        errors raised by ``requests`` propagate to the caller.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        max_errors = 5
        for _attempt in range(max_errors):
            # A timeout keeps a stalled server from hanging the program forever.
            response = requests.get(url, headers=headers, timeout=IOUtils.REQUEST_TIMEOUT)
            status = response.status_code

            if status == 200:
                return BeautifulSoup(response.text, 'html.parser')
            if status == 500:
                print("Internal Server Error! Retrying in 2 seconds...")
                time.sleep(2)
            elif status == 429:
                print("Moving too fast! Retrying in 10 seconds...")
                time.sleep(10)
            else:
                print(f"Failed to retrieve the page. Status code: {status}")
                time.sleep(1)

        print("Failed to retrieve the page after 5 attempts")
        return None

    @staticmethod
    def get_cdn():
        """Return a fresh mirror rotation over the known libgen hosts."""
        cdn = LimitedRotatingBookCDN(
            ["https://libgen.is", "https://libgen.rs", "https://libgen.st"]
        )
        return cdn

    def _fetch_mirror_page(self, book, cdn):
        """Return the book's page from the first mirror that answers, else ``None``.

        Advances ``cdn`` to the next mirror after each failure.
        """
        while True:
            book_url = cdn.get_book_url(book)
            print(f"Attempting to download {book.title} from {cdn.cur_url}...")
            try:
                soup = self.cook_soup(book_url)
            except requests.exceptions.RequestException as e:
                print(f"Failed to download {book.title} from {cdn.cur_url} due to: {e}.")
                soup = None
            if soup is not None:
                return soup
            try:
                cdn.next()
            except StopIteration:
                return None

    def _resolve_indirect_link(self, download_link, soup, headers):
        """Follow an intermediate mirror page and return the real file URL.

        Looks for a "GET" anchor on the page behind ``download_link``; if
        there is none, tries the IPFS link found in ``soup``. Returns
        ``None`` when neither page offers a "GET" link.
        """
        # libgen.is has an indirect download to libgen.li
        response = requests.get(download_link, headers=headers, timeout=self.REQUEST_TIMEOUT)
        mirror_soup = BeautifulSoup(response.text, 'html.parser')
        get_anchors = mirror_soup.find_all("a", href=True, string="GET")
        if get_anchors:
            resolved = get_anchors[0]["href"]
            # libgen.li has a partial link for the download
            if "https://" not in resolved:
                resolved = urllib.parse.urlparse(response.url)._replace(path=resolved, query='').geturl()
            return resolved

        # libgen.li link is not available so try for IPFS link
        ipfs_link = get_ipfs_link(soup)
        ipfs_response = requests.get(ipfs_link, headers=headers, timeout=self.REQUEST_TIMEOUT)
        ipfs_soup = BeautifulSoup(ipfs_response.text, 'html.parser')
        ipfs_anchors = ipfs_soup.find_all("a", string="GET")
        if ipfs_anchors:
            return ipfs_anchors[0]["href"]
        return None

    # downloads book from the libgen mirrors
    def download_book(self, book, cdn=None):
        """Download ``book`` to ``book.filepath``.

        Tries each mirror in ``cdn`` (a fresh rotation when ``None``) for
        the book's page, resolves the download link, then attempts the
        download up to six times.

        Returns:
            True on success, False otherwise.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        if cdn is None:
            cdn = IOUtils.get_cdn()
        dir_path = os.path.dirname(book.filepath)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        soup = self._fetch_mirror_page(book, cdn)
        if soup is None:
            print("Download failed.")
            return False

        download_link_container = soup.find("a")
        if download_link_container is None:
            print("Download failed.")
            return False

        indirect_download = False
        try:
            download_link = download_link_container["href"]
            # libgen fiction mirror page
            if download_link == '/dbdumps/':
                indirect_download = True
                download_link = soup.find_all('ul', class_="record_mirrors")[0].find_all('a')[1]["href"]
            # Libgen mirror page for nonfiction
            elif "setlang" in download_link:
                download_link = soup.find_all('a', string="Libgen.li")[0]["href"]
                indirect_download = True
        except (IndexError, KeyError):
            # The page does not have the expected mirror links.
            print("Download failed.")
            return False

        max_retries = 5
        for attempt in range(max_retries + 1):
            try:
                if DEBUG:
                    print(f"Download link: {download_link}")
                if indirect_download:
                    resolved = self._resolve_indirect_link(download_link, soup, headers)
                    if resolved is None:
                        return False  # Failure
                    download_link = resolved
                    # Resolve only once. A retry must reuse the direct link
                    # rather than treat it as another mirror page.
                    indirect_download = False
                request = urllib.request.Request(download_link, headers=headers)
                print(f"Downloading {book.title} from {download_link}...")
                with urllib.request.urlopen(request, timeout=self.REQUEST_TIMEOUT) as response:
                    payload = response.read()
                # Write only after the whole body arrived, so a failed
                # transfer cannot leave an empty file that
                # duplicate_checker would later take for a finished download.
                with open(book.filepath, "wb") as file:
                    file.write(payload)
                print(f".epub file downloaded successfully to: {book.filepath}")
                return True
            except Exception as e:
                print(f"Download failed due to: {e}.")
                if attempt < max_retries:
                    print(f"Retrying download for {book.title}...")
        return False

    @staticmethod
    def _build_message(email_sender, email_receiver, book):
        """Return the MIME message carrying ``book.filepath`` as an attachment."""
        em = MIMEMultipart()
        em["From"] = email_sender
        em["To"] = email_receiver
        em["Subject"] = f"Sending {book.title} to Kindle"

        part = MIMEBase("application", "octet-stream")
        with open(book.filepath, "rb") as attachment:
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # Pass the name as a parameter so it is quoted/encoded properly;
        # an unquoted name is cut at the first space.
        part.add_header("Content-Disposition", "attachment", filename=book.attachment_name)

        em.attach(part)
        return em

    # sends the book as an attachment to the kindle library
    def send_email(self, book):
        """Email ``book`` to the Kindle address configured in ``config.json``.

        Sends through Gmail's SMTP-over-SSL endpoint. Failures while
        building or sending the message are printed, not raised; problems
        reading ``config.json`` propagate to the caller.
        """
        with open("config.json") as config_file:
            config = json.load(config_file)
        email_sender = config["email_sender"]
        email_password = config["email_password"]
        email_receiver = config["email_receiver"]

        try:
            em = self._build_message(email_sender, email_receiver, book)

            context = ssl.create_default_context()

            with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
                smtp.login(email_sender, email_password)
                smtp.sendmail(email_sender, email_receiver, em.as_string())
            print(f"{book.title} successfully emailed to Kindle.")
        except smtplib.SMTPException as e:
            print(f"Error sending email: {e}.")
        except Exception as e:
            print(f"An unexpected error occurred: {e}.")
216 |
217 |
class LimitedRotatingBookCDN:
    """A rotating CDN for downloading books from multiple sources.

    The rotation is one-way: ``next`` moves to the following URL and raises
    ``StopIteration`` once the last URL has been reached.
    """

    def __init__(self, urls):
        """Start the rotation at the first of ``urls`` (a list, or a single URL string).

        Raises:
            ValueError: when ``urls`` is empty.
        """
        if isinstance(urls, str):
            urls = [urls]
        if not urls:
            raise ValueError("at least one CDN URL is required")
        self.urls = urls
        self.url_index = 0
        self.cur_url = self.urls[self.url_index]

    def next(self):
        """Advance to the next URL and return it.

        Raises:
            StopIteration: when the current URL is already the last one. The
                state is left untouched, so ``url_index`` keeps matching
                ``cur_url``.
        """
        next_index = self.url_index + 1
        if next_index >= len(self.urls):
            raise StopIteration
        self.url_index = next_index
        self.cur_url = self.urls[next_index]
        return self.cur_url

    def get_url(self, suffix=None):
        """Return the current base URL, with ``/suffix`` appended when given."""
        if suffix is None:
            return self.cur_url
        return f"{self.cur_url}/{suffix}"

    def __len__(self):
        return len(self.urls)

    def get_book_url(self, book):
        """Return the page URL for ``book`` on the current mirror.

        Non-fiction and fiction books use different URL layouts keyed by
        ``book.md5``; any other genre yields the bare mirror URL.
        """
        if DEBUG:
            print("Book genre:", book.genre)
        if book.genre == "non-fiction":
            url = self.get_url(f"book/index.php?md5={book.md5}")
        elif book.genre == "fiction":
            cdn_url = self.get_url("fiction")
            if not cdn_url.endswith("/"):
                cdn_url += "/"
            url = cdn_url + book.md5
        else:
            url = self.get_url()

        if DEBUG:
            print(f"get_book_url url: {url}")
        return url
259 |
260 |
def get_ipfs_link(soup):
    """Return the ``href`` of the first anchor that mentions "IPFS".

    An anchor matches when "IPFS" appears in its text or in its ``href``.
    Anchors without an ``href`` are skipped. Returns ``None`` when no
    anchor matches.
    """
    # href=True skips anchors that have no href attribute, which would
    # otherwise raise KeyError on the lookups below.
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if "IPFS" in anchor.text or "IPFS" in href:
            return href
    return None
266 |
--------------------------------------------------------------------------------
/src/menu.py:
--------------------------------------------------------------------------------
1 | from src.anna_list import AnnaList
2 | from src.goodreads_list import GoodreadsList
3 | from src.searcher import Searcher
4 | from src.io_utils import IOUtils
5 |
class Menu:
    """Interactive console flows for list downloads and single-book searches."""

    @staticmethod
    def mission_report(failed_downloads, goodread_list_length):
        """Print how many of ``goodread_list_length`` books failed, and which ones."""
        print("----------------------------------------------")
        if failed_downloads:
            print(f"{len(failed_downloads)}/{goodread_list_length} failed downloads:")
            for book in failed_downloads:
                print(f"\t{book.string()}")
        else:
            print("No failed downloads!")

    # menu flow when inputting a goodreads list
    @staticmethod
    def goodreads_menu():
        """Repeatedly ask for a Goodreads list and download every book on it.

        Each book is skipped if already present under ``downloads``;
        otherwise it is searched for, and the search results are tried in
        order. A report of failed books is printed per list. Typing "back"
        returns to the caller.
        """
        while True:
            list_input = IOUtils.input_menu("Enter a Goodreads list (type 'exit' to exit, 'back' to go back): ")
            if list_input is None:  # allow for flow back
                break

            goodreads_list = GoodreadsList()
            goodreads_books = goodreads_list.scrape(list_url=list_input)
            if goodreads_books is None:
                print("Unable to scrape Goodreads list! Make sure the account linked is not private.")
                continue

            failed_downloads = []
            total = len(goodreads_books)
            for i, goodreads_book in enumerate(goodreads_books):  # loops through each book in goodreads list
                print(f"Book {i+1}/{total} ---- {goodreads_book.string()}")
                if IOUtils.duplicate_checker(goodreads_book.filename):
                    print("Book already exists in downloads. Skipping...")
                    continue

                search_term = f"{goodreads_book.title} {goodreads_book.author}"
                search_results = AnnaList().scrape(search_term)
                if not search_results:
                    print("Book not found! Skipping...")
                    failed_downloads.append(goodreads_book)
                    continue

                # Name and file every candidate after the Goodreads entry so
                # the download lands in the list's folder.
                for book in search_results:
                    book.update_metadata(goodreads_book, goodreads_list.list_name)
                if not Searcher().automated_search(search_results):
                    print(f"Unable to download {goodreads_book.title} :(")
                    failed_downloads.append(goodreads_book)

            Menu.mission_report(failed_downloads, total)

    # menu flow when inputting a singular book
    @staticmethod
    def book_search_menu():
        """Repeatedly ask for a search term and let the user pick a result.

        Typing "back" returns to the caller.
        """
        while True:
            search_term = IOUtils.input_menu("Search for a book (type 'exit' to exit, 'back' to go back): ")
            if search_term is None:
                break

            search_results = AnnaList().scrape(search_term)
            if search_results:
                Searcher().interactive_search(search_results)
            else:
                print("No results! Try another search.")
68 |
--------------------------------------------------------------------------------
/src/scaper.py:
--------------------------------------------------------------------------------
class Scraper:
    """Base class for scrapers; subclasses override ``scrape``."""

    def scrape(self, *args, **kwargs):
        """Scrape a source and return its books.

        Accepts any arguments because subclasses take a search term or a
        list URL. Calling the base method with one therefore raises
        ``NotImplementedError`` rather than a signature ``TypeError``.
        """
        raise NotImplementedError("Subclasses must implement the scrape method")
4 |
--------------------------------------------------------------------------------
/src/searcher.py:
--------------------------------------------------------------------------------
1 | from src.io_utils import IOUtils
2 | import os
3 | import json
4 |
class Searcher:
    """Downloads books from search results, automatically or by user choice.

    ``mode`` is ``"download"`` or ``"kindle"``; in kindle mode each
    successfully downloaded book is also emailed.
    """

    def __init__(self):
        self.setup()

    def setup(self):
        """Read ``config.json`` and set ``self.mode``.

        Falls back to ``"download"`` when the file is missing, when
        ``"mode"`` is absent or invalid, or when kindle mode is requested
        but an email setting is missing.
        """
        config_file_path = "config.json"
        self.mode = "download"

        if not os.path.isfile(config_file_path):
            print('Error: config.json does not exist. Defaulting to "download" mode.')
            return

        with open(config_file_path) as config_file:
            config = json.load(config_file)

        mode = config.get("mode")
        if mode not in ("kindle", "download"):
            print('Error: "mode" not set correctly in config.json. Mode must be "download" or "kindle". Defaulting to "download" mode.')
            return

        if mode == "kindle":
            required = ("email_sender", "email_password", "email_receiver")
            if any(key not in config for key in required):
                # Actually fall back to download mode, as the message says.
                print('Error reading email settings. Double check the config.json and README.md. Defaulting to "download" mode.')
                return

        self.mode = mode

    # automatically grabs book from title and author
    def automated_search(self, anna_list):
        """Try each result in ``anna_list`` until one downloads.

        Returns True after the first successful download (emailing it in
        kindle mode), False when every result failed or the list is empty.
        """
        io_utils = IOUtils()
        cdn = io_utils.get_cdn()
        # loops through all anna search results
        for i, book in enumerate(anna_list):
            if i == 0:
                print(f"Downloading {book.title} by {book.author} ({book.size})")
            else:
                print(f"Attempt {i+1}: Downloading {book.title} by {book.author} ({book.size})")

            if io_utils.download_book(book, cdn):
                if self.mode == "kindle":
                    io_utils.send_email(book)
                return True
        return False  # all sources failed

    # formats metadata into properly formatted string
    def menu_formatter(self, anna_list):
        """Attach an aligned ``display_string`` ("title / author / size") to each book.

        Titles longer than 50 characters are cut and suffixed with "...".
        Also sets ``title_limit``, ``max_title_len`` and
        ``max_author_length`` for ``interactive_search``.
        """
        self.title_limit = 50
        max_title_length = max(len(book.title) for book in anna_list)
        self.max_author_length = max(len(book.author) for book in anna_list)

        if max_title_length >= self.title_limit:
            self.max_title_len = self.title_limit
            bumper = 3  # room for the "..." appended to truncated titles
        else:
            self.max_title_len = max_title_length
            bumper = 0

        for book in anna_list:
            title_len = len(book.title)
            if title_len > self.max_title_len:
                display_title = book.title[:self.max_title_len] + "..."
            else:
                display_title = book.title + ' ' * (self.max_title_len - title_len + bumper)
            display_author = book.author + ' ' * (self.max_author_length - len(book.author))
            book.display_string = f"{display_title} / {display_author} / {book.size}"

    # allows user to select a book from a list of entries
    def interactive_search(self, anna_list):
        """Show the results and download the one the user selects.

        Keeps prompting until a download succeeds or the user types "back".
        """
        io_utils = IOUtils()
        cdn = io_utils.get_cdn()
        self.menu_formatter(anna_list)
        if self.max_title_len < self.title_limit:
            title_padding = self.max_title_len - 5
        else:
            title_padding = self.max_title_len - 2

        print(f"[#] Title {(title_padding) * ' '}/ Author {(self.max_author_length - 6) * ' '}/ Size")
        for i, book in enumerate(anna_list):
            # [i+1] is a one-element list, so this prints e.g. "[1]".
            print(f"{[i+1]} {book.display_string}")

        while True:
            book_number = IOUtils.input_menu("Enter the number of the book you want to select (type 'exit' to exit, 'back' to go back): ")
            if book_number is None:
                break
            try:
                user_choice = int(book_number)
            except ValueError:
                print("Invalid input. Please enter a numeric value.")
                continue
            if not 1 <= user_choice <= len(anna_list):
                print("Invalid input. Please enter a number within the valid range.")
                continue

            selected_book = anna_list[user_choice - 1]
            if io_utils.download_book(selected_book, cdn):
                if self.mode == "kindle":
                    io_utils.send_email(selected_book)
                break
            print("Download failed. Try a different result.")
110 |
--------------------------------------------------------------------------------