├── README.md ├── pylibgen.py ├── requirements.txt └── settings.py /README.md: -------------------------------------------------------------------------------- 1 | # PyLibGen 2 | A Python 3 script to download books from libgen.io 3 | 4 | ### Install 5 | 6 | You need Python 3 to run the script. 7 | To install the required dependencies: 8 | 9 | ``pip install -r requirements.txt`` 10 | 11 | ### Usage 12 | 13 | ``` 14 | usage: pylibgen.py [-h] [-t | -a | -p | -y] search [search ...] 15 | 16 | positional arguments: 17 | search search term 18 | 19 | optional arguments: 20 | -h, --help show this help message and exit 21 | -t, --title get books from the specified title 22 | -a, --author get books from the specified author 23 | -p, --publisher get books from the specified publisher 24 | -y, --year get books from the specified year 25 | ``` 26 | 27 | ### Settings 28 | 29 | The default download path for the books is set to the directory from where you run the script. You can easily tweak this and some other options by changing the variable values in ``settings.py``. 
# --- README.md (continued) ---
# ### Screenshot
#
# ![](https://i.imgur.com/FCLF4OQ.jpg)
#
# **Happy Reading!**
#
# --- /pylibgen.py ---
"""Search libgen.io from the command line and download a selected book."""
import argparse
import os
import re
from urllib import request
from urllib.parse import urlencode, urljoin

from bs4 import BeautifulSoup
from tabulate import tabulate

from settings import *

# Number of results the search page is assumed to return per page; used both
# for numbering rows and for slicing the accumulated list for display.
_BOOKS_PER_PAGE = 25

# Labels shown for mirror indexes 0..4 (table columns 10..14).
_MIRROR_LABELS = (
    "#1: Mirror bookdescr.org (default)",
    "#2: Mirror libgen.me",
    "#3: Mirror library1.org",
    "#4: Mirror b-ok.cc",
    "#5: Mirror bookfi.net",
)

# Characters replaced by a space in file names before saving.
_BAD_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', ' '))


def getSearchResults(term, page, column):
    """Fetch one page of search results.

    Args:
        term: Search string.
        page: 1-based page number.
        column: Column to search in ('def', 'title', 'author', ...).

    Returns:
        On page 1 a tuple ``(rows, n_books)``; on any other page only
        ``rows``. ``rows`` is the list of ``<tr>`` tags with the first three
        and the last one dropped. ``n_books`` is the count parsed from the
        "N books found" text, or 0 when that text is absent.
    """
    params = urlencode({'req': term, 'column': column, 'page': page})
    url = 'http://libgen.io/search.php?&%s' % params

    # The soup is fully built inside the ``with`` so the response can be
    # closed right away.
    with request.urlopen(url) as source:
        soup = BeautifulSoup(source, 'lxml')

    n_books = 0
    if page == 1:
        books_found = re.search(r'(\d+) books found', str(soup))
        if books_found:  # Absent when the page layout is unexpected.
            print(books_found.group().upper())
            n_books = int(books_found.group(1))

    books = soup.find_all('tr')[3:-1]  # Ignore 3 first and the last label.
    if page == 1:
        return (books, n_books)
    return books


def formatBooks(books, page):
    """Turn raw result rows into display tuples and mirror records.

    Args:
        books: ``<tr>`` tags as returned by ``getSearchResults``.
        page: 1-based page number the rows came from (drives numbering).

    Returns:
        ``(fmt_books, books_mirrors)``: a list of tuples
        ``(number, author, title, publisher, year, lang, ext, size)`` and a
        parallel list of ``{'title': full_title, 'mirrors': {index: href}}``.
        Rows with fewer than 14 cells are skipped.
    """
    fmt_books = []
    books_mirrors = []  # Complete titles and mirror links, one per book.
    cont_book = (page - 1) * _BOOKS_PER_PAGE + 1  # Numbering starts at 1.

    for rawbook in books:
        book_attrs = rawbook.find_all('td')
        if len(book_attrs) < 14:
            continue

        authors = [a.text for a in book_attrs[1].find_all('a')]
        author = ', '.join(authors[:N_AUTHORS])[:MAX_CHARS_AUTHORS]

        title_tag = book_attrs[2].find(title=True)
        # Fall back to the cell text when no tag carries a title attribute.
        title = title_tag.text if title_tag is not None else book_attrs[2].text
        tinytitle = title[:MAX_CHARS_TITLE]

        publisher = book_attrs[3].text[:MAX_CHARS_PUBLISHER]
        year = book_attrs[4].text
        lang = book_attrs[6].text[:2]  # Show only 2 first characters.
        size = book_attrs[7].text
        ext = book_attrs[8].text

        # Up to five mirror columns (10..14); a row may have only 14 cells,
        # so never index past the end.
        mirror_list = {}
        for i in range(10, min(15, len(book_attrs))):
            anchor = book_attrs[i].a
            if anchor is not None and anchor.get('href'):
                mirror_list[i - 10] = anchor['href']

        fmt_books.append((str(cont_book), author, tinytitle, publisher,
                          year, lang, ext, size))
        books_mirrors.append({'title': title, 'mirrors': mirror_list})
        cont_book += 1

    return (fmt_books, books_mirrors)


def _chooseMirror(available):
    """Ask the user for one of the available mirror indexes.

    Returns the 0-based mirror index, or None if the user quits.
    """
    while True:
        print("\nMirrors Available: \n")
        for mir in available:
            print(_MIRROR_LABELS[mir])

        option = input('\nType # of mirror to start download or q to quit: ')

        # Validate against the indexes actually listed, not their count.
        if option.isdecimal() and int(option) - 1 in available:
            return int(option) - 1
        if option in ('q', 'Q'):
            return None
        print("Not a valid option.")


def _downloadSelection(entry, ext):
    """Download the book described by one mirror record.

    ``entry`` is a ``{'title', 'mirrors'}`` record; ``ext`` is the file
    extension appended to the title to form the file name.
    """
    handlers = (
        DownloadBook.default_mirror,
        DownloadBook.second_mirror,
        DownloadBook.third_mirror,
        DownloadBook.fourth_mirror,
        DownloadBook.fifth_mirror,
    )
    title = '{}.{}'.format(entry['title'], ext)
    available = entry['mirrors']

    if not available:
        print('No download mirrors are available for this book.')
        return

    if SHOW_MIRRORS:
        mirror = _chooseMirror(available)
        if mirror is None:
            return
    else:
        # No prompt wanted: use the default mirror, or the first one listed.
        mirror = 0 if 0 in available else min(available)

    handlers[mirror](available[mirror], title)


def selectBook(books, mirrors, page, n_books):
    """Show one page of results and handle the user's choice.

    Args:
        books: All display tuples accumulated so far.
        mirrors: Mirror records parallel to ``books``.
        page: 1-based page to display.
        n_books: Total number of matches reported by the search.

    Returns:
        True if the caller should fetch the next page, False otherwise
        (a download was attempted or the user quit).
    """
    headers = ['#', 'Author', 'Title', 'Publisher',
               'Year', 'Lang', 'Ext', 'Size']

    start = (page - 1) * _BOOKS_PER_PAGE
    print(tabulate(books[start:page * _BOOKS_PER_PAGE], headers))

    # Detect when all the books are found.
    no_more_matches = n_books == len(books)
    if no_more_matches:
        print("\nEND OF LIST. NO MORE BOOKS FOUND")

    while True:
        if no_more_matches:
            elec = input('Type # of book to download or q to quit: ')
        else:
            elec = input(
                '\nType # of book to download, q to quit or just press Enter to see more matches: ')

        # isdecimal() only accepts strings that int() can parse.
        if elec.isdecimal():
            choice = int(elec) - 1
            if not 0 <= choice < len(books):
                print("Couldn't fetch the book #{}".format(str(choice + 1)))
                continue
            # books[choice][-2] is the extension column.
            _downloadSelection(mirrors[choice], books[choice][-2])
            return False

        if elec in ('q', 'Q'):  # Quit
            return False

        if not elec:
            if no_more_matches:
                print('Not a valid option')
                continue
            return True

        print('Not a valid option.')


class DownloadBook():
    """Namespace of static helpers that download a book from each mirror."""

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    accept_charset = 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
    accept_lang = 'en-US,en;q=0.8'
    connection = 'keep-alive'

    headers = {
        'User-Agent': user_agent,
        'Accept': accept,
        'Accept-Charset': accept_charset,
        'Accept-Language': accept_lang,
        'Connection': connection,
    }

    @staticmethod
    def _find_link(page_url, matches, base_url=None):
        """Return the first matching link on a page as an absolute URL.

        Fetches ``page_url`` and looks for the first ``<a>`` whose text
        satisfies ``matches`` and that has an href. The href is resolved
        against ``base_url`` (or ``page_url`` when omitted). Returns None
        when no anchor matches.
        """
        req = request.Request(page_url, headers=DownloadBook.headers)
        with request.urlopen(req) as source:
            soup = BeautifulSoup(source, 'lxml')

        for a in soup.find_all('a'):
            href = a.get('href')
            if href and matches(a.text):
                return urljoin(base_url or page_url, href)
        return None

    @staticmethod
    def _save_or_report(download_url, filename):
        """Save the book, or tell the user that no link was found."""
        if download_url is None:
            print('Could not find a download link on this mirror.')
        else:
            DownloadBook.save_book(download_url, filename)

    @staticmethod
    def save_book(download_link, file_name):
        """Download ``download_link`` into DOWNLOAD_PATH as ``file_name``.

        Characters that are invalid in file names are replaced by spaces.
        Prints a message instead of downloading when DOWNLOAD_PATH is not
        an existing directory.
        """
        if os.path.isdir(DOWNLOAD_PATH):
            file_name = file_name.translate(_BAD_FILENAME_CHARS)
            print('Downloading...')
            path = os.path.join(DOWNLOAD_PATH, file_name)
            request.urlretrieve(download_link, filename=path)
            print('Book downloaded to {}'.format(os.path.abspath(path)))
        elif os.path.isfile(DOWNLOAD_PATH):
            print('The download path is not a directory. Change it in settings.py')
        else:
            print('The download path does not exist. Change it in settings.py')

    @staticmethod
    def default_mirror(link, filename):
        """This is the default (and first) mirror to download.

        The base of this mirror is http://booksdescr.org
        """
        download_url = DownloadBook._find_link(
            link, lambda text: text == 'Libgen')
        DownloadBook._save_or_report(download_url, filename)

    @staticmethod
    def second_mirror(link, filename):
        """This is the second mirror to download.

        The base of this mirror is https://libgen.me
        """
        mother_url = "https://libgen.me"
        # Two hops: the book page links to a "vault" page holding the file.
        next_url = DownloadBook._find_link(
            link, lambda text: text == 'Get from vault', mother_url)
        item_url = None
        if next_url is not None:
            item_url = DownloadBook._find_link(
                next_url, lambda text: text == 'Get', mother_url)
        DownloadBook._save_or_report(item_url, filename)

    @staticmethod
    def third_mirror(link, filename):
        """This is the third mirror to download.

        The base of this mirror is http://library1.org
        """
        download_url = DownloadBook._find_link(
            link, lambda text: text == 'GET')
        DownloadBook._save_or_report(download_url, filename)

    @staticmethod
    def fourth_mirror(link, filename):
        """This is the fourth mirror to download.

        The base of this mirror is https://b-ok.cc
        """
        mother_url = "https://b-ok.cc"
        # Two hops: the listing page links to the page with the file link.
        next_url = DownloadBook._find_link(
            link, lambda text: text == 'DOWNLOAD', mother_url)
        item_url = None
        if next_url is not None:
            item_url = DownloadBook._find_link(
                next_url, lambda text: ' Download ' in text, mother_url)
        DownloadBook._save_or_report(item_url, filename)

    @staticmethod
    def fifth_mirror(link, filename):
        """This is the fifth mirror to download.

        The base of this mirror is https://bookfi.net
        """
        download_url = DownloadBook._find_link(
            link, lambda text: 'Скачать' in text)
        DownloadBook._save_or_report(download_url, filename)


def main():
    """Parse the command line, page through results and handle selection."""
    parser = argparse.ArgumentParser()
    column = parser.add_mutually_exclusive_group()
    parser.add_argument('search', nargs='+', help='search term')
    column.add_argument('-t', '--title', action='store_true',
                        help='get books from the specified title')
    column.add_argument('-a', '--author', action='store_true',
                        help='get books from the specified author')
    column.add_argument('-p', '--publisher', action='store_true',
                        help='get books from the specified publisher')
    column.add_argument('-y', '--year', action='store_true',
                        help='get books from the specified year')

    args = parser.parse_args()

    search_term = ' '.join(args.search)
    search_arguments = [(args.title, 'title'),
                        (args.author, 'author'),
                        (args.publisher, 'publisher'),
                        (args.year, 'year')]

    sel_column = 'def'
    for selected, name in search_arguments:
        if selected:
            sel_column = name

    books = []
    mirrors = []
    page = 1
    n_books = 0
    get_next_page = True

    while get_next_page:
        if page == 1:
            raw_books, n_books = getSearchResults(search_term, page, sel_column)
        else:
            raw_books = getSearchResults(search_term, page, sel_column)

        if raw_books:
            new_books, new_mirrors = formatBooks(raw_books, page)
            books += new_books
            mirrors += new_mirrors
            get_next_page = selectBook(books, mirrors, page, n_books)
            page += 1
        elif books and n_books != 0:  # 0 matches in the last page
            # The site has nothing more to give: pass len(books) as the total
            # so the prompt stops offering "Enter for more" and the same
            # empty page is not fetched again.
            get_next_page = selectBook(books, mirrors, page - 1, len(books))
        else:  # 0 matches total
            get_next_page = False


if __name__ == '__main__':
    main()

# --- /requirements.txt ---
# lxml==4.1.0
# beautifulsoup4==4.4.1
# tabulate==0.7.5
#
# --- /settings.py ---
# DOWNLOAD_PATH = "."  # Where do you want the books to be downloaded. Default is the script directory.
# N_AUTHORS = 1  # Maximum of authors displayed.
# MAX_CHARS_AUTHORS = 25  # Maximum characters displayed for the author. Change according to N_AUTHORS.
# MAX_CHARS_TITLE = 50  # Maximum characters displayed for the book title
# MAX_CHARS_PUBLISHER = 20  # Maximum characters displayed for the publisher.
# SHOW_MIRRORS = True  # Set to True or False depending if you want the program to show the download mirrors.