├── README.md
├── pylibgen.py
├── requirements.txt
└── settings.py
/README.md:
--------------------------------------------------------------------------------
1 | # PyLibGen
2 | A Python 3 script to download books from libgen.io.
3 |
4 | ### Install
5 |
6 | You need python3 to run the script.
7 | To install the required dependencies:
8 |
9 | ``pip install -r requirements.txt``
10 |
11 | ### Usage
12 |
13 | ```
14 | usage: pylibgen.py [-h] [-t | -a | -p | -y] search [search ...]
15 |
16 | positional arguments:
17 | search search term
18 |
19 | optional arguments:
20 | -h, --help show this help message and exit
21 | -t, --title get books from the specified title
22 | -a, --author get books from the specified author
23 | -p, --publisher get books from the specified publisher
24 | -y, --year get books from the specified year
25 | ```
26 |
27 | ### Settings
28 |
29 | By default, books are downloaded to the directory from which you run the script. You can change this and other options by editing the values of the variables in ``settings.py``.
30 |
31 | ### Screenshot
32 |
33 | 
34 |
35 | **Happy Reading!**
36 |
--------------------------------------------------------------------------------
/pylibgen.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import re
3 | import os
4 | from urllib import request
5 | from urllib.parse import urlencode
6 | from tabulate import tabulate
7 | from bs4 import BeautifulSoup
8 | from settings import *
9 |
10 |
def getSearchResults(term, page, column):
    """Fetch one page of search results from libgen.io.

    Args:
        term: Search string, sent as the ``req`` query parameter.
        page: Results page number; page 1 also yields the total count.
        column: Field to search in, sent as the ``column`` query parameter.

    Returns:
        For ``page == 1`` a tuple ``(books, n_books)``: the list of result
        ``<tr>`` rows and the number of matches announced by the page
        (0 when the page does not announce a count).
        For any other page, only the list of rows.
    """
    params = urlencode({'req': term, 'column': column, 'page': page})
    url = 'http://libgen.io/search.php?&%s' % params

    # Parse inside the context manager so the HTTP response is always closed.
    with request.urlopen(url) as source:
        soup = BeautifulSoup(source, 'lxml')

    # Keep every table row except the first three and the last one
    # (the original note describes those as labels, not books).
    books = soup.find_all('tr')[3:-1]

    if page != 1:
        return books

    books_found = re.search(r'(\d+) books found', str(soup))
    if books_found is None:
        # The page layout did not include the "N books found" counter;
        # report zero matches instead of crashing on a None match object.
        print('COULD NOT READ THE NUMBER OF BOOKS FOUND')
        n_books = 0
    else:
        print(books_found.group().upper())
        n_books = int(books_found.group(1))

    return (books, n_books)
29 |
30 |
def formatBooks(books, page):
    """Turn raw result rows into printable tuples and mirror records.

    Args:
        books: Iterable of parsed ``<tr>`` rows (each must support
            ``find_all('td')``), as returned by ``getSearchResults``.
        page: Results page the rows came from; used to number the books,
            assuming 25 results per page.

    Returns:
        A tuple ``(fmt_books, books_mirrors)``:
        ``fmt_books`` is a list of tuples
        ``(number, author, title, publisher, year, lang, ext, size)`` with
        the display limits from the settings applied;
        ``books_mirrors`` is a parallel list of dicts
        ``{'title': full_title, 'mirrors': {mirror_index: url}}``.
        Rows with fewer than 14 cells are skipped.
    """
    fmt_books = []
    books_mirrors = []  # List of dicts with complete titles and mirrors
    cont_book = (page - 1) * 25 + 1  # Numbering starts at 1
    for rawbook in books:
        book_attrs = rawbook.find_all('td')
        if len(book_attrs) < 14:
            continue

        authors = [a.text for a in book_attrs[1].find_all('a')]
        author = ', '.join(authors[:N_AUTHORS])[:MAX_CHARS_AUTHORS]

        # Prefer the element carrying a title attribute; fall back to the
        # whole cell text when the row has none instead of crashing.
        title_tag = book_attrs[2].find(title=True)
        if title_tag is None:
            title = book_attrs[2].text
        else:
            title = title_tag.text
        tinytitle = title[:MAX_CHARS_TITLE]

        publisher = book_attrs[3].text[:MAX_CHARS_PUBLISHER]
        year = book_attrs[4].text
        lang = book_attrs[6].text[:2]  # Show only the 2 first characters
        size = book_attrs[7].text
        ext = book_attrs[8].text

        # Cells 10..14 hold up to five mirror links. Slicing (rather than
        # indexing up to 14) keeps a 14-cell row from raising IndexError.
        mirror_list = {
            mirror: cell.a.attrs['href']
            for mirror, cell in enumerate(book_attrs[10:15])
            if cell.a
        }

        book = (str(cont_book), author, tinytitle, publisher,
                year, lang, ext, size)

        books_mirrors.append({'title': title, 'mirrors': mirror_list})
        fmt_books.append(book)
        cont_book += 1

    return (fmt_books, books_mirrors)
67 |
68 |
def _downloadFromMirror(available, title):
    """Download ``title`` from one of the mirrors in ``available``.

    Args:
        available: Dict mapping a mirror index (0-4) to that mirror's URL,
            as built by ``formatBooks``.
        title: File name to save the book under.

    When ``SHOW_MIRRORS`` is false the lowest-numbered available mirror is
    used without prompting; otherwise the user picks one of the listed
    mirrors or quits with ``q``.
    """
    mirror_names = (
        "#1: Mirror bookdescr.org (default)",
        "#2: Mirror libgen.me",
        "#3: Mirror library1.org",
        "#4: Mirror b-ok.cc",
        "#5: Mirror bookfi.net")
    downloaders = (
        DownloadBook.default_mirror,
        DownloadBook.second_mirror,
        DownloadBook.third_mirror,
        DownloadBook.fourth_mirror,
        DownloadBook.fifth_mirror)

    if not available:
        print('No mirrors available for this book.')
        return

    if not SHOW_MIRRORS:
        # No prompt requested: fall back to the first mirror the book has.
        index = min(available)
        downloaders[index](available[index], title)
        return

    while True:
        print("\nMirrors Available: \n")
        for index in sorted(available):
            print(mirror_names[index])

        option = input(
            '\nType # of mirror to start download or q to quit: ')

        # Validate against the mirrors actually listed, not their count:
        # the available indexes are not necessarily contiguous.
        if option.isdecimal() and int(option) - 1 in available:
            index = int(option) - 1
            downloaders[index](available[index], title)
            return
        if option == 'q' or option == 'Q':  # Quit
            return
        print("Not a valid option.")


def selectBook(books, mirrors, page, n_books):
    """Show one page of results and let the user pick a book to download.

    Args:
        books: All formatted book tuples fetched so far.
        mirrors: Parallel list of ``{'title': ..., 'mirrors': {...}}`` dicts.
        page: Page whose 25 rows are printed.
        n_books: Total number of matches reported by the search.

    Returns:
        ``True`` when the user asked for the next page of matches,
        ``False`` when a book was chosen or the user quit.
    """
    headers = ['#', 'Author', 'Title', 'Publisher',
               'Year', 'Lang', 'Ext', 'Size']

    print(tabulate(books[(page - 1) * 25:page * 25], headers))
    # Detect when all the books are found.
    no_more_matches = n_books == len(books)

    if no_more_matches:
        print("\nEND OF LIST. NO MORE BOOKS FOUND")

    while True:
        if no_more_matches:
            elec = input('Type # of book to download or q to quit: ')
        else:
            elec = input(
                '\nType # of book to download, q to quit or just press Enter to see more matches: ')

        # isdecimal() only accepts characters int() can convert.
        if elec.isdecimal():
            choice = int(elec) - 1
            if not 0 <= choice < len(books):
                print("Couldn't fetch the book #{}".format(str(choice + 1)))
                continue

            # books[choice][-2] is the file extension column.
            title = '{}.{}'.format(
                mirrors[choice]['title'], books[choice][-2])
            _downloadFromMirror(mirrors[choice]['mirrors'], title)
            return False

        if elec == 'q' or elec == 'Q':  # Quit
            return False

        if not elec:
            if no_more_matches:
                print('Not a valid option')
                continue
            return True

        print('Not a valid option.')
166 |
167 |
class DownloadBook():
    """Namespace of static helpers that resolve a mirror page and save a book.

    Every method is static and is meant to be called on the class, e.g.
    ``DownloadBook.default_mirror(link, filename)``.
    """
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
    accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    accept_charset = 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
    accept_lang = 'en-US,en;q=0.8'
    connection = 'keep-alive'

    # Request headers sent with every mirror page request.
    headers = {
        'User-Agent': user_agent,
        'Accept': accept,
        'Accept-Charset': accept_charset,
        'Accept-Language': accept_lang,
        'Connection': connection,
    }

    # Characters replaced by a space in file names. The backslash is
    # escaped explicitly ('\/' is an invalid escape sequence).
    _bad_chars = '\\/:*?"<>|'
    _bad_chars_table = str.maketrans(_bad_chars, ' ' * len(_bad_chars))

    @staticmethod
    def _sanitize_filename(file_name):
        """Return ``file_name`` with every bad character turned into a space."""
        return file_name.translate(DownloadBook._bad_chars_table)

    @staticmethod
    def _get_soup(url):
        """Request ``url`` with the class headers and return the parsed page."""
        req = request.Request(url, headers=DownloadBook.headers)
        # The context manager closes the HTTP response after parsing.
        with request.urlopen(req) as source:
            return BeautifulSoup(source, 'lxml')

    @staticmethod
    def save_book(download_link, file_name):
        """Download ``download_link`` into DOWNLOAD_PATH as ``file_name``.

        Prints an explanatory message instead of downloading when
        DOWNLOAD_PATH is a file or does not exist.
        """
        if os.path.isdir(DOWNLOAD_PATH):
            file_name = DownloadBook._sanitize_filename(file_name)
            print('Downloading...')
            path = os.path.join(DOWNLOAD_PATH, file_name)
            request.urlretrieve(download_link, filename=path)
            print('Book downloaded to {}'.format(os.path.abspath(path)))
        elif os.path.isfile(DOWNLOAD_PATH):
            print('The download path is not a directory. Change it in settings.py')
        else:
            print('The download path does not exist. Change it in settings.py')

    @staticmethod
    def default_mirror(link, filename):
        '''This is the default (and first) mirror to download.
        The base of this mirror is http://booksdescr.org'''
        soup = DownloadBook._get_soup(link)

        for a in soup.find_all('a'):
            if a.text == 'Libgen':
                download_url = a.attrs['href']
                DownloadBook.save_book(download_url, filename)

    @staticmethod
    def second_mirror(link, filename):
        '''This is the second mirror to download.
        The base of this mirror is https://libgen.me'''
        soup = DownloadBook._get_soup(link)
        mother_url = "https://libgen.me"

        for a in soup.find_all('a'):
            if a.text == 'Get from vault':
                # The link found here is joined onto the mirror base and
                # leads to a second page that holds the actual file link.
                next_link = a.attrs['href']
                next_soup = DownloadBook._get_soup(mother_url + next_link)
                for next_a in next_soup.find_all('a'):
                    if next_a.text == 'Get':
                        item_url = next_a.attrs['href']
                        DownloadBook.save_book(item_url, filename)

    @staticmethod
    def third_mirror(link, filename):
        '''This is the third mirror to download.
        The base of this mirror is http://library1.org'''
        soup = DownloadBook._get_soup(link)

        for a in soup.find_all('a'):
            if a.text == 'GET':
                download_url = a.attrs['href']
                DownloadBook.save_book(download_url, filename)

    @staticmethod
    def fourth_mirror(link, filename):
        '''This is the fourth mirror to download.
        The base of this mirror is https://b-ok.cc'''
        soup = DownloadBook._get_soup(link)
        mother_url = "https://b-ok.cc"

        for a in soup.find_all('a'):
            if a.text == 'DOWNLOAD':
                # Both the intermediate page and the final file link are
                # joined onto the mirror base.
                next_link = a.attrs['href']
                next_soup = DownloadBook._get_soup(mother_url + next_link)
                for next_a in next_soup.find_all('a'):
                    if ' Download ' in next_a.text:
                        item_url = next_a.attrs['href']
                        DownloadBook.save_book(mother_url + item_url, filename)

    @staticmethod
    def fifth_mirror(link, filename):
        '''This is the fifth mirror to download.
        The base of this mirror is https://bookfi.net'''
        soup = DownloadBook._get_soup(link)

        for a in soup.find_all('a'):
            if 'Скачать' in a.text:
                download_url = a.attrs['href']
                DownloadBook.save_book(download_url, filename)
271 |
272 |
273 |
if __name__ == '__main__':
    # Command line: one or more search words plus at most one field flag.
    parser = argparse.ArgumentParser()
    column = parser.add_mutually_exclusive_group()
    parser.add_argument('search', nargs='+', help='search term')
    search_fields = ('title', 'author', 'publisher', 'year')
    for field in search_fields:
        column.add_argument('-' + field[0], '--' + field,
                            action='store_true',
                            help='get books from the specified ' + field)

    args = parser.parse_args()
    search_term = ' '.join(args.search)

    # 'def' is sent as the column when no field flag was given.
    sel_column = 'def'
    for field in search_fields:
        if getattr(args, field):
            sel_column = field

    books, mirrors = [], []
    page = 1
    get_next_page = True

    while get_next_page:
        # Only the first page returns the total number of matches as well.
        result = getSearchResults(search_term, page, sel_column)
        if page == 1:
            raw_books, n_books = result
        else:
            raw_books = result

        if raw_books:
            new_books, new_mirrors = formatBooks(raw_books, page)
            books += new_books
            mirrors += new_mirrors
            get_next_page = selectBook(books, mirrors, page, n_books)
            page += 1
        elif raw_books == [] and n_books != 0:
            # Empty page after earlier matches: show the previous page again.
            get_next_page = selectBook(books, mirrors, page - 1, n_books)
        else:
            # Nothing matched at all.
            get_next_page = False
322 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml==4.1.0
2 | beautifulsoup4==4.4.1
3 | tabulate==0.7.5
4 |
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
# Directory the books are downloaded to ("." is the current directory).
DOWNLOAD_PATH = "."

# Maximum number of authors displayed for each book.
N_AUTHORS = 1

# Maximum characters displayed for the author; adjust along with N_AUTHORS.
MAX_CHARS_AUTHORS = 25

# Maximum characters displayed for the book title.
MAX_CHARS_TITLE = 50

# Maximum characters displayed for the publisher.
MAX_CHARS_PUBLISHER = 20

# True to list the download mirrors and ask which one to use.
SHOW_MIRRORS = True
7 |
--------------------------------------------------------------------------------