├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── VERSION ├── pylibgen ├── __init__.py ├── constants.py ├── pylibgen.py └── requests.py ├── setup.py └── tests └── test_pylibgen.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.3" 4 | - "3.4" 5 | - "3.5" 6 | - "3.6" 7 | branches: 8 | only: 9 | - master 10 | 11 | install: 12 | - "pip install flake8" 13 | - "python setup.py install" 14 | script: "make test" 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Mudew 4 | Copyright (c) 2017 Joshua Li 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.rst VERSION LICENSE .travis.yml 2 | recursive-include tests *.py 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: todo clean style test build register publish 2 | 3 | all: build 4 | 5 | clean: 6 | rm -rf __pycache__/ build/ dist/ *.egg-info/ .cache/ 7 | 8 | style: 9 | flake8 --exclude tests/test_*.py __init__.py 10 | 11 | test: style 12 | python3 -m pytest -s tests/test_*.py 13 | 14 | build: clean test 15 | # Builds the package into a source dist and a wheel binary, 16 | # then installs locally. 17 | python3 setup.py sdist bdist_wheel > /dev/null 18 | python3 -m pip install dist/*.whl --user --upgrade > /dev/null 19 | 20 | register: 21 | # Registers the package on PyPI. 22 | test -f ~/.pypirc 23 | python3 setup.py register -r pypi > /dev/null 24 | 25 | publish: build register 26 | # Updates the VERSION file, commits and tags that change, pushes to GitHub 27 | # to effectively trigger a tag tarball build and a TravisCI test. 28 | # Then publishes or re-registers (updates metadata) a new version of the 29 | # package to PyPI. 30 | # Must first locally pass tests, build + install successfully, 31 | # and have no uncommitted (unstaged or staged) changes in the current branch. 32 | @echo "Checking for clean branch ..." 33 | git diff --quiet 34 | git diff --quiet --cached 35 | @echo "Current version: $(shell python3 setup.py --version)" 36 | @read -p "Enter new version: " NEWVERSION && \ 37 | echo "$$NEWVERSION" > VERSION && \ 38 | git add VERSION && \ 39 | git commit -S -m "$$NEWVERSION" && \ 40 | git tag -s -a v"$$NEWVERSION" -m "$$NEWVERSION" && \ 41 | git push -u origin --follow-tags && \ 42 | python3 setup.py sdist bdist_wheel upload -r pypi 43 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pylibgen 2 | ========================== 3 | |License MIT| 4 | 5 | 6 | Python interface to Library Genesis. 7 | 8 | Currently supports the :code:`libgen.io` mirror. Will need to write custom parsers for other mirrors in the future. 9 | 10 | **This is fork of [pylibgen](https://github.com/JoshuaRLi/pylibgen) meant to work with python 2.7 without any external dependecnies (like requests)** 11 | 12 | Usage 13 | --------------------- 14 | 15 | .. code-block:: pycon 16 | 17 | >>> from pylibgen import Library 18 | >>> lg = Library() 19 | >>> ids = lg.search('automate the boring stuff', 'title') 20 | >>> ids 21 | 22 | ['1421206', '1421207', '1421208', '1351717', '1381538', '1381540', '1529338'] 23 | 24 | >>> books = lg.lookup(ids) 25 | >>> from pprint import pprint; pprint(books[0]) 26 | 27 | {'author': 'Albert Sweigart', 28 | 'edition': '', 29 | 'extension': 'epub', 30 | 'filesize': '4485769', 31 | 'identifier': '978-1593275990', 32 | 'md5': '054255117b2e86251415292ef48320fd', 33 | 'pages': '0', 34 | 'title': 'Automate the Boring Stuff with Python: Practical Programming for ' 35 | 'Total Beginners', 36 | 'year': '2015'} 37 | 38 | >>> lg.get_download_url(books[0]['md5']) 39 | 40 | 'http://libgen.io/get.php?md5=054255117b2e86251415292ef48320fd&key=NQTP585IPY102LYG' 41 | 42 | Compatibility 43 | --------------------- 44 | 45 | pylibgen is tested to work with python 3.3 - 3.6. 46 | 47 | Notes 48 | --------------------- 49 | 50 | Due to the nature of the service Library Genesis provides, its mirrors often get taken down. Feel free to submit any pull requests to update :code:`constants.MIRRORS` as time goes on! 51 | 52 | Support Library Genesis! 53 | -------------------------- 54 | 55 | :code:`Library.get_download_url` will by default parse the temporary download key from libgen's ads.php redirect page. This is necessary for a valid direct download URL since libgen uses those temp keys to get more ad revenue. 56 | 57 | If you want to support Library Genesis, I recommend passing :code:`enable_ads=True` to :code:`Library.get_download_url`. This will return the plain download URL, which shows an ad first when visited. 58 | 59 | Disclaimer 60 | --------------------- 61 | 62 | Use this at your own risk. I am not responsible or liable for any piracy, copyright infringement, or other things committed by anyone using pylibgen. Blah blah lawyer stuff, etc. 63 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.3.0 2 | -------------------------------------------------------------------------------- /pylibgen/__init__.py: -------------------------------------------------------------------------------- 1 | from .pylibgen import Library, constants 2 | -------------------------------------------------------------------------------- /pylibgen/constants.py: -------------------------------------------------------------------------------- 1 | # Mirrors may change over time. 2 | MIRRORS = [ 3 | 'libgen.io', 4 | ] 5 | 6 | DEFAULT_MIRROR = MIRRORS[0] 7 | 8 | ENDPOINTS = { 9 | 'search': 'http://{mirror}/search.php' 10 | '?req={req}&res=100&column={column}', 11 | 'lookup': 'http://{mirror}/json.php' 12 | '?ids={ids}&fields={fields}', 13 | 'download': 'http://{mirror}/get.php' 14 | '?md5={md5}', 15 | } 16 | 17 | DEFAULT_FIELDS = [ 18 | 'title', 19 | 'author', 20 | 'year', 21 | 'edition', 22 | 'pages', 23 | 'identifier', 24 | 'extension', 25 | 'filesize', 26 | 'md5', 27 | ] 28 | -------------------------------------------------------------------------------- /pylibgen/pylibgen.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import webbrowser 4 | from urllib import quote_plus 5 | from . import constants, requests 6 | 7 | 8 | class Library(object): 9 | """Library Genesis interface wrapper.""" 10 | 11 | def __init__(self, mirror=constants.DEFAULT_MIRROR): 12 | assert(mirror in constants.MIRRORS) 13 | self.mirror = mirror 14 | 15 | def __repr__(self): 16 | return ''.format(self.mirror) 17 | 18 | def __str__(self): 19 | return self.__repr__ 20 | 21 | def search(self, query, type='title'): 22 | """Searches Library Genesis. 23 | 24 | Note: 25 | For search type isbn, either ISBN 10 or 13 is accepted. 26 | 27 | Args: 28 | query (str): Search query. 29 | type (str): Query type. Can be title, author, isbn. 30 | 31 | Returns: 32 | List of LibraryGenesis book IDs that matched the query. 33 | """ 34 | assert(type in {'title', 'author', 'isbn'}) 35 | r = self.__req('search', { 36 | 'req': quote_plus(query), 37 | 'column': type, 38 | }) 39 | return re.findall("(\d+)", r.text) 40 | 41 | def lookup(self, ids, fields=constants.DEFAULT_FIELDS): 42 | """Looks up metadata on Library Genesis books. 43 | 44 | Note: 45 | To get book IDs, use search(). The default fields 46 | suffice for most use cases, but there are a LOT more 47 | like openlibraryid, publisher, etc. To get all fields, 48 | use fields=['*']. 49 | 50 | Args: 51 | ids (list): Library Genesis book IDs. 52 | fields (list): Library Genesis book properties. 53 | 54 | Returns: 55 | List of dicts each containing values for the specified 56 | fields for a Library Genesis book ID. 57 | A single dict if only one str or int id is passed in. 58 | """ 59 | # Allow for lookup of a single numeric string by auto casting 60 | # to a list for convenience. 61 | if isinstance(ids, (str, int)): 62 | ids = [str(ids)] 63 | res = self.__req('lookup', { 64 | 'ids': ','.join(ids), 65 | 'fields': ','.join(fields), 66 | }).json() 67 | if not res: 68 | # https://github.com/JoshuaRLi/pylibgen/pull/3 69 | raise requests.HTTPError(400) 70 | return res if len(res) > 1 else res[0] 71 | 72 | def get_download_url(self, md5, enable_ads=False): 73 | """Gets a direct download URL to a Library Genesis book. 74 | 75 | Note: 76 | This is actually specific only to the libgen.io mirror! 77 | Will need to be rewritten if things change. 78 | Use lookup() to obtain the MD5s for Library Genesis books. 79 | To support Library Genesis, pass True to enable_ads. 80 | See the package README for more detail. 81 | 82 | Args: 83 | md5 (str): Library Genesis unique book identifier checksum. 84 | enable_ads (bool): Toggle ad bypass via direct download key 85 | scraping. 86 | 87 | Returns: 88 | A direct download URL. 89 | """ 90 | url = self.__req('download', {'md5': md5}, urlonly=True) 91 | if enable_ads: 92 | return url 93 | r = self.__req('download', {'md5': md5}) 94 | key = re.findall("&key=(.*?)'", r.text)[0] 95 | return '{}&key={}'.format(url, key) 96 | 97 | def download(self, md5, dest='.', use_browser=False): 98 | """Downloads a Library Genesis book. 99 | 100 | Note: 101 | Libgen seems to delay programmatically sent dl requests, even 102 | if the UA string is spoofed and the URL contains a good key, 103 | so I recommend just using get_download_url. Alternatively, you 104 | can set use_browser=True, which will just open up the download 105 | URL in a new browser tab. 106 | 107 | Note that if you spam download requests, libgen will temporarily 108 | 503. Again, I recommend using get_download_url and downloading 109 | from the browser. 110 | 111 | Args: 112 | md5 (str): Library Genesis unique book identifier checksum. 113 | dest (str): Path to download directory. 114 | use_browser (bool): Use browser to download instead. 115 | """ 116 | auth_url = self.get_download_url(md5, enable_ads=False) 117 | if use_browser: 118 | webbrowser.open_new_tab(auth_url) 119 | return 120 | r = requests.get(auth_url) 121 | r.raise_for_status() 122 | with open(os.path.join(dest, md5), 'wb') as f: 123 | for chunk in r.iter_content(1024): 124 | f.write(chunk) 125 | 126 | def __req(self, endpoint, getargs, urlonly=False): 127 | url = constants.ENDPOINTS[endpoint].format( 128 | mirror=self.mirror, **getargs 129 | ) 130 | if urlonly: 131 | return url 132 | r = requests.get(url) 133 | r.raise_for_status() 134 | return r 135 | -------------------------------------------------------------------------------- /pylibgen/requests.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | import codecs 3 | import json 4 | import httplib 5 | 6 | # A monkey patch for the requests library. 7 | 8 | bytes = str 9 | str = unicode 10 | 11 | CONTENT_CHUNK_SIZE = 10 * 1024 12 | 13 | # Null bytes; no need to recreate these on each call to guess_json_utf 14 | _null = '\x00'.encode('ascii') # encoding to ASCII for Python 3 15 | _null2 = _null * 2 16 | _null3 = _null * 3 17 | 18 | def guess_json_utf(data): 19 | """ 20 | :rtype: str 21 | """ 22 | # JSON always starts with two ASCII characters, so detection is as 23 | # easy as counting the nulls and from their location and count 24 | # determine the encoding. Also detect a BOM, if present. 25 | sample = data[:4] 26 | if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE): 27 | return 'utf-32' # BOM included 28 | if sample[:3] == codecs.BOM_UTF8: 29 | return 'utf-8-sig' # BOM included, MS style (discouraged) 30 | if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): 31 | return 'utf-16' # BOM included 32 | nullcount = sample.count(_null) 33 | if nullcount == 0: 34 | return 'utf-8' 35 | if nullcount == 2: 36 | if sample[::2] == _null2: # 1st and 3rd are null 37 | return 'utf-16-be' 38 | if sample[1::2] == _null2: # 2nd and 4th are null 39 | return 'utf-16-le' 40 | # Did not detect 2 valid UTF-16 ascii-range characters 41 | if nullcount == 3: 42 | if sample[:3] == _null3: 43 | return 'utf-32-be' 44 | if sample[1:] == _null3: 45 | return 'utf-32-le' 46 | # Did not detect a valid UTF-32 ascii-range character 47 | return None 48 | 49 | def patch(obj, method): 50 | name = method.__name__ 51 | setattr(obj, name, types.MethodType(method, obj)) 52 | 53 | def iter_slices(string, slice_length): 54 | """Iterate over slices of a string.""" 55 | pos = 0 56 | if slice_length is None or slice_length <= 0: 57 | slice_length = len(string) 58 | while pos < len(string): 59 | yield string[pos:pos + slice_length] 60 | pos += slice_length 61 | 62 | class HTTPError(urllib2.HTTPError): 63 | def __init__(self, code, url="", msg=None): 64 | urllib2.HTTPError.__init__(self, url, code=code, 65 | msg=httplib.responses.get(code, "") if msg is None else msg, 66 | hdrs=None, fp=None) 67 | 68 | class RequestWrapper: 69 | def __init__(self, resp): 70 | self.resp = resp 71 | self.encoding = self.resp.headers.getparam('charset') 72 | self.status_code = self.resp.getcode() 73 | self._content = False 74 | self._content_consumed = False 75 | 76 | def raise_for_status(self): 77 | pass 78 | 79 | def iter_content(self, chunk_size=1): 80 | 81 | def generate(): 82 | # Standard file-like object. 83 | while True: 84 | chunk = self.resp.read(chunk_size) 85 | if not chunk: 86 | break 87 | yield chunk 88 | 89 | self._content_consumed = True 90 | 91 | if self._content_consumed and isinstance(self._content, bool): 92 | raise StreamConsumedError() 93 | elif chunk_size is not None and not isinstance(chunk_size, int): 94 | raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size)) 95 | 96 | # simulate reading small chunks of the content 97 | reused_chunks = iter_slices(self._content, chunk_size) 98 | 99 | stream_chunks = generate() 100 | 101 | chunks = reused_chunks if self._content_consumed else stream_chunks 102 | 103 | return chunks 104 | 105 | 106 | 107 | @property 108 | def content(self): 109 | """Content of the response, in bytes.""" 110 | if self._content is False: 111 | self._content = bytes().join(self.iter_content(CONTENT_CHUNK_SIZE)) or bytes() 112 | 113 | self._content_consumed = True 114 | return self._content 115 | 116 | @property 117 | def text(self): 118 | if not self.content: 119 | return str('') 120 | 121 | try: 122 | content = str(self.content, self.encoding, errors='replace') 123 | except (LookupError, TypeError): 124 | # A LookupError is raised if the encoding was not found which could 125 | # indicate a misspelling or similar mistake. 126 | # 127 | # A TypeError can be raised if encoding is None 128 | # 129 | # So we try blindly encoding. 130 | content = str(self.content, errors='replace') 131 | 132 | return content 133 | 134 | def json(self, **kwargs): 135 | r"""Returns the json-encoded content of a response, if any. 136 | :param \*\*kwargs: Optional arguments that ``json.loads`` takes. 137 | :raises ValueError: If the response body does not contain valid json. 138 | """ 139 | 140 | if not self.encoding and self.content and len(self.content) > 3: 141 | # No encoding set. JSON RFC 4627 section 3 states we should expect 142 | # UTF-8, -16 or -32. Detect which one to use; If the detection or 143 | # decoding fails, fall back to `self.text` (using chardet to make 144 | # a best guess). 145 | encoding = guess_json_utf(self.content) 146 | if encoding is not None: 147 | try: 148 | return json.loads( 149 | self.content.decode(encoding), **kwargs 150 | ) 151 | except UnicodeDecodeError: 152 | # Wrong UTF codec detected; usually because it's not UTF-8 153 | # but some other 8-bit codec. This is an RFC violation, 154 | # and the server didn't bother to tell us what codec *was* 155 | # used. 156 | pass 157 | return json.loads(self.text, **kwargs) 158 | 159 | 160 | def get(url): 161 | r = urllib2.urlopen(url) 162 | return RequestWrapper(r) 163 | 164 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | AUTHOR_GITHUB = 'JoshuaRLi' 5 | SETUP_BASE = { 6 | 'name': 'pylibgen', 7 | 'description': 'Programmatic Python interface for Library Genesis.', 8 | 'license': 'MIT', 9 | 'author': 'Joshua Li', 10 | 'author_email': 'joshua.r.li.98@gmail.com', 11 | 'keywords': [ 12 | 'libgen', 13 | 'library', 14 | 'genesis', 15 | 'search', 16 | 'download', 17 | 'books', 18 | 'ebooks', 19 | 'textbooks', 20 | ], 21 | 'packages': find_packages(exclude=['tests']), 22 | # 'entry_points': { 23 | # 'console_scripts': [ 24 | # 'executable_name=package:module:main', 25 | # ], 26 | # }, 27 | 'install_requires': [ 28 | 'requests', 29 | ], 30 | 'classifiers': [ 31 | 'Development Status :: 4 - Beta', 32 | 'Intended Audience :: Developers', 33 | 'Topic :: Software Development', 34 | 'Natural Language :: English', 35 | 'License :: OSI Approved :: MIT License', 36 | 'Programming Language :: Python', 37 | 'Programming Language :: Python :: 3', 38 | 'Programming Language :: Python :: 3.3', 39 | 'Programming Language :: Python :: 3.4', 40 | 'Programming Language :: Python :: 3.5', 41 | 'Programming Language :: Python :: 3.6', 42 | ], 43 | } 44 | 45 | here = os.path.abspath(os.path.dirname(__file__)) 46 | 47 | with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: 48 | SETUP_BASE['long_description'] = f.read().strip() 49 | 50 | with open(os.path.join(here, 'VERSION'), 'r') as f: 51 | SETUP_BASE['version'] = f.read().strip() 52 | 53 | setup( 54 | url='https://github.com/{0}/{name}'.format( 55 | AUTHOR_GITHUB, **SETUP_BASE 56 | ), 57 | download_url='https://github.com/{0}/{name}/tarball/v{version}'.format( 58 | AUTHOR_GITHUB, **SETUP_BASE 59 | ), 60 | **SETUP_BASE 61 | ) 62 | -------------------------------------------------------------------------------- /tests/test_pylibgen.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from pylibgen import Library, constants 3 | 4 | # Ensure that API endpoints are working and returning the 5 | # expected responses for all mirrors. 6 | def test_api_endpoints(): 7 | for mirror in constants.MIRRORS: 8 | lg = Library(mirror) 9 | ids = lg.search('automate the boring stuff', 'title') 10 | assert isinstance(ids, list) 11 | assert set(ids) == set([ 12 | '1421206', '1421207', '1421208', '1351717', 13 | '1381538', '1381540', '1529338', '2149756' 14 | ]) 15 | 16 | books = lg.lookup(ids) 17 | assert isinstance(books, list) 18 | assert isinstance(books[0], dict) 19 | assert {book['md5'].lower() for book in books} == { 20 | 'd826b3e593b12422784f50d59c97a966', 21 | 'b34564156c3778261ed03167b09f6694', 22 | '4e0efdd614737fd66408fd43a9d5ff10', 23 | '5a64e12e79af379110a31ea04bb6320c', 24 | 'c157d6ec28d1a7c4b528f4e6a1ea4c9e', 25 | '054255117b2e86251415292ef48320fd', 26 | '1af2c71c1342e850e1e47013b06f9eb9', 27 | '2699081bc2e3908ece25013109941028', 28 | } 29 | 30 | book = lg.lookup(1421206) 31 | assert isinstance(book, dict) 32 | assert book['md5'] == '1af2c71c1342e850e1e47013b06f9eb9' 33 | 34 | print lg.get_download_url(books[0]['md5']) 35 | 36 | test_api_endpoints() 37 | --------------------------------------------------------------------------------