├── .gitignore ├── Dockerfile ├── README.md ├── docker-compose.yml ├── downloader.py ├── main.py ├── requirements.txt └── website_actions ├── __init__.py ├── abstract_website_actions.py ├── bookwalker_jp_actions.py ├── bookwalker_tw_actions.py ├── cmoa_jp_actions.py ├── coma_jp_novel.py └── takeshobo_co_jp_actions.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - 4 | RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' 5 | 6 | # Updating apt to see and install Google Chrome 7 | RUN apt-get -y update 8 | 9 | # Magic happens 10 | RUN apt-get install -y google-chrome-stable 11 | 12 | # Installing Unzip 13 | RUN apt-get install -yqq unzip 14 | 15 | # Download the Chrome Driver 16 | RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/ \ 17 | && curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE \ 18 | | xargs -I{} wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/{}/chromedriver_linux64.zip \ 19 | && unzip /tmp/chromedriver.zip -d /usr/local/bin/ \ 20 | && rm /tmp/chromedriver.zip 21 | 22 | # Set display port as an environment variable 23 | ENV 
DISPLAY=:99 24 | 25 | WORKDIR /usr/src/app 26 | 27 | COPY . /app 28 | WORKDIR /app 29 | 30 | RUN pip install --upgrade pip 31 | 32 | RUN pip install -r requirements.txt 33 | 34 | CMD python main.py 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NEW METHOD FOR BW TO TRY! 2 | 3 | # New Version 4 | ## v0.3.3 (Update Recommended) 5 | - Update to Chromium 112.0.5590.0. 6 | - Better support for BW books page number pattern, support novels. 7 | 8 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.3) or here: [Windows x64 release build v0.3.3](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.3/BW-downloader-chrome-v0.3.3.7z) 9 | 10 | ## v0.3.2 11 | This version has some good features below for BW: 12 | - Could download the cover (If the cover image is jpeg, please check it or better convert it to png before you share it, because the jpeg file will contain your BW account info). 13 | - Could name the image automictically using the page number, you can start at any page you want and go forward or go back! 14 | - Could name the folder with the BW uuid but not a random one anymore. 15 | - No more blank or repeating page will be skipped, no more image hash to check repeating page, better performance. 16 | 17 | Example screenshot: 18 | ![1670681578(1)](https://user-images.githubusercontent.com/29002064/206859972-0c775ee2-02fd-4d62-8870-4cd262fc6116.jpg) 19 | 20 | If you find the file name all become "cover_or_extra_xxx" when downloading some manga, please file a bug, there may be more URL patterns in BW than I have seen or they changed the pattern, it should be covered to make the page number working correctly. 
21 | 22 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.2) or here: [Windows x64 release build v0.3.2](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.2/BW-downloader-chrome-v0.3.2.7z) 23 | 24 | ## v0.3.1 25 | This version has improved the performance about saving snapshot, if you have some problems that the browser become very slow during downloading, please try the new version. 26 | 27 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.1) or here: [Windows x64 release build v0.3.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.1/BW-downloader-chrome-v0.3.1.7z) 28 | 29 | ## v0.3 30 | Fixed the problem that some manga has width less than 800px could not be downloaded, see [#113](/../../issues/113). 31 | 32 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3) or here: [Windows x64 release build v0.3](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3/BW-downloader-chrome-v0.3.7z) 33 | 34 | ## v0.2.1 35 | Pump Chromium to 109.0.5393, may fix some problems. 
36 | 37 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.2.1) or here: [Windows x64 release build v0.2.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.2.1/BW-downloader-chrome-v0.2.1.7z) 38 | 39 | ## v0.2 40 | This version is based on Chromium 106.0.5243.0, the changes are below: 41 | - Support `https://ebook.tongli.com.tw`, will save the downloaded images in the `C:\bw_export_data\TONGLI_URL_STRING` 42 | - Support `https://www.dlsite.com`, but this is saving the cache images, so the final 3~4 pages should be downloaded as below (for example we have 10 pages): 43 | - Go through page 1 to 10 (Make sure the current page is fully loaded when you go to next page). 44 | - You will find that at page 10, there are maybe only images for page 1-7. 45 | - Go back from page 10 to page 5, you will find that the final pages are saved. (but maybe in reverse order) 46 | - Currently we could not do anything better than this. 47 | - Works for `https://book.dmm.com`, use the script below to move page: 48 | ```js 49 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.views_.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 50 | ``` 51 | The script above is for **DMM**, for **BW** please use the script below: 52 | ```js 53 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.L7v.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 54 | ``` 55 | - Maybe slightly faster for BW and may download some images that width > height. 56 | 57 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.2) or here: [Windows x64 release build v0.2](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.2/BW-downloader-chrome-v0.2.7z) 58 | 59 | How to use (Same as the old version): 60 | 1. Unzip the file `BW-downloader-chrome-bin.zip`. 61 | 2. 
Open a `powershell` or `cmd`, `cd` to the unzipped browser dir. 62 | 3. Open the browser with command line `.\chrome.exe --user-data-dir=c:\bw-downloader-profile --no-sandbox` 63 | 4. Browse the manga, manga will be saved to `C:\bw_export_data` 64 | 65 | **Do not use it for other website, only use it as a Manga downloader, it is not as safe as normal chrome browser!** 66 | 67 | # Old Version 68 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.1) or here: [Windows x64 release build v0.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.1/BW-downloader-chrome-v0.1.7z) 69 | 70 | **If you are finding something to download BW, please try this method for BW, it's really a good thing to try, you will like it!** 71 | 72 | **For coma, please see below.** 73 | 74 | Now have a new method, with a customized `chromium` browser, it can download BW original image with its original size very easily. It can download both manga and novel, and may be used on every website that uses canvas to render the page (now only tested on BW). 75 | 76 | It is only a dev version, may have bugs and may crash, but you can download the customized browser and try it now. 77 | 78 | **Do not use it for other website, only use it as a BW downloader, it is not as safe as normal chrome browser!** 79 | 80 | Clone this repo or only download the [BW-downloader-chrome-bin.zip](https://github.com/xuzhengyi1995/Manga_downloader/raw/master/BW-downloader-chrome-bin.zip) 81 | 82 | 1. Unzip the file `BW-downloader-chrome-bin.zip`. 83 | 2. Open a `powershell` or `cmd`, `cd` to the unzipped browser dir. 84 | 3. Open the browser with command line `.\chrome.exe --user-data-dir=c:\bw-downloader-profile --no-sandbox` 85 | 4. 
Adjust your browser window size, make it smaller and can only display one manga page, example below 86 | ![image](https://user-images.githubusercontent.com/29002064/139255318-95531cd9-c442-4a61-acb4-cef3d71b7190.png) 87 | 88 | 89 | 5. You can now go to BW website, log in and open the manga you want to download, remember to reset the manga's read status before you open it. 90 | 6. Press `F12`, make it a separate window (see the image below) and run the script below (just go to `console` and copy-past the code, press enter) to move the page automatically, if your network is good, you can change the `3000` to a smaller number, 3000 means 3000ms -> 3s, every 3s it will move to the next page. 91 | 92 | You can also manually click the mouse left button / use a keyboard arrow key / use a keyboard simulation software to move the page, you can choose the way you like, just make sure the page is moving. 93 | ```js 94 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.L7v.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 95 | ``` 96 | 97 | If this not work and show 'Uncaught TypeError: Cannot read properties of undefined (reading 'menu') at :1:54', it means BW has updated the js, you can try to find it in the console, just try `NFBR.a6G.Initializer.*.menu` is not `undefined` and the * is the new object name; Or you can just file a bug. 98 | 99 | ![image](https://user-images.githubusercontent.com/29002064/138590508-e7555a2d-1528-4e59-8a50-e08e407bc1be.png) 100 | 101 | 102 | 7. Now you can check your `C:\bw_export_data`, you can find a random uuid folder with all the manga images in it. 103 | ![image](https://user-images.githubusercontent.com/29002064/139255390-03b9191a-e90b-4572-9cde-b7e50ca9787c.png) 104 | 105 | If you want to download multiple manga at the same time, just open as many manga as you want, and do the step 5 to 6. 
106 | 107 | This method is very easy to use, stable and no need to find any resolution or cookies, and it can download the real original image with no barcode. 108 | 109 | Maybe will add a new browser ui to it and can click to download in the future. 110 | 111 | May not download the cover page now. 112 | 113 | Only built on windows, no program now for other platform. 114 | 115 | If you find some problem, please file a bug, thank you! 116 | 117 | # Manga_Downloader 118 | 119 | A Manga download framework using `selenium`. 120 | 121 | **Now support the websites below:** 122 | 123 | 1. [Bookwalker.jp](https://bookwalker.jp) 124 | 2. [Bookwalker.com.tw](https://www.bookwalker.com.tw) 125 | 3. [Cmoa.jp](https://www.cmoa.jp/) 126 | 127 | **Program will check the website of given URL automaticity** 128 | 129 | **If the website you given is unsupported, the program will raise an error.** 130 | 131 | **Now support multi manga download with only login one time** 132 | 133 | **现在支持批量下载** 134 | 135 | **you should prepare the information below:** 136 | 137 | 1. Manga URL 138 | 2. Cookies 139 | 3. Image dir (Where to put the image, folder name) 140 | 4. Some website you should see the size of image and set it at `res`. [Cmoa.jp](https://www.cmoa.jp/) doesn't need this. 141 | 142 | # How to Use 143 | 144 | ## All settings 145 | 146 | All the settings are in `main.py`. 147 | 148 | ```python 149 | settings = { 150 | # Manga urls, should be the same website 151 | 'manga_url': [ 152 | 'URL_1', 153 | 'URL_2' 154 | ], 155 | # Your cookies 156 | 'cookies': 'YOUR_COOKIES_HERE', 157 | # Folder names to store the Manga, the same order with manga_url 158 | 'imgdir': [ 159 | 'IMGDIR_FOR_URL_1', 160 | 'IMGDIR_FOR_URL_2' 161 | ], 162 | # Resolution, (Width, Height), For cmoa.jp this doesn't matter. 163 | 'res': (1393, 2048), 164 | # Sleep time for each page (Second), normally no need to change. 
165 | 'sleep_time': 2, 166 | # Time wait for page loading (Second), if your network is good, you can reduce this parameter. 167 | 'loading_wait_time': 20, 168 | # Cut image, (left, upper, right, lower) in pixel, None means do not cut the image. This often used to cut the edge. 169 | # Like (0, 0, 0, 3) means cut 3 pixel from bottom of the image. 170 | 'cut_image': None, 171 | # File name prefix, if you want your file name like 'klk_v1_001.jpg', write 'klk_v1' here. 172 | 'file_name_prefix': '', 173 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 174 | 'number_of_digits': 3, 175 | # Start page, if you want to download from page 3, set this to 3, None means from 0 176 | 'start_page': None, 177 | # End page, if you want to download until page 10, set this to 10, None means until finished 178 | 'end_page': None, 179 | } 180 | ``` 181 | 182 | ## Install environment & How to Get URL/Cookies 183 | 184 | **This program now work for Chrome, if you use another browser, please check [this page](https://selenium-python.readthedocs.io/installation.html)** 185 | 186 | 0. Install python packages _selenium_ and _pillow_ and get the _Google chrome Drivers_. 187 | 188 | 1. For _selenium_ ad _pillow_: 189 | 190 | ```shell 191 | pip install selenium 192 | pip install Pillow 193 | # This undetected_chromedriver is prevent us from been detected by BW 194 | pip install undetected_chromedriver 195 | ``` 196 | 197 | 2. For Google chrome Drivers: 198 | 199 | 1. Please check your Chrome version, 'Help'->'About Google Chrome'. 200 | 201 | 2. Download Chrome Driver fit to your Chrome version [here](https://sites.google.com/a/chromium.org/chromedriver/downloads). 202 | 203 | 3. Put it into any folder and add the folder into the PATH. 204 | 205 | 3. For more info, I suggest you to check it [here](https://selenium-python.readthedocs.io/installation.html) 206 | 207 | 208 | 1. Change the `IMGDIR` in the main.py to indicate where to put the manga. 209 | 210 | 2. 
Add your cookies in the program. 211 | 212 | **Remember to use F12 to see the cookies!** 213 | 214 | **Because some http only cookies can not be seen by javascript!** 215 | 216 | **Remember to go to the links below to get the cookies!** 217 | 218 | 1. For [Bookwalker.jp] cookies, go [here](https://member.bookwalker.jp/app/03/my/profile). 219 | 2. For [Bookwalker.com.tw] cookies, go [here](https://www.bookwalker.com.tw/member). 220 | 3. For [www.cmoa.jp] cookies, go [here](https://www.cmoa.jp/) and you **must** get cookies by plug-in [EditThisCookie](http://www.editthiscookie.com/), download it for chrome [here](https://chrome.google.com/webstore/detail/edit-this-cookie/fngmhnnpilhplaeedifhccceomclgfbg). 221 | 222 | - For `EditThisCookie`, this can be used in any website above, but for `cmoa` you **must** use this method 223 | 224 | 1. Go to user preferences (chrome-extension://fngmhnnpilhplaeedifhccceomclgfbg/options_pages/user_preferences.html) of `EditThisCookie` 225 | 2. Set the cookie export format to `Semicolon separated name=value pairs` 226 | 3. Go to [cmoa](https://www.cmoa.jp/), click the `EditThisCookie` and click `export` button 227 | 4. Copy the cookies in the file (**After the `// Example: http://www.tutorialspoint.com/javascript/javascript_cookies.htm`**) into the program 228 | 229 | - For the traditional way 230 | 231 | > 1. Open the page. 232 | > 2. Press F12. 233 | > 3. Click on the _Network_. 234 | > 4. Refresh the page. 235 | > 5. Find the first _profile_ request, click it. 236 | > 6. On the right, there will be a _Request Headers_, go there. 237 | > 7. Find the _cookie:...._, copy the string after the _cookie:_, paste to the _main.py_, _YOUR_COOKIES_HERE_ 238 | 239 | 3. Change the _manga_url_ in the _main.py_. 240 | 241 | 1. For [Bookwalker.jp] 242 | 243 | First go to [購入済み書籍一覧](https://bookwalker.jp/holdBooks/), you can find all your mangas here. 244 | 245 | This time the URL is the URL of **'この本を読む'** button for your manga. 
246 | 247 | Right click this button, and click **'Copy link address'**. 248 | 249 | The URL is start with **member.bookwalker.jp**, not the **viewer.bookwalker.jp**. Here we use the manga [【期間限定 無料お試し版】あつまれ!ふしぎ研究部 1](https://member.bookwalker.jp/app/03/webstore/cooperation?r=BROWSER_VIEWER/640c0ddd-896c-4881-945f-ad5ce9a070a6/https%3A%2F%2Fbookwalker.jp%2FholdBooks%2F). 250 | 251 | This is the URL of the **あつまれ!ふしぎ研究部 1**: 252 | 253 | 2. For [Bookwalker.com.tw] 254 | 255 | Please go to [线上阅读](https://www.bookwalker.com.tw/member/available_book_list). 256 | 257 | The manga URL like this: 258 | 259 | 3. For [Cmoa.jp] 260 | 261 | Open the Manga and just copy the URL on the browser. 262 | 263 | The manga URL like this : 264 | 265 | Just copy this URL to the `MANGA_URL` in _main.py_. 266 | 267 | 4. After edit the program, run `python main.py` to run it. 268 | 269 | # Notice 270 | 271 | 1. The `SLEEP_TIME` by default is 2 seconds, you can adjust it with your own network situation, if the downloading has repeated images, you can change it to 5 or more. If you think it's too slow, try change it to 1 or even 0.5. 272 | 273 | 2. `LOADING_WAIT_TIME = 20`, this is the time to wait until the manga viewer page loaded, if your network is not good, you can set it to 30 or 50 seconds. 274 | 275 | 3. Resolution, you can change it as you want, but check the original image resolution first. 276 | 277 | ```python 278 | RES = (784, 1200) 279 | ``` 280 | 281 | If the original image has a higher resolution, you can change it like this (The resolution is just a example). 282 | 283 | ```python 284 | RES = (1568, 2400) 285 | ``` 286 | 287 | **For [Cmoa.jp] no need this, the resolution is fixed by [Cmoa.jp].** 288 | 289 | 4. Some time we should log out and log in, this website is very strict and take so many method to prevent abuse. 290 | 291 | 5. Now you can cut the image by setting `CUT_IMAGE` to (left, upper, right, lower). 
292 | 293 | For example you want to cut 3px from the bottom of image, you can set it to: 294 | 295 | ```python 296 | CUT_IMAGE = (0, 0, 0, 3) 297 | ``` 298 | 299 | This function use `Pillow`, if you want to use it, you should install it by using the command: 300 | 301 | ```shell 302 | pip install Pillow 303 | ``` 304 | 305 | By default it is `None`, means do not cut the image. 306 | 307 | 6. You can now change the file name prefix and number of digits by changing `file_name_prefix` and `number_of_digits`. 308 | 309 | For example, if you are downloading Kill La Kill Manga Volume 1, and you want the file name like: 310 | 311 |
312 |         KLK_V1
313 |         │--KLK_V1_001.jpg
314 |         │--KLK_V1_002.jpg
315 |         │--KLK_V1_003.jpg
316 |     
317 | 318 | Then you can set the parameters like below: 319 | 320 | ```python 321 | settings = { 322 | ..., 323 | 'file_name_prefix': 'KLK_V1', 324 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 325 | 'number_of_digits': 3 326 | } 327 | ``` 328 | 329 | # Develop 330 | 331 | 0. Concept 332 | 333 | To download Manga, normally we do like this: 334 | 335 |
336 |     +------------+     +-----------+      +------------+      +-------------------+      +--------------+
337 |     |            |     |           |      |            |      |                   | OVER |              |
338 |     |   Login    +-----+ Load page +----->+ Save image +----->+ Move to next page +----->+   Finished   |
339 |     |            |     |           |      |            |      |                   |      |              |
340 |     +------------+     +-----------+      +-----+------+      +---------+---------+      +--------------+
341 |                                                 ^                       |
342 |                                                 |                       |
343 |                                                 |      More page        |
344 |                                                 +-----------------------+
345 |     
346 | 347 | So we can create a framework to reuse the code, for new website, normally we only need to write some of the method. 348 | 349 | 1. Structure of file 350 | 351 |
352 |     |--main.py
353 |     │--downloader.py
354 |     │--README.MD
355 |     └─website_actions
356 |         │--abstract_website_actions.py
357 |         │--bookwalker_jp_actions.py
358 |         │--bookwalker_tw_actions.py
359 |         │--cmoa_jp_actions.py
360 |         │--__init__.py
361 |     
362 | 363 | 2. Introduction to abstract `WebsiteActions` class. 364 | 365 | For each website, the class should have the following methods/attributes, here we use bookwalker.jp as example: 366 | 367 | ```python 368 | class BookwalkerJP(WebsiteActions): 369 | ''' 370 | bookwalker.jp 371 | ''' 372 | 373 | # login_url is the page that we load first and put the cookies. 374 | login_url = 'https://member.bookwalker.jp/app/03/login' 375 | 376 | @staticmethod 377 | def check_url(manga_url): 378 | ''' 379 | This method return a bool, check if the given manga url is belong to this class. 380 | ''' 381 | return manga_url.find('bookwalker.jp') != -1 382 | 383 | def get_sum_page_count(self, driver): 384 | ''' 385 | This method return an integer, get total page number. 386 | ''' 387 | return int(str(driver.find_element_by_id('pageSliderCounter').text).split('/')[1]) 388 | 389 | def move_to_page(self, driver, page): 390 | ''' 391 | This method return nothing, move to given page number. 392 | ''' 393 | driver.execute_script( 394 | 'NFBR.a6G.Initializer.B0U.menu.a6l.moveToPage(%d)' % page) 395 | 396 | def wait_loading(self, driver): 397 | ''' 398 | This method return nothing, wait manga loading. 399 | ''' 400 | WebDriverWait(driver, 30).until_not(lambda x: self.check_is_loading( 401 | x.find_elements_by_css_selector(".loading"))) 402 | 403 | def get_imgdata(self, driver, now_page): 404 | ''' 405 | This method return String/something can be written to file or convert to BytesIO, get image data. 
406 | ''' 407 | canvas = driver.find_element_by_css_selector(".currentScreen canvas") 408 | img_base64 = driver.execute_script( 409 | "return arguments[0].toDataURL('image/jpeg').substring(22);", canvas) 410 | return base64.b64decode(img_base64) 411 | 412 | def get_now_page(self, driver): 413 | ''' 414 | This method return an integer, the page number on the current page 415 | ''' 416 | return int(str(driver.find_element_by_id('pageSliderCounter').text).split('/')[0]) 417 | ``` 418 | 419 | We also have a `before_download` method, this method run before we start download, because some website need to close some pop-up component before we start downloading. 420 | 421 | ```python 422 | def before_download(self, driver): 423 | ''' 424 | This method return nothing, Run before download. 425 | ''' 426 | driver.execute_script('parent.closeTips()') 427 | ``` 428 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | python: 4 | container_name: mangadw 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | volumes: 9 | - .:/app -------------------------------------------------------------------------------- /downloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main downloader, XU Zhengyi, 2020/05/05 3 | ''' 4 | import base64 5 | import logging 6 | import os 7 | import random 8 | import time 9 | from io import BytesIO 10 | from PIL import ImageOps 11 | 12 | import PIL.Image as pil_image 13 | import undetected_chromedriver as uc 14 | from selenium.webdriver.support.ui import WebDriverWait 15 | 16 | # DO NOT REMOVE THIS LINE. 
Used for __subclasses__()
from website_actions import *
from website_actions.abstract_website_actions import WebsiteActions

# Shared logging format for the whole downloader, e.g.
# "[INFO](root) 2020-05-05 12:00:00 : message".
logging.basicConfig(format='[%(levelname)s](%(name)s) %(asctime)s : %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)


def get_cookie_dict(cookies):
    '''
    Parse a raw "name=value; name=value" cookie header string into a dict.

    Accepts both '; ' and a bare ';' as separators; empty fragments are
    skipped. Only the first '=' splits name from value, so values that
    themselves contain '=' are preserved intact.
    '''
    cookies_split = cookies.split('; ')
    # Fall back to a bare ';' separator when '; ' produced a single chunk.
    if len(cookies_split) == 1:
        cookies_split = cookies.split(';')
    cookies_dict = {}
    for i in cookies_split:
        if i == '':
            continue
        kv = i.split('=')
        # Re-join the tail so values containing '=' survive the split.
        cookies_dict[kv[0]] = '='.join(kv[1:])
    return cookies_dict


def add_cookies(driver, cookies):
    '''
    Install every name/value pair from *cookies* into the selenium driver.

    The driver must already be on a page of the target domain, otherwise
    add_cookie would reject the cookies.
    '''
    for i in cookies:
        driver.add_cookie({'name': i, 'value': cookies[i]})


class Downloader:
    '''
    Main download class
    '''

    def __init__(
        self, manga_url, cookies, imgdir, res, sleep_time=2, loading_wait_time=20,
        cut_image=None, file_name_prefix='', number_of_digits=3, start_page=None,
        end_page=None
    ):
        '''
        Configure the downloader and start the browser.

        manga_url / imgdir are parallel lists (URL i is saved into dir i).
        cookies is the raw "name=value; ..." header string.
        res is a (width, height) tuple used as the browser window size.
        cut_image is None (no crop), 'dynamic' (auto-crop from the first
        page's bounding box), or a (left, upper, right, lower) pixel tuple.
        start_page / end_page are 1-based, inclusive bounds; None means
        "from the beginning" / "until the last page".
        '''
        self.manga_url = manga_url
        self.cookies = get_cookie_dict(cookies)
        self.imgdir = imgdir
        self.res = res
        self.sleep_time = sleep_time
        self.loading_wait_time = loading_wait_time
        self.cut_image = cut_image
        # Build a printf-style file name template such as '/%03d.png'
        # (or '/prefix_%03d.png' when a prefix is given).
        self.file_name_model = '/'
        if len(file_name_prefix) != 0:
            self.file_name_model += file_name_prefix + '_'

        # '%%0%dd' renders to e.g. '%03d' for number_of_digits == 3.
        self.file_name_model += '%%0%dd.png' % number_of_digits
        # Convert the 1-based user-facing start page to a 0-based index.
        self.start_page = start_page - 1 if start_page and start_page > 0 else 0
        self.end_page = end_page
        # Cached crop box for cut_image == 'dynamic'; computed lazily from
        # the first downloaded page, reset after each book.
        self.image_box = None

        self.init_function()

    def check_implementation(self, this_manga_url):
        '''
        Pick the WebsiteActions subclass whose check_url matches the URL.

        Relies on `from website_actions import *` above having imported all
        site modules so that WebsiteActions.__subclasses__() sees them.
        Raises NotImplementedError when no subclass claims the URL.
        '''
        is_implemented_website = False
        for temp_actions_class in WebsiteActions.__subclasses__():
            if temp_actions_class.check_url(this_manga_url):
                is_implemented_website = True
                self.actions_class = temp_actions_class()
                logging.info('Find action class, use %s class.',
                             self.actions_class.get_class_name())
                break

        if not is_implemented_website:
            logging.error('This website has not been added...')
            raise NotImplementedError

    def str_to_data_uri(self, str):
        '''
        Encode a text string as a base64 data: URI.

        Used to give Chrome's --app switch a harmless placeholder page.
        NOTE(review): the parameter name shadows the builtin `str`.
        '''
        return ("data:text/plain;charset=utf-8;base64,%s" %
                base64.b64encode(bytes(str, 'utf-8')).decode('ascii'))

    def get_driver(self):
        '''
        Launch a headless undetected-chromedriver Chrome and store it on
        self.driver, sized to self.res and hardened against basic
        automation fingerprinting.
        '''
        option = uc.ChromeOptions()
        # Auto-accept any unexpected JS prompt/alert instead of hanging.
        option.set_capability('unhandledPromptBehavior', 'accept')
        # Force a 1:1 device scale so canvas pixels map to image pixels.
        option.add_argument('--high-dpi-support=1')
        option.add_argument('--device-scale-factor=1')
        option.add_argument('--force-device-scale-factor=1')
        option.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36')
        option.add_argument("--app=%s" % self.str_to_data_uri('Manga_downloader'))
        option.add_argument('--headless')
        self.driver = uc.Chrome(options=option)
        self.driver.set_window_size(self.res[0], self.res[1])
        viewport_dimensions = self.driver.execute_script("return [window.innerWidth, window.innerHeight];")
        logging.info('Viewport dimensions %s', viewport_dimensions)
        # Injected before every page load: masks navigator.webdriver,
        # fakes plugins/languages, and patches the notification permission
        # query — common headless-detection probes.
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => false
            })
            window.navigator.chrome = undefined;
            Object.defineProperty(navigator, 'languages', {
              get: () => ['en-US', 'en'],
            });
            Object.defineProperty(navigator, 'plugins', {
              get: () => [1, 2, 3, 4, 5],
            });
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
              parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
            );
            """
        })

    def init_function(self):
        '''
        One-time setup: unpack a static crop tuple (if any), start the
        browser, and seed the RNG used for jittered page-turn delays.
        '''
        if self.cut_image is not None and self.cut_image != 'dynamic':
            self.left, self.upper, self.right, self.lower = self.cut_image
        self.get_driver()
        random.seed()

    def login(self):
        '''
        Load the site's login page, then replace the browser cookies with
        the user-supplied ones (no credentials are typed).
        '''
        logging.info('Login...')
        driver = self.driver
        driver.get(self.actions_class.login_url)
        driver.delete_all_cookies()
        add_cookies(driver, self.cookies)
        logging.info('Login finished...')

    def prepare_download(self, this_image_dir, this_manga_url):
        '''
        Create the output directory (if missing), open the manga viewer
        page and wait loading_wait_time seconds for it to initialise.
        '''
        if not os.path.isdir(this_image_dir):
            os.mkdir(this_image_dir)
        logging.info('Loading Book page...')
        driver = self.driver
        driver.get(this_manga_url)
        logging.info('Book page Loaded...')
        logging.info('Preparing for downloading...')
        time.sleep(self.loading_wait_time)

    def download_book(self, this_image_dir):
        '''
        Download every page of the currently-open book into this_image_dir.

        Pages are iterated with a 0-based index i; the site actions use a
        1-based page number (i + 1). Files are named via file_name_model % i,
        so the first saved page is numbered 0.
        On any exception the page source and a screenshot are dumped to
        error.html / error.png for diagnosis and the method returns.
        '''
        driver = self.driver
        logging.info('Run before downloading...')
        self.actions_class.before_download(driver)
        logging.info('Start download...')
        try:
            page_count = self.actions_class.get_sum_page_count(driver)
            logging.info('Has %d pages.', page_count)
            # Clamp the user-requested end page to the real page count.
            end_page = page_count
            if self.end_page and self.end_page <= page_count:
                end_page = self.end_page
            self.actions_class.move_to_page(driver, self.start_page)

            time.sleep(self.sleep_time)

            for i in range(self.start_page, end_page):
                self.actions_class.wait_loading(driver)
                image_data = self.actions_class.get_imgdata(driver, i + 1)
                with open(this_image_dir + self.file_name_model % i, 'wb') as img_file:
                    if self.cut_image is None:
                        # No cropping: write the raw image bytes as-is.
                        img_file.write(image_data)
                    elif self.cut_image == "dynamic":
                        org_img = pil_image.open(BytesIO(image_data))
                        if self.image_box is None:
                            # Compute the crop box once, from this page:
                            # invert so white margins become black, then
                            # getbbox() yields the non-margin region.
                            org_img.load()
                            invert_im = org_img.convert("RGB")
                            invert_im = ImageOps.invert(invert_im)
                            self.image_box = invert_im.getbbox()
                        org_img.crop(self.image_box).save(img_file, format='PNG')
                    else:
                        # Static crop: trim the configured pixel margins.
                        org_img = pil_image.open(BytesIO(image_data))
                        width, height = org_img.size
                        org_img.crop(
                            (self.left, self.upper, width - self.right, height - self.lower)).save(img_file, format='PNG')

                logging.info('Page %d Downloaded', i + 1)
                if i == page_count - 1:
                    # Last page of the whole book: nothing left to turn to.
                    logging.info('Finished.')
                    self.image_box = None
                    return

                self.actions_class.move_to_page(driver, i + 1)

                # Block (up to 300 s) until the viewer actually leaves the
                # current 1-based page i + 1.
                WebDriverWait(driver, 300).until_not(
                    lambda x: self.actions_class.get_now_page(x) == i + 1)

                # Jittered delay to look less like automation.
                time.sleep(self.sleep_time + random.random() * 2)
        except Exception as err:
            # Best-effort diagnostics; also reached when the site ends the
            # session (e.g. stale cookies), hence the advice in the log.
            with open("error.html", "w", encoding="utf-8") as err_source:
                err_source.write(driver.page_source)
            driver.save_screenshot('./error.png')
            logging.error('Something wrong or download finished,Please check the error.png to see the web page.\r\nNormally, you should logout and login, then renew the cookies to solve this problem.')
            logging.error(err)
            self.image_box = None
            return

    def download(self):
        '''
        Top-level entry point: download every configured manga in order.

        manga_url[i] is saved into imgdir[i]; login happens only once,
        before the first book. The browser is closed when all books are
        done (or the list lengths mismatch, which aborts immediately).
        '''
        total_manga = len(self.manga_url)
        total_dir = len(self.imgdir)
        if total_manga != total_dir:
            logging.error('Total manga urls given not equal to imgdir.')
            return

        for i in range(total_manga):
            t_manga_url = self.manga_url[i]
            t_img_dir = self.imgdir[i]
            self.check_implementation(t_manga_url)
            if i == 0:
                self.login()
            logging.info("Starting download manga %d, imgdir: %s",
                         i + 1, t_img_dir)
            self.prepare_download(t_img_dir, t_manga_url)
            self.download_book(t_img_dir)
            logging.info("Finished download manga %d, imgdir: %s",
                         i + 1, t_img_dir)
            time.sleep(2)
        self.driver.close()
        self.driver.quit()
-------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main file 3 | ''' 4 | 5 | from downloader import Downloader 6 | 7 | settings = { 8 | # Manga urls, should be the same website 9 | 'manga_url': [ 10 | 'URL_1', 11 | 'URL_2' 12 | ], 13 | # Your cookies 14 | 'cookies': 'YOUR_COOKIES_HERE', 15 | # Folder names to store the Manga, the same order with manga_url 16 | 'imgdir': [ 17 | 'IMGDIR_FOR_URL_1', 18 | 'IMGDIR_FOR_URL_2' 19 | ], 20 | # Resolution, (Width, Height), For coma this doesn't matter. 21 | 'res': (784, 1200), 22 | # Sleep time for each page (Second), normally no need to change. 23 | 'sleep_time': 1, 24 | # Time wait for page loading (Second), if your network is good, you can reduce this parameter. 25 | 'loading_wait_time': 20, 26 | # Cut image, (left, upper, right, lower) in pixel, None means do not cut the image. This often used to cut the edge. 27 | # Like (0, 0, 0, 3) means cut 3 pixel from bottom of the image. 28 | # or set dynamic to allow the scrypt to cut_images dynamictly (This work only correct if start_page is None) 29 | # this removed whitespace on the corners, initialised by the Cover. 30 | 'cut_image': None, 31 | # File name prefix, if you want your file name like 'klk_v1_001.jpg', write 'klk_v1' here. 32 | 'file_name_prefix': '', 33 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 
34 | 'number_of_digits': 3, 35 | # Start page, if you want to download from 3 page, set this to 3, None means from 0 36 | 'start_page': None, 37 | # End page, if you want to download until 10 page, set this to 10, None means until finished 38 | 'end_page': None, 39 | } 40 | 41 | if __name__ == '__main__': 42 | downloader = Downloader(**settings) 43 | downloader.download() 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==23.1.0 2 | certifi==2023.7.22 3 | charset-normalizer==3.2.0 4 | exceptiongroup==1.1.2 5 | h11==0.14.0 6 | idna==3.4 7 | outcome==1.2.0 8 | Pillow==9.2.0 9 | PySocks==1.7.1 10 | requests==2.31.0 11 | selenium==4.18.1 12 | sniffio==1.3.0 13 | sortedcontainers==2.4.0 14 | trio==0.22.2 15 | trio-websocket==0.10.3 16 | typing_extensions==4.10.0 17 | undetected-chromedriver==3.5.5 18 | urllib3==1.26.16 19 | websockets==11.0.3 20 | wsproto==1.2.0 21 | -------------------------------------------------------------------------------- /website_actions/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions 3 | ''' 4 | import glob 5 | from os.path import basename, dirname, isfile, join 6 | 7 | modules = glob.glob(join(dirname(__file__), "*.py")) 8 | __all__ = [basename(f)[:-3] for f in modules if isfile(f) 9 | and not f.endswith('__init__.py')] 10 | -------------------------------------------------------------------------------- /website_actions/abstract_website_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Abstract website actions, XU Zhengyi, 2020/05/05 3 | ''' 4 | 5 | from abc import ABCMeta, abstractmethod 6 | 7 | 8 | class WebsiteActions: 9 | ''' 10 | Base class for all website actions. 
11 | ''' 12 | __metaclass__ = ABCMeta 13 | 14 | def __init__(self): 15 | self.class_name = self.__class__.__name__ 16 | 17 | def get_class_name(self) -> str: 18 | ''' 19 | Get class name. 20 | ''' 21 | return self.class_name 22 | 23 | @staticmethod 24 | @abstractmethod 25 | def check_url(manga_url) -> bool: 26 | ''' 27 | Give a manga url and check if the website is this class. 28 | ''' 29 | return False 30 | 31 | @property 32 | @abstractmethod 33 | def login_url(self) -> str: 34 | ''' 35 | Login url property. 36 | ''' 37 | pass 38 | 39 | @abstractmethod 40 | def get_sum_page_count(self, driver) -> int: 41 | ''' 42 | Get sum page count on for the manga. 43 | ''' 44 | pass 45 | 46 | @abstractmethod 47 | def move_to_page(self, driver, page) -> bool: 48 | ''' 49 | Move to given page. 50 | ''' 51 | pass 52 | 53 | @abstractmethod 54 | def wait_loading(self, driver): 55 | ''' 56 | Wait page loading. 57 | ''' 58 | pass 59 | 60 | def before_download(self, driver): 61 | ''' 62 | Run after page loaded, can be used to close some hint windows. 63 | ''' 64 | pass 65 | 66 | @abstractmethod 67 | def get_imgdata(self, driver, now_page): 68 | ''' 69 | Get imgdata on the page. Return 70 | ''' 71 | pass 72 | 73 | @abstractmethod 74 | def get_now_page(self, driver) -> int: 75 | ''' 76 | Get now page. 
77 | ''' 78 | pass 79 | -------------------------------------------------------------------------------- /website_actions/bookwalker_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for bookwalker.jp 3 | ''' 4 | import base64 5 | 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.support.ui import WebDriverWait 8 | 9 | try: 10 | from abstract_website_actions import WebsiteActions 11 | except: 12 | from website_actions.abstract_website_actions import WebsiteActions 13 | 14 | 15 | class BookwalkerJP(WebsiteActions): 16 | ''' 17 | bookwalker.jp 18 | ''' 19 | login_url = 'https://member.bookwalker.jp/app/03/login' 20 | js = '' 21 | 22 | def check_is_loading(self, list_ele): 23 | ''' 24 | Check is loading. 25 | ''' 26 | is_loading = False 27 | for i in list_ele: 28 | if i.is_displayed() is True: 29 | is_loading = True 30 | break 31 | return is_loading 32 | 33 | @staticmethod 34 | def check_url(manga_url): 35 | return manga_url.find('bookwalker.jp') != -1 36 | 37 | def get_sum_page_count(self, driver): 38 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[1]) 39 | 40 | def move_to_page(self, driver, page): 41 | driver.execute_script( 42 | f'NFBR.a6G.Initializer.{self.js}.menu.options.a6l.moveToPage(%d)' % page) 43 | 44 | def wait_loading(self, driver): 45 | WebDriverWait(driver, 600).until_not(lambda x: self.check_is_loading( 46 | x.find_elements(By.CSS_SELECTOR, ".loading"))) 47 | 48 | def get_imgdata(self, driver, now_page): 49 | canvas = driver.find_element(By.CSS_SELECTOR, ".currentScreen canvas") 50 | img_base64 = driver.execute_script( 51 | "return arguments[0].toDataURL('image/png', 1.0).substring(21);", canvas) 52 | return base64.b64decode(img_base64) 53 | 54 | def get_now_page(self, driver): 55 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[0]) 56 | 57 
| def before_download(self, driver): 58 | for key in driver.execute_script('return Object.keys(NFBR.a6G.Initializer)'): 59 | if 'menu' in driver.execute_script(f'return Object.keys(NFBR.a6G.Initializer.{key})'): 60 | self.js = key 61 | break 62 | -------------------------------------------------------------------------------- /website_actions/bookwalker_tw_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for bookwalker.com.tw 3 | ''' 4 | import base64 5 | 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.support.ui import WebDriverWait 8 | 9 | try: 10 | from abstract_website_actions import WebsiteActions 11 | except: 12 | from website_actions.abstract_website_actions import WebsiteActions 13 | 14 | 15 | class BookwalkerTW(WebsiteActions): 16 | ''' 17 | bookwalker.com.tw 18 | ''' 19 | login_url = 'https://www.bookwalker.com.tw/user/login' 20 | js = '' 21 | 22 | def check_is_loading(self, list_ele): 23 | ''' 24 | Check is loading. 
25 | ''' 26 | is_loading = False 27 | for i in list_ele: 28 | if i.is_displayed() is True: 29 | is_loading = True 30 | break 31 | return is_loading 32 | 33 | @staticmethod 34 | def check_url(manga_url): 35 | return manga_url.find('bookwalker.com.tw') != -1 36 | 37 | def get_sum_page_count(self, driver): 38 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[1]) 39 | 40 | def move_to_page(self, driver, page): 41 | driver.execute_script( 42 | f'NFBR.a6G.Initializer.{self.js}.menu.options.a6l.moveToPage(%d)' % page) 43 | 44 | def wait_loading(self, driver): 45 | WebDriverWait(driver, 600).until_not(lambda x: self.check_is_loading( 46 | x.find_elements(By.CSS_SELECTOR, ".loading"))) 47 | 48 | def get_imgdata(self, driver, now_page): 49 | canvas = driver.find_element(By.CSS_SELECTOR, ".currentScreen canvas") 50 | img_base64 = driver.execute_script( 51 | "return arguments[0].toDataURL('image/png', 1.0).substring(21);", canvas) 52 | return base64.b64decode(img_base64) 53 | 54 | def get_now_page(self, driver): 55 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[0]) 56 | 57 | def before_download(self, driver): 58 | for key in driver.execute_script('return Object.keys(NFBR.a6G.Initializer)'): 59 | if 'menu' in driver.execute_script(f'return Object.keys(NFBR.a6G.Initializer.{key})'): 60 | self.js = key 61 | break 62 | 63 | -------------------------------------------------------------------------------- /website_actions/cmoa_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for www.cmoa.jp 3 | ''' 4 | import base64 5 | from io import BytesIO 6 | 7 | import PIL.Image as pil_image 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support.ui import WebDriverWait 10 | 11 | try: 12 | from abstract_website_actions import WebsiteActions 13 | except: 14 | from 
website_actions.abstract_website_actions import WebsiteActions 15 | 16 | 17 | class CmoaJP(WebsiteActions): 18 | ''' 19 | cmoa.jp 20 | ''' 21 | login_url = 'https://www.cmoa.jp/' 22 | 23 | @staticmethod 24 | def get_file_content_chrome(driver, uri): 25 | result = driver.execute_async_script(""" 26 | var uri = arguments[0]; 27 | var callback = arguments[1]; 28 | var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)}; 29 | var xhr = new XMLHttpRequest(); 30 | xhr.responseType = 'arraybuffer'; 31 | xhr.onload = function(){ callback(toBase64(xhr.response)) }; 32 | xhr.onerror = function(){ callback(xhr.status) }; 33 | xhr.open('GET', uri); 34 | xhr.send(); 35 | """, uri) 36 | if type(result) == int: 37 | raise Exception("Request failed with status %s" % result) 38 | return base64.b64decode(result) 39 | 40 | @staticmethod 41 | def check_url(manga_url): 42 | return manga_url.find('cmoa.jp/bib/speedreader') != -1 43 | 44 | def get_sum_page_count(self, driver): 45 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[1]) 46 | 47 | def move_to_page(self, driver, page): 48 | driver.execute_script( 49 | 'SpeedBinb.getInstance("content").moveTo(%d)' % page) 50 | 51 | def wait_loading(self, driver): 52 | WebDriverWait(driver, 600).until_not( 53 | lambda x: x.find_element(By.ID, "start_wait")) 54 | 55 | def get_imgdata(self, driver, now_page): 56 | image_elements = driver.find_element( 57 | By.ID, 'content-p%d' % 
now_page).find_elements(By.CSS_SELECTOR, 'img') 58 | 59 | imgs_arr = [] 60 | imgs_height = [0] 61 | mmset = 4 62 | for i in image_elements: 63 | blob_url = i.get_attribute('src') 64 | image_data = self.get_file_content_chrome(driver, blob_url) 65 | part_img = pil_image.open(BytesIO(image_data)) 66 | imgs_arr.append(part_img) 67 | width, height = part_img.size 68 | imgs_height.append(height + imgs_height[-1] - mmset) 69 | 70 | last_img_height = imgs_height.pop() + mmset 71 | 72 | final_img = pil_image.new('RGB', (width, last_img_height)) 73 | 74 | for i in range(len(imgs_arr)): 75 | final_img.paste(imgs_arr[i], (0, imgs_height[i])) 76 | 77 | final_data = BytesIO() 78 | final_img.save(final_data, format='PNG') 79 | return final_data.getbuffer() 80 | 81 | def get_now_page(self, driver): 82 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[0]) 83 | 84 | def before_download(self, driver): 85 | driver.execute_script('parent.closeTips()') 86 | -------------------------------------------------------------------------------- /website_actions/coma_jp_novel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for coma.jp novels 3 | ''' 4 | import base64 5 | import time 6 | 7 | from selenium.webdriver.common.by import By 8 | from selenium.webdriver.support.ui import WebDriverWait 9 | 10 | try: 11 | from abstract_website_actions import WebsiteActions 12 | except: 13 | from website_actions.abstract_website_actions import WebsiteActions 14 | 15 | 16 | class CmoaJPNovels(WebsiteActions): 17 | ''' 18 | coma.jp novels 19 | ''' 20 | login_url = 'https://www.cmoa.jp/' 21 | 22 | def next_page(self, driver): 23 | driver.execute_script('moveNextPageSpeech()') 24 | 25 | def prev_page(self, driver): 26 | driver.execute_script('movePrevPageSpeech()') 27 | 28 | def is_finished(self, driver): 29 | return driver.execute_script('return ZHL0PP.Z060JL()') 30 | 31 | 
@staticmethod 32 | def check_url(manga_url): 33 | return manga_url.find('cmoa.jp/bib/reader') != -1 34 | 35 | def get_sum_page_count(self, driver): 36 | self.now_page = 0 37 | sum_page = 0 38 | while not self.is_finished(driver): 39 | sum_page += 1 40 | self.next_page(driver) 41 | time.sleep(0.5) 42 | 43 | for _ in range(sum_page): 44 | self.prev_page(driver) 45 | time.sleep(0.5) 46 | 47 | return sum_page 48 | 49 | def move_to_page(self, driver, page): 50 | if page == self.now_page: 51 | return 52 | 53 | f_to_use = self.next_page 54 | if page < self.now_page: 55 | f_to_use = self.prev_page 56 | 57 | for _ in range(abs(page - self.now_page)): 58 | f_to_use(driver) 59 | 60 | self.now_page = page 61 | 62 | def wait_loading(self, driver): 63 | WebDriverWait(driver, 600).until_not( 64 | lambda x: x.find_element(By.ID, 'ctmble_menu_notification_overlay_span').is_displayed()) 65 | 66 | def get_imgdata(self, driver, now_page): 67 | return driver.get_screenshot_as_png() 68 | 69 | def get_now_page(self, driver): 70 | return self.now_page + 1 71 | 72 | def before_download(self, driver): 73 | WebDriverWait(driver, 600).until_not( 74 | lambda x: x.find_element(By.ID, 'preMessage')) 75 | driver.switch_to.frame(driver.find_element(By.ID, 'binb')) 76 | WebDriverWait(driver, 600).until_not( 77 | lambda x: x.find_element(By.ID, 'msg_outer_div').is_displayed()) 78 | -------------------------------------------------------------------------------- /website_actions/takeshobo_co_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for takeshobo.co.jp 3 | ''' 4 | import base64 5 | from io import BytesIO 6 | 7 | import PIL.Image as pil_image 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support.ui import WebDriverWait 10 | 11 | try: 12 | from abstract_website_actions import WebsiteActions 13 | except: 14 | from website_actions.abstract_website_actions import WebsiteActions 15 | 16 | 17 | 
class TakeshoboJP(WebsiteActions): 18 | ''' 19 | takeshobo.co.jp 20 | ''' 21 | login_url = 'https://gammaplus.takeshobo.co.jp/' 22 | 23 | @staticmethod 24 | def get_file_content_chrome(driver, uri): 25 | result = driver.execute_async_script(""" 26 | var uri = arguments[0]; 27 | var callback = arguments[1]; 28 | var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)}; 29 | var xhr = new XMLHttpRequest(); 30 | xhr.responseType = 'arraybuffer'; 31 | xhr.onload = function(){ callback(toBase64(xhr.response)) }; 32 | xhr.onerror = function(){ callback(xhr.status) }; 33 | xhr.open('GET', uri); 34 | xhr.send(); 35 | """, uri) 36 | if type(result) == int: 37 | raise Exception("Request failed with status %s" % result) 38 | return base64.b64decode(result) 39 | 40 | @staticmethod 41 | def check_url(manga_url): 42 | return manga_url.find('takeshobo.co.jp/manga/') != -1 43 | 44 | def get_sum_page_count(self, driver): 45 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[1]) 46 | 47 | def move_to_page(self, driver, page): 48 | driver.execute_script( 49 | 'SpeedBinb.getInstance("content").moveTo(%d)' % page) 50 | 51 | def wait_loading(self, driver): 52 | WebDriverWait(driver, 600).until_not( 53 | lambda x: x.find_element(By.ID, "start_wait")) 54 | 55 | def get_imgdata(self, driver, now_page): 56 | image_elements = driver.find_element( 57 | By.ID, 'content-p%d' % now_page).find_elements(By.CSS_SELECTOR, 'img') 58 | 59 | 
imgs_arr = [] 60 | imgs_height = [0] 61 | mmset = 4 62 | for i in image_elements: 63 | blob_url = i.get_attribute('src') 64 | image_data = self.get_file_content_chrome(driver, blob_url) 65 | part_img = pil_image.open(BytesIO(image_data)) 66 | imgs_arr.append(part_img) 67 | width, height = part_img.size 68 | imgs_height.append(height + imgs_height[-1] - mmset) 69 | 70 | last_img_height = imgs_height.pop() + mmset 71 | 72 | final_img = pil_image.new('RGB', (width, last_img_height)) 73 | 74 | for i in range(len(imgs_arr)): 75 | final_img.paste(imgs_arr[i], (0, imgs_height[i])) 76 | 77 | final_data = BytesIO() 78 | final_img.save(final_data, format='PNG') 79 | return final_data.getbuffer() 80 | 81 | def get_now_page(self, driver): 82 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[0]) 83 | 84 | def before_download(self, driver): 85 | driver.execute_script('parent.closeTips()') 86 | --------------------------------------------------------------------------------