├── .gitignore ├── Dockerfile ├── README.md ├── docker-compose.yml ├── downloader.py ├── main.py ├── requirements.txt └── website_actions ├── __init__.py ├── abstract_website_actions.py ├── bookwalker_jp_actions.py ├── bookwalker_tw_actions.py ├── cmoa_jp_actions.py ├── coma_jp_novel.py └── takeshobo_co_jp_actions.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8 2 | 3 | RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - 4 | RUN sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list' 5 | 6 | # Updating apt to see and install Google Chrome 7 | RUN apt-get -y update 8 | 9 | # Magic happens 10 | RUN apt-get install -y google-chrome-stable 11 | 12 | # Installing Unzip 13 | RUN apt-get install -yqq unzip 14 | 15 | # Download the Chrome Driver 16 | RUN wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/ \ 17 | && curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE \ 18 | | xargs -I{} wget -O /tmp/chromedriver.zip http://chromedriver.storage.googleapis.com/{}/chromedriver_linux64.zip \ 19 | && unzip /tmp/chromedriver.zip -d /usr/local/bin/ \ 20 | && rm /tmp/chromedriver.zip 21 | 22 | # Set display port as an environment variable 23 | ENV 
DISPLAY=:99 24 | 25 | WORKDIR /usr/src/app 26 | 27 | COPY . /app 28 | WORKDIR /app 29 | 30 | RUN pip install --upgrade pip 31 | 32 | RUN pip install -r requirements.txt 33 | 34 | CMD python main.py 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NEW METHOD FOR BW TO TRY! 2 | 3 | # New Version 4 | ## v0.3.3 (Update Recommended) 5 | - Update to Chromium 112.0.5590.0. 6 | - Better support for BW books page number pattern, support novels. 7 | 8 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.3) or here: [Windows x64 release build v0.3.3](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.3/BW-downloader-chrome-v0.3.3.7z) 9 | 10 | ## v0.3.2 11 | This version has some good features below for BW: 12 | - Could download the cover (If the cover image is jpeg, please check it or better convert it to png before you share it, because the jpeg file will contain your BW account info). 13 | - Could name the image automictically using the page number, you can start at any page you want and go forward or go back! 14 | - Could name the folder with the BW uuid but not a random one anymore. 15 | - No more blank or repeating page will be skipped, no more image hash to check repeating page, better performance. 16 | 17 | Example screenshot: 18 | ![1670681578(1)](https://user-images.githubusercontent.com/29002064/206859972-0c775ee2-02fd-4d62-8870-4cd262fc6116.jpg) 19 | 20 | If you find the file name all become "cover_or_extra_xxx" when downloading some manga, please file a bug, there may be more URL patterns in BW than I have seen or they changed the pattern, it should be covered to make the page number working correctly. 
21 | 22 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.2) or here: [Windows x64 release build v0.3.2](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.2/BW-downloader-chrome-v0.3.2.7z) 23 | 24 | ## v0.3.1 25 | This version has improved the performance about saving snapshot, if you have some problems that the browser become very slow during downloading, please try the new version. 26 | 27 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3.1) or here: [Windows x64 release build v0.3.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3.1/BW-downloader-chrome-v0.3.1.7z) 28 | 29 | ## v0.3 30 | Fixed the problem that some manga has width less than 800px could not be downloaded, see [#113](/../../issues/113). 31 | 32 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.3) or here: [Windows x64 release build v0.3](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.3/BW-downloader-chrome-v0.3.7z) 33 | 34 | ## v0.2.1 35 | Pump Chromium to 109.0.5393, may fix some problems. 
36 | 37 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.2.1) or here: [Windows x64 release build v0.2.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.2.1/BW-downloader-chrome-v0.2.1.7z) 38 | 39 | ## v0.2 40 | This version is based on Chromium 106.0.5243.0, the changes are below: 41 | - Support `https://ebook.tongli.com.tw`, will save the downloaded images in the `C:\bw_export_data\TONGLI_URL_STRING` 42 | - Support `https://www.dlsite.com`, but this is saving the cache images, so the final 3~4 pages should be downloaded as below (for example we have 10 pages): 43 | - Go through page 1 to 10 (Make sure the current page is fully loaded when you go to next page). 44 | - You will find that at page 10, there are maybe only images for page 1-7. 45 | - Go back from page 10 to page 5, you will find that the final pages are saved. (but maybe in reverse order) 46 | - Currently we could not do anything better than this. 47 | - Works for `https://book.dmm.com`, use the script below to move page: 48 | ```js 49 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.views_.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 50 | ``` 51 | The script above is for **DMM**, for **BW** please use the script below: 52 | ```js 53 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.L7v.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 54 | ``` 55 | - Maybe slightly faster for BW and may download some images that width > height. 56 | 57 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.2) or here: [Windows x64 release build v0.2](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.2/BW-downloader-chrome-v0.2.7z) 58 | 59 | How to use (Same as the old version): 60 | 1. Unzip the file `BW-downloader-chrome-bin.zip`. 61 | 2. 
Open a `powershell` or `cmd`, `cd` to the unzipped browser dir. 62 | 3. Open the browser with command line `.\chrome.exe --user-data-dir=c:\bw-downloader-profile --no-sandbox` 63 | 4. Browse the manga, manga will be saved to `C:\bw_export_data` 64 | 65 | **Do not use it for other website, only use it as a Manga downloader, it is not as safe as normal chrome browser!** 66 | 67 | # Old Version 68 | Download it in the [release](https://github.com/xuzhengyi1995/Manga_downloader/releases/tag/BW-downloader-chrome-v0.1) or here: [Windows x64 release build v0.1](https://github.com/xuzhengyi1995/Manga_downloader/releases/download/BW-downloader-chrome-v0.1/BW-downloader-chrome-v0.1.7z) 69 | 70 | **If you are finding something to download BW, please try this method for BW, it's really a good thing to try, you will like it!** 71 | 72 | **For coma, please see below.** 73 | 74 | Now have a new method, with a customized `chromium` browser, it can download BW original image with its original size very easily. It can download both manga and novel, and may be used on every website that uses canvas to render the page (now only tested on BW). 75 | 76 | It is only a dev version, may have bugs and may crash, but you can download the customized browser and try it now. 77 | 78 | **Do not use it for other website, only use it as a BW downloader, it is not as safe as normal chrome browser!** 79 | 80 | Clone this repo or only download the [BW-downloader-chrome-bin.zip](https://github.com/xuzhengyi1995/Manga_downloader/raw/master/BW-downloader-chrome-bin.zip) 81 | 82 | 1. Unzip the file `BW-downloader-chrome-bin.zip`. 83 | 2. Open a `powershell` or `cmd`, `cd` to the unzipped browser dir. 84 | 3. Open the browser with command line `.\chrome.exe --user-data-dir=c:\bw-downloader-profile --no-sandbox` 85 | 4. 
Adjust your browser window size, make it smaller and can only display one manga page, example below 86 | ![image](https://user-images.githubusercontent.com/29002064/139255318-95531cd9-c442-4a61-acb4-cef3d71b7190.png) 87 | 88 | 89 | 5. You can now go to BW website, log in and open the manga you want to download, remember to reset the manga's read status before you open it. 90 | 6. Press `F12`, make it a separate window (see the image below) and run the script below (just go to `console` and copy-past the code, press enter) to move the page automatically, if your network is good, you can change the `3000` to a smaller number, 3000 means 3000ms -> 3s, every 3s it will move to the next page. 91 | 92 | You can also manually click the mouse left button / use a keyboard arrow key / use a keyboard simulation software to move the page, you can choose the way you like, just make sure the page is moving. 93 | ```js 94 | window.i=0;setInterval(()=>{NFBR.a6G.Initializer.L7v.menu.options.a6l.moveToPage(window.i);console.log(window.i);window.i++;},3000) 95 | ``` 96 | 97 | If this not work and show 'Uncaught TypeError: Cannot read properties of undefined (reading 'menu') at :1:54', it means BW has updated the js, you can try to find it in the console, just try `NFBR.a6G.Initializer.*.menu` is not `undefined` and the * is the new object name; Or you can just file a bug. 98 | 99 | ![image](https://user-images.githubusercontent.com/29002064/138590508-e7555a2d-1528-4e59-8a50-e08e407bc1be.png) 100 | 101 | 102 | 7. Now you can check your `C:\bw_export_data`, you can find a random uuid folder with all the manga images in it. 103 | ![image](https://user-images.githubusercontent.com/29002064/139255390-03b9191a-e90b-4572-9cde-b7e50ca9787c.png) 104 | 105 | If you want to download multiple manga at the same time, just open as many manga as you want, and do the step 5 to 6. 
106 | 107 | This method is very easy to use, stable and no need to find any resolution or cookies, and it can download the real original image with no barcode. 108 | 109 | Maybe will add a new browser ui to it and can click to download in the future. 110 | 111 | May not download the cover page now. 112 | 113 | Only built on windows, no program now for other platform. 114 | 115 | If you find some problem, please file a bug, thank you! 116 | 117 | # Manga_Downloader 118 | 119 | A Manga download framework using `selenium`. 120 | 121 | **Now support the websites below:** 122 | 123 | 1. [Bookwalker.jp](https://bookwalker.jp) 124 | 2. [Bookwalker.com.tw](https://www.bookwalker.com.tw) 125 | 3. [Cmoa.jp](https://www.cmoa.jp/) 126 | 127 | **Program will check the website of given URL automaticity** 128 | 129 | **If the website you given is unsupported, the program will raise an error.** 130 | 131 | **Now support multi manga download with only login one time** 132 | 133 | **现在支持批量下载** 134 | 135 | **you should prepare the information below:** 136 | 137 | 1. Manga URL 138 | 2. Cookies 139 | 3. Image dir (Where to put the image, folder name) 140 | 4. Some website you should see the size of image and set it at `res`. [Cmoa.jp](https://www.cmoa.jp/) doesn't need this. 141 | 142 | # How to Use 143 | 144 | ## All settings 145 | 146 | All the settings are in `main.py`. 147 | 148 | ```python 149 | settings = { 150 | # Manga urls, should be the same website 151 | 'manga_url': [ 152 | 'URL_1', 153 | 'URL_2' 154 | ], 155 | # Your cookies 156 | 'cookies': 'YOUR_COOKIES_HERE', 157 | # Folder names to store the Manga, the same order with manga_url 158 | 'imgdir': [ 159 | 'IMGDIR_FOR_URL_1', 160 | 'IMGDIR_FOR_URL_2' 161 | ], 162 | # Resolution, (Width, Height), For cmoa.jp this doesn't matter. 163 | 'res': (1393, 2048), 164 | # Sleep time for each page (Second), normally no need to change. 
165 | 'sleep_time': 2, 166 | # Time wait for page loading (Second), if your network is good, you can reduce this parameter. 167 | 'loading_wait_time': 20, 168 | # Cut image, (left, upper, right, lower) in pixel, None means do not cut the image. This often used to cut the edge. 169 | # Like (0, 0, 0, 3) means cut 3 pixel from bottom of the image. 170 | 'cut_image': None, 171 | # File name prefix, if you want your file name like 'klk_v1_001.jpg', write 'klk_v1' here. 172 | 'file_name_prefix': '', 173 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 174 | 'number_of_digits': 3, 175 | # Start page, if you want to download from page 3, set this to 3, None means from 0 176 | 'start_page': None, 177 | # End page, if you want to download until page 10, set this to 10, None means until finished 178 | 'end_page': None, 179 | } 180 | ``` 181 | 182 | ## Install environment & How to Get URL/Cookies 183 | 184 | **This program now work for Chrome, if you use another browser, please check [this page](https://selenium-python.readthedocs.io/installation.html)** 185 | 186 | 0. Install python packages _selenium_ and _pillow_ and get the _Google chrome Drivers_. 187 | 188 | 1. For _selenium_ ad _pillow_: 189 | 190 | ```shell 191 | pip install selenium 192 | pip install Pillow 193 | # This undetected_chromedriver is prevent us from been detected by BW 194 | pip install undetected_chromedriver 195 | ``` 196 | 197 | 2. For Google chrome Drivers: 198 | 199 | 1. Please check your Chrome version, 'Help'->'About Google Chrome'. 200 | 201 | 2. Download Chrome Driver fit to your Chrome version [here](https://sites.google.com/a/chromium.org/chromedriver/downloads). 202 | 203 | 3. Put it into any folder and add the folder into the PATH. 204 | 205 | 3. For more info, I suggest you to check it [here](https://selenium-python.readthedocs.io/installation.html) 206 | 207 | 208 | 1. Change the `IMGDIR` in the main.py to indicate where to put the manga. 209 | 210 | 2. 
Add your cookies in the program. 211 | 212 | **Remember to use F12 to see the cookies!** 213 | 214 | **Because some http only cookies can not be seen by javascript!** 215 | 216 | **Remember to go to the links below to get the cookies!** 217 | 218 | 1. For [Bookwalker.jp] cookies, go [here](https://member.bookwalker.jp/app/03/my/profile). 219 | 2. For [Bookwalker.com.tw] cookies, go [here](https://www.bookwalker.com.tw/member). 220 | 3. For [www.cmoa.jp] cookies, go [here](https://www.cmoa.jp/) and you **must** get cookies by plug-in [EditThisCookie](http://www.editthiscookie.com/), download it for chrome [here](https://chrome.google.com/webstore/detail/edit-this-cookie/fngmhnnpilhplaeedifhccceomclgfbg). 221 | 222 | - For `EditThisCookie`, this can be used in any website above, but for `cmoa` you **must** use this method 223 | 224 | 1. Go to user preferences (chrome-extension://fngmhnnpilhplaeedifhccceomclgfbg/options_pages/user_preferences.html) of `EditThisCookie` 225 | 2. Set the cookie export format to `Semicolon separated name=value pairs` 226 | 3. Go to [cmoa](https://www.cmoa.jp/), click the `EditThisCookie` and click `export` button 227 | 4. Copy the cookies in the file (**After the `// Example: http://www.tutorialspoint.com/javascript/javascript_cookies.htm`**) into the program 228 | 229 | - For the traditional way 230 | 231 | > 1. Open the page. 232 | > 2. Press F12. 233 | > 3. Click on the _Network_. 234 | > 4. Refresh the page. 235 | > 5. Find the first _profile_ request, click it. 236 | > 6. On the right, there will be a _Request Headers_, go there. 237 | > 7. Find the _cookie:...._, copy the string after the _cookie:_, paste to the _main.py_, _YOUR_COOKIES_HERE_ 238 | 239 | 3. Change the _manga_url_ in the _main.py_. 240 | 241 | 1. For [Bookwalker.jp] 242 | 243 | First go to [購入済み書籍一覧](https://bookwalker.jp/holdBooks/), you can find all your mangas here. 244 | 245 | This time the URL is the URL of **'この本を読む'** button for your manga. 
246 | 247 | Right click this button, and click **'Copy link address'**. 248 | 249 | The URL is start with **member.bookwalker.jp**, not the **viewer.bookwalker.jp**. Here we use the manga [【期間限定 無料お試し版】あつまれ!ふしぎ研究部 1](https://member.bookwalker.jp/app/03/webstore/cooperation?r=BROWSER_VIEWER/640c0ddd-896c-4881-945f-ad5ce9a070a6/https%3A%2F%2Fbookwalker.jp%2FholdBooks%2F). 250 | 251 | This is the URL of the **あつまれ!ふしぎ研究部 1**: 252 | 253 | 2. For [Bookwalker.com.tw] 254 | 255 | Please go to [线上阅读](https://www.bookwalker.com.tw/member/available_book_list). 256 | 257 | The manga URL like this: 258 | 259 | 3. For [Cmoa.jp] 260 | 261 | Open the Manga and just copy the URL on the browser. 262 | 263 | The manga URL like this : 264 | 265 | Just copy this URL to the `MANGA_URL` in _main.py_. 266 | 267 | 4. After edit the program, run `python main.py` to run it. 268 | 269 | # Notice 270 | 271 | 1. The `SLEEP_TIME` by default is 2 seconds, you can adjust it with your own network situation, if the downloading has repeated images, you can change it to 5 or more. If you think it's too slow, try change it to 1 or even 0.5. 272 | 273 | 2. `LOADING_WAIT_TIME = 20`, this is the time to wait until the manga viewer page loaded, if your network is not good, you can set it to 30 or 50 seconds. 274 | 275 | 3. Resolution, you can change it as you want, but check the original image resolution first. 276 | 277 | ```python 278 | RES = (784, 1200) 279 | ``` 280 | 281 | If the original image has a higher resolution, you can change it like this (The resolution is just a example). 282 | 283 | ```python 284 | RES = (1568, 2400) 285 | ``` 286 | 287 | **For [Cmoa.jp] no need this, the resolution is fixed by [Cmoa.jp].** 288 | 289 | 4. Some time we should log out and log in, this website is very strict and take so many method to prevent abuse. 290 | 291 | 5. Now you can cut the image by setting `CUT_IMAGE` to (left, upper, right, lower). 
292 | 293 | For example you want to cut 3px from the bottom of image, you can set it to: 294 | 295 | ```python 296 | CUT_IMAGE = (0, 0, 0, 3) 297 | ``` 298 | 299 | This function use `Pillow`, if you want to use it, you should install it by using the command: 300 | 301 | ```shell 302 | pip install Pillow 303 | ``` 304 | 305 | By default it is `None`, means do not cut the image. 306 | 307 | 6. You can now change the file name prefix and number of digits by changing `file_name_prefix` and `number_of_digits`. 308 | 309 | For example, if you are downloading Kill La Kill Manga Volume 1, and you want the file name like: 310 | 311 |
312 |         KLK_V1
313 |         │--KLK_V1_001.jpg
314 |         │--KLK_V1_002.jpg
315 |         │--KLK_V1_003.jpg
316 |     
317 | 318 | Then you can set the parameters like below: 319 | 320 | ```python 321 | settings = { 322 | ..., 323 | 'file_name_prefix': 'KLK_V1', 324 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 325 | 'number_of_digits': 3 326 | } 327 | ``` 328 | 329 | # Develop 330 | 331 | 0. Concept 332 | 333 | To download Manga, normally we do like this: 334 | 335 |
336 |     +------------+     +-----------+      +------------+      +-------------------+      +--------------+
337 |     |            |     |           |      |            |      |                   | OVER |              |
338 |     |   Login    +-----+ Load page +----->+ Save image +----->+ Move to next page +----->+   Finished   |
339 |     |            |     |           |      |            |      |                   |      |              |
340 |     +------------+     +-----------+      +-----+------+      +---------+---------+      +--------------+
341 |                                                 ^                       |
342 |                                                 |                       |
343 |                                                 |      More page        |
344 |                                                 +-----------------------+
345 |     
346 | 347 | So we can create a framework to reuse the code, for new website, normally we only need to write some of the method. 348 | 349 | 1. Structure of file 350 | 351 |
352 |     |--main.py
353 |     │--downloader.py
354 |     │--README.MD
355 |     └─website_actions
356 |         │--abstract_website_actions.py
357 |         │--bookwalker_jp_actions.py
358 |         │--bookwalker_tw_actions.py
359 |         │--cmoa_jp_actions.py
360 |         │--__init__.py
361 |     
362 | 363 | 2. Introduction to abstract `WebsiteActions` class. 364 | 365 | For each website, the class should have the following methods/attributes, here we use bookwalker.jp as example: 366 | 367 | ```python 368 | class BookwalkerJP(WebsiteActions): 369 | ''' 370 | bookwalker.jp 371 | ''' 372 | 373 | # login_url is the page that we load first and put the cookies. 374 | login_url = 'https://member.bookwalker.jp/app/03/login' 375 | 376 | @staticmethod 377 | def check_url(manga_url): 378 | ''' 379 | This method return a bool, check if the given manga url is belong to this class. 380 | ''' 381 | return manga_url.find('bookwalker.jp') != -1 382 | 383 | def get_sum_page_count(self, driver): 384 | ''' 385 | This method return an integer, get total page number. 386 | ''' 387 | return int(str(driver.find_element_by_id('pageSliderCounter').text).split('/')[1]) 388 | 389 | def move_to_page(self, driver, page): 390 | ''' 391 | This method return nothing, move to given page number. 392 | ''' 393 | driver.execute_script( 394 | 'NFBR.a6G.Initializer.B0U.menu.a6l.moveToPage(%d)' % page) 395 | 396 | def wait_loading(self, driver): 397 | ''' 398 | This method return nothing, wait manga loading. 399 | ''' 400 | WebDriverWait(driver, 30).until_not(lambda x: self.check_is_loading( 401 | x.find_elements_by_css_selector(".loading"))) 402 | 403 | def get_imgdata(self, driver, now_page): 404 | ''' 405 | This method return String/something can be written to file or convert to BytesIO, get image data. 
406 | ''' 407 | canvas = driver.find_element_by_css_selector(".currentScreen canvas") 408 | img_base64 = driver.execute_script( 409 | "return arguments[0].toDataURL('image/jpeg').substring(22);", canvas) 410 | return base64.b64decode(img_base64) 411 | 412 | def get_now_page(self, driver): 413 | ''' 414 | This method return an integer, the page number on the current page 415 | ''' 416 | return int(str(driver.find_element_by_id('pageSliderCounter').text).split('/')[0]) 417 | ``` 418 | 419 | We also have a `before_download` method, this method run before we start download, because some website need to close some pop-up component before we start downloading. 420 | 421 | ```python 422 | def before_download(self, driver): 423 | ''' 424 | This method return nothing, Run before download. 425 | ''' 426 | driver.execute_script('parent.closeTips()') 427 | ``` 428 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | services: 3 | python: 4 | container_name: mangadw 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | volumes: 9 | - .:/app -------------------------------------------------------------------------------- /downloader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main downloader, XU Zhengyi, 2020/05/05 3 | ''' 4 | import base64 5 | import logging 6 | import os 7 | import random 8 | import time 9 | from io import BytesIO 10 | from PIL import ImageOps 11 | 12 | import PIL.Image as pil_image 13 | import undetected_chromedriver as uc 14 | from selenium.webdriver.support.ui import WebDriverWait 15 | 16 | # DO NOT REMOVE THIS LINE. 
Used for __subclasses__()
from website_actions import *
from website_actions.abstract_website_actions import WebsiteActions

# Shared logging format for the whole downloader, e.g.
# "[INFO](root) 2020-05-05 12:00:00 : message".
logging.basicConfig(format='[%(levelname)s](%(name)s) %(asctime)s : %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)


def get_cookie_dict(cookies):
    '''
    Parse a raw "name=value; name=value" cookie header string into a dict.

    Accepts both '; ' and a bare ';' as separators; empty fragments are
    skipped. Only the first '=' splits name from value, so values that
    themselves contain '=' are preserved intact.
    '''
    cookies_split = cookies.split('; ')
    # Fall back to a bare ';' separator when '; ' produced a single chunk.
    if len(cookies_split) == 1:
        cookies_split = cookies.split(';')
    cookies_dict = {}
    for i in cookies_split:
        if i == '':
            continue
        kv = i.split('=')
        # Re-join the tail so values containing '=' survive the split.
        cookies_dict[kv[0]] = '='.join(kv[1:])
    return cookies_dict


def add_cookies(driver, cookies):
    '''
    Install every name/value pair from *cookies* into the selenium driver.

    The driver must already be on a page of the target domain, otherwise
    add_cookie would reject the cookies.
    '''
    for i in cookies:
        driver.add_cookie({'name': i, 'value': cookies[i]})


class Downloader:
    '''
    Main download class
    '''

    def __init__(
        self, manga_url, cookies, imgdir, res, sleep_time=2, loading_wait_time=20,
        cut_image=None, file_name_prefix='', number_of_digits=3, start_page=None,
        end_page=None
    ):
        '''
        Configure the downloader and start the browser.

        manga_url / imgdir are parallel lists (URL i is saved into dir i).
        cookies is the raw "name=value; ..." header string.
        res is a (width, height) tuple used as the browser window size.
        cut_image is None (no crop), 'dynamic' (auto-crop from the first
        page's bounding box), or a (left, upper, right, lower) pixel tuple.
        start_page / end_page are 1-based, inclusive bounds; None means
        "from the beginning" / "until the last page".
        '''
        self.manga_url = manga_url
        self.cookies = get_cookie_dict(cookies)
        self.imgdir = imgdir
        self.res = res
        self.sleep_time = sleep_time
        self.loading_wait_time = loading_wait_time
        self.cut_image = cut_image
        # Build a printf-style file name template such as '/%03d.png'
        # (or '/prefix_%03d.png' when a prefix is given).
        self.file_name_model = '/'
        if len(file_name_prefix) != 0:
            self.file_name_model += file_name_prefix + '_'

        # '%%0%dd' renders to e.g. '%03d' for number_of_digits == 3.
        self.file_name_model += '%%0%dd.png' % number_of_digits
        # Convert the 1-based user-facing start page to a 0-based index.
        self.start_page = start_page - 1 if start_page and start_page > 0 else 0
        self.end_page = end_page
        # Cached crop box for cut_image == 'dynamic'; computed lazily from
        # the first downloaded page, reset after each book.
        self.image_box = None

        self.init_function()

    def check_implementation(self, this_manga_url):
        '''
        Pick the WebsiteActions subclass whose check_url matches the URL.

        Relies on `from website_actions import *` above having imported all
        site modules so that WebsiteActions.__subclasses__() sees them.
        Raises NotImplementedError when no subclass claims the URL.
        '''
        is_implemented_website = False
        for temp_actions_class in WebsiteActions.__subclasses__():
            if temp_actions_class.check_url(this_manga_url):
                is_implemented_website = True
                self.actions_class = temp_actions_class()
                logging.info('Find action class, use %s class.',
                             self.actions_class.get_class_name())
                break

        if not is_implemented_website:
            logging.error('This website has not been added...')
            raise NotImplementedError

    def str_to_data_uri(self, str):
        '''
        Encode a text string as a base64 data: URI.

        Used to give Chrome's --app switch a harmless placeholder page.
        NOTE(review): the parameter name shadows the builtin `str`.
        '''
        return ("data:text/plain;charset=utf-8;base64,%s" %
                base64.b64encode(bytes(str, 'utf-8')).decode('ascii'))

    def get_driver(self):
        '''
        Launch a headless undetected-chromedriver Chrome and store it on
        self.driver, sized to self.res and hardened against basic
        automation fingerprinting.
        '''
        option = uc.ChromeOptions()
        # Auto-accept any unexpected JS prompt/alert instead of hanging.
        option.set_capability('unhandledPromptBehavior', 'accept')
        # Force a 1:1 device scale so canvas pixels map to image pixels.
        option.add_argument('--high-dpi-support=1')
        option.add_argument('--device-scale-factor=1')
        option.add_argument('--force-device-scale-factor=1')
        option.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36')
        option.add_argument("--app=%s" % self.str_to_data_uri('Manga_downloader'))
        option.add_argument('--headless')
        self.driver = uc.Chrome(options=option)
        self.driver.set_window_size(self.res[0], self.res[1])
        viewport_dimensions = self.driver.execute_script("return [window.innerWidth, window.innerHeight];")
        logging.info('Viewport dimensions %s', viewport_dimensions)
        # Injected before every page load: masks navigator.webdriver,
        # fakes plugins/languages, and patches the notification permission
        # query — common headless-detection probes.
        self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
            Object.defineProperty(navigator, 'webdriver', {
              get: () => false
            })
            window.navigator.chrome = undefined;
            Object.defineProperty(navigator, 'languages', {
              get: () => ['en-US', 'en'],
            });
            Object.defineProperty(navigator, 'plugins', {
              get: () => [1, 2, 3, 4, 5],
            });
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
              parameters.name === 'notifications' ?
                Promise.resolve({ state: Notification.permission }) :
                originalQuery(parameters)
            );
            """
        })

    def init_function(self):
        '''
        One-time setup: unpack a static crop tuple (if any), start the
        browser, and seed the RNG used for jittered page-turn delays.
        '''
        if self.cut_image is not None and self.cut_image != 'dynamic':
            self.left, self.upper, self.right, self.lower = self.cut_image
        self.get_driver()
        random.seed()

    def login(self):
        '''
        Load the site's login page, then replace the browser cookies with
        the user-supplied ones (no credentials are typed).
        '''
        logging.info('Login...')
        driver = self.driver
        driver.get(self.actions_class.login_url)
        driver.delete_all_cookies()
        add_cookies(driver, self.cookies)
        logging.info('Login finished...')

    def prepare_download(self, this_image_dir, this_manga_url):
        '''
        Create the output directory (if missing), open the manga viewer
        page and wait loading_wait_time seconds for it to initialise.
        '''
        if not os.path.isdir(this_image_dir):
            os.mkdir(this_image_dir)
        logging.info('Loading Book page...')
        driver = self.driver
        driver.get(this_manga_url)
        logging.info('Book page Loaded...')
        logging.info('Preparing for downloading...')
        time.sleep(self.loading_wait_time)

    def download_book(self, this_image_dir):
        '''
        Download every page of the currently-open book into this_image_dir.

        Pages are iterated with a 0-based index i; the site actions use a
        1-based page number (i + 1). Files are named via file_name_model % i,
        so the first saved page is numbered 0.
        On any exception the page source and a screenshot are dumped to
        error.html / error.png for diagnosis and the method returns.
        '''
        driver = self.driver
        logging.info('Run before downloading...')
        self.actions_class.before_download(driver)
        logging.info('Start download...')
        try:
            page_count = self.actions_class.get_sum_page_count(driver)
            logging.info('Has %d pages.', page_count)
            # Clamp the user-requested end page to the real page count.
            end_page = page_count
            if self.end_page and self.end_page <= page_count:
                end_page = self.end_page
            self.actions_class.move_to_page(driver, self.start_page)

            time.sleep(self.sleep_time)

            for i in range(self.start_page, end_page):
                self.actions_class.wait_loading(driver)
                image_data = self.actions_class.get_imgdata(driver, i + 1)
                with open(this_image_dir + self.file_name_model % i, 'wb') as img_file:
                    if self.cut_image is None:
                        # No cropping: write the raw image bytes as-is.
                        img_file.write(image_data)
                    elif self.cut_image == "dynamic":
                        org_img = pil_image.open(BytesIO(image_data))
                        if self.image_box is None:
                            # Compute the crop box once, from this page:
                            # invert so white margins become black, then
                            # getbbox() yields the non-margin region.
                            org_img.load()
                            invert_im = org_img.convert("RGB")
                            invert_im = ImageOps.invert(invert_im)
                            self.image_box = invert_im.getbbox()
                        org_img.crop(self.image_box).save(img_file, format='PNG')
                    else:
                        # Static crop: trim the configured pixel margins.
                        org_img = pil_image.open(BytesIO(image_data))
                        width, height = org_img.size
                        org_img.crop(
                            (self.left, self.upper, width - self.right, height - self.lower)).save(img_file, format='PNG')

                logging.info('Page %d Downloaded', i + 1)
                if i == page_count - 1:
                    # Last page of the whole book: nothing left to turn to.
                    logging.info('Finished.')
                    self.image_box = None
                    return

                self.actions_class.move_to_page(driver, i + 1)

                # Block (up to 300 s) until the viewer actually leaves the
                # current 1-based page i + 1.
                WebDriverWait(driver, 300).until_not(
                    lambda x: self.actions_class.get_now_page(x) == i + 1)

                # Jittered delay to look less like automation.
                time.sleep(self.sleep_time + random.random() * 2)
        except Exception as err:
            # Best-effort diagnostics; also reached when the site ends the
            # session (e.g. stale cookies), hence the advice in the log.
            with open("error.html", "w", encoding="utf-8") as err_source:
                err_source.write(driver.page_source)
            driver.save_screenshot('./error.png')
            logging.error('Something wrong or download finished,Please check the error.png to see the web page.\r\nNormally, you should logout and login, then renew the cookies to solve this problem.')
            logging.error(err)
            self.image_box = None
            return

    def download(self):
        '''
        Top-level entry point: download every configured manga in order.

        manga_url[i] is saved into imgdir[i]; login happens only once,
        before the first book. The browser is closed when all books are
        done (or the list lengths mismatch, which aborts immediately).
        '''
        total_manga = len(self.manga_url)
        total_dir = len(self.imgdir)
        if total_manga != total_dir:
            logging.error('Total manga urls given not equal to imgdir.')
            return

        for i in range(total_manga):
            t_manga_url = self.manga_url[i]
            t_img_dir = self.imgdir[i]
            self.check_implementation(t_manga_url)
            if i == 0:
                self.login()
            logging.info("Starting download manga %d, imgdir: %s",
                         i + 1, t_img_dir)
            self.prepare_download(t_img_dir, t_manga_url)
            self.download_book(t_img_dir)
            logging.info("Finished download manga %d, imgdir: %s",
                         i + 1, t_img_dir)
            time.sleep(2)
        self.driver.close()
        self.driver.quit()
-------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Main file 3 | ''' 4 | 5 | from downloader import Downloader 6 | 7 | settings = { 8 | # Manga urls, should be the same website 9 | 'manga_url': [ 10 | 'URL_1', 11 | 'URL_2' 12 | ], 13 | # Your cookies 14 | 'cookies': 'YOUR_COOKIES_HERE', 15 | # Folder names to store the Manga, the same order with manga_url 16 | 'imgdir': [ 17 | 'IMGDIR_FOR_URL_1', 18 | 'IMGDIR_FOR_URL_2' 19 | ], 20 | # Resolution, (Width, Height), For coma this doesn't matter. 21 | 'res': (784, 1200), 22 | # Sleep time for each page (Second), normally no need to change. 23 | 'sleep_time': 1, 24 | # Time wait for page loading (Second), if your network is good, you can reduce this parameter. 25 | 'loading_wait_time': 20, 26 | # Cut image, (left, upper, right, lower) in pixel, None means do not cut the image. This often used to cut the edge. 27 | # Like (0, 0, 0, 3) means cut 3 pixel from bottom of the image. 28 | # or set dynamic to allow the scrypt to cut_images dynamictly (This work only correct if start_page is None) 29 | # this removed whitespace on the corners, initialised by the Cover. 30 | 'cut_image': None, 31 | # File name prefix, if you want your file name like 'klk_v1_001.jpg', write 'klk_v1' here. 32 | 'file_name_prefix': '', 33 | # File name digits count, if you want your file name like '001.jpg', write 3 here. 
34 | 'number_of_digits': 3, 35 | # Start page, if you want to download from 3 page, set this to 3, None means from 0 36 | 'start_page': None, 37 | # End page, if you want to download until 10 page, set this to 10, None means until finished 38 | 'end_page': None, 39 | } 40 | 41 | if __name__ == '__main__': 42 | downloader = Downloader(**settings) 43 | downloader.download() 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | attrs==23.1.0 2 | certifi==2023.7.22 3 | charset-normalizer==3.2.0 4 | exceptiongroup==1.1.2 5 | h11==0.14.0 6 | idna==3.4 7 | outcome==1.2.0 8 | Pillow==9.2.0 9 | PySocks==1.7.1 10 | requests==2.31.0 11 | selenium==4.18.1 12 | sniffio==1.3.0 13 | sortedcontainers==2.4.0 14 | trio==0.22.2 15 | trio-websocket==0.10.3 16 | typing_extensions==4.10.0 17 | undetected-chromedriver==3.5.5 18 | urllib3==1.26.16 19 | websockets==11.0.3 20 | wsproto==1.2.0 21 | -------------------------------------------------------------------------------- /website_actions/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions 3 | ''' 4 | import glob 5 | from os.path import basename, dirname, isfile, join 6 | 7 | modules = glob.glob(join(dirname(__file__), "*.py")) 8 | __all__ = [basename(f)[:-3] for f in modules if isfile(f) 9 | and not f.endswith('__init__.py')] 10 | -------------------------------------------------------------------------------- /website_actions/abstract_website_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Abstract website actions, XU Zhengyi, 2020/05/05 3 | ''' 4 | 5 | from abc import ABCMeta, abstractmethod 6 | 7 | 8 | class WebsiteActions: 9 | ''' 10 | Base class for all website actions. 
11 | ''' 12 | __metaclass__ = ABCMeta 13 | 14 | def __init__(self): 15 | self.class_name = self.__class__.__name__ 16 | 17 | def get_class_name(self) -> str: 18 | ''' 19 | Get class name. 20 | ''' 21 | return self.class_name 22 | 23 | @staticmethod 24 | @abstractmethod 25 | def check_url(manga_url) -> bool: 26 | ''' 27 | Give a manga url and check if the website is this class. 28 | ''' 29 | return False 30 | 31 | @property 32 | @abstractmethod 33 | def login_url(self) -> str: 34 | ''' 35 | Login url property. 36 | ''' 37 | pass 38 | 39 | @abstractmethod 40 | def get_sum_page_count(self, driver) -> int: 41 | ''' 42 | Get sum page count on for the manga. 43 | ''' 44 | pass 45 | 46 | @abstractmethod 47 | def move_to_page(self, driver, page) -> bool: 48 | ''' 49 | Move to given page. 50 | ''' 51 | pass 52 | 53 | @abstractmethod 54 | def wait_loading(self, driver): 55 | ''' 56 | Wait page loading. 57 | ''' 58 | pass 59 | 60 | def before_download(self, driver): 61 | ''' 62 | Run after page loaded, can be used to close some hint windows. 63 | ''' 64 | pass 65 | 66 | @abstractmethod 67 | def get_imgdata(self, driver, now_page): 68 | ''' 69 | Get imgdata on the page. Return 70 | ''' 71 | pass 72 | 73 | @abstractmethod 74 | def get_now_page(self, driver) -> int: 75 | ''' 76 | Get now page. 
77 | ''' 78 | pass 79 | -------------------------------------------------------------------------------- /website_actions/bookwalker_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for bookwalker.jp 3 | ''' 4 | import base64 5 | 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.support.ui import WebDriverWait 8 | 9 | try: 10 | from abstract_website_actions import WebsiteActions 11 | except: 12 | from website_actions.abstract_website_actions import WebsiteActions 13 | 14 | 15 | class BookwalkerJP(WebsiteActions): 16 | ''' 17 | bookwalker.jp 18 | ''' 19 | login_url = 'https://member.bookwalker.jp/app/03/login' 20 | js = '' 21 | 22 | def check_is_loading(self, list_ele): 23 | ''' 24 | Check is loading. 25 | ''' 26 | is_loading = False 27 | for i in list_ele: 28 | if i.is_displayed() is True: 29 | is_loading = True 30 | break 31 | return is_loading 32 | 33 | @staticmethod 34 | def check_url(manga_url): 35 | return manga_url.find('bookwalker.jp') != -1 36 | 37 | def get_sum_page_count(self, driver): 38 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[1]) 39 | 40 | def move_to_page(self, driver, page): 41 | driver.execute_script( 42 | f'NFBR.a6G.Initializer.{self.js}.menu.options.a6l.moveToPage(%d)' % page) 43 | 44 | def wait_loading(self, driver): 45 | WebDriverWait(driver, 600).until_not(lambda x: self.check_is_loading( 46 | x.find_elements(By.CSS_SELECTOR, ".loading"))) 47 | 48 | def get_imgdata(self, driver, now_page): 49 | canvas = driver.find_element(By.CSS_SELECTOR, ".currentScreen canvas") 50 | img_base64 = driver.execute_script( 51 | "return arguments[0].toDataURL('image/png', 1.0).substring(21);", canvas) 52 | return base64.b64decode(img_base64) 53 | 54 | def get_now_page(self, driver): 55 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[0]) 56 | 57 
| def before_download(self, driver): 58 | for key in driver.execute_script('return Object.keys(NFBR.a6G.Initializer)'): 59 | if 'menu' in driver.execute_script(f'return Object.keys(NFBR.a6G.Initializer.{key})'): 60 | self.js = key 61 | break 62 | -------------------------------------------------------------------------------- /website_actions/bookwalker_tw_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for bookwalker.com.tw 3 | ''' 4 | import base64 5 | 6 | from selenium.webdriver.common.by import By 7 | from selenium.webdriver.support.ui import WebDriverWait 8 | 9 | try: 10 | from abstract_website_actions import WebsiteActions 11 | except: 12 | from website_actions.abstract_website_actions import WebsiteActions 13 | 14 | 15 | class BookwalkerTW(WebsiteActions): 16 | ''' 17 | bookwalker.com.tw 18 | ''' 19 | login_url = 'https://www.bookwalker.com.tw/user/login' 20 | js = '' 21 | 22 | def check_is_loading(self, list_ele): 23 | ''' 24 | Check is loading. 
25 | ''' 26 | is_loading = False 27 | for i in list_ele: 28 | if i.is_displayed() is True: 29 | is_loading = True 30 | break 31 | return is_loading 32 | 33 | @staticmethod 34 | def check_url(manga_url): 35 | return manga_url.find('bookwalker.com.tw') != -1 36 | 37 | def get_sum_page_count(self, driver): 38 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[1]) 39 | 40 | def move_to_page(self, driver, page): 41 | driver.execute_script( 42 | f'NFBR.a6G.Initializer.{self.js}.menu.options.a6l.moveToPage(%d)' % page) 43 | 44 | def wait_loading(self, driver): 45 | WebDriverWait(driver, 600).until_not(lambda x: self.check_is_loading( 46 | x.find_elements(By.CSS_SELECTOR, ".loading"))) 47 | 48 | def get_imgdata(self, driver, now_page): 49 | canvas = driver.find_element(By.CSS_SELECTOR, ".currentScreen canvas") 50 | img_base64 = driver.execute_script( 51 | "return arguments[0].toDataURL('image/png', 1.0).substring(21);", canvas) 52 | return base64.b64decode(img_base64) 53 | 54 | def get_now_page(self, driver): 55 | return int(str(driver.find_element(By.ID, 'pageSliderCounter').get_attribute('textContent')).split('/')[0]) 56 | 57 | def before_download(self, driver): 58 | for key in driver.execute_script('return Object.keys(NFBR.a6G.Initializer)'): 59 | if 'menu' in driver.execute_script(f'return Object.keys(NFBR.a6G.Initializer.{key})'): 60 | self.js = key 61 | break 62 | 63 | -------------------------------------------------------------------------------- /website_actions/cmoa_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for www.cmoa.jp 3 | ''' 4 | import base64 5 | from io import BytesIO 6 | 7 | import PIL.Image as pil_image 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support.ui import WebDriverWait 10 | 11 | try: 12 | from abstract_website_actions import WebsiteActions 13 | except: 14 | from 
website_actions.abstract_website_actions import WebsiteActions 15 | 16 | 17 | class CmoaJP(WebsiteActions): 18 | ''' 19 | cmoa.jp 20 | ''' 21 | login_url = 'https://www.cmoa.jp/' 22 | 23 | @staticmethod 24 | def get_file_content_chrome(driver, uri): 25 | result = driver.execute_async_script(""" 26 | var uri = arguments[0]; 27 | var callback = arguments[1]; 28 | var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)}; 29 | var xhr = new XMLHttpRequest(); 30 | xhr.responseType = 'arraybuffer'; 31 | xhr.onload = function(){ callback(toBase64(xhr.response)) }; 32 | xhr.onerror = function(){ callback(xhr.status) }; 33 | xhr.open('GET', uri); 34 | xhr.send(); 35 | """, uri) 36 | if type(result) == int: 37 | raise Exception("Request failed with status %s" % result) 38 | return base64.b64decode(result) 39 | 40 | @staticmethod 41 | def check_url(manga_url): 42 | return manga_url.find('cmoa.jp/bib/speedreader') != -1 43 | 44 | def get_sum_page_count(self, driver): 45 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[1]) 46 | 47 | def move_to_page(self, driver, page): 48 | driver.execute_script( 49 | 'SpeedBinb.getInstance("content").moveTo(%d)' % page) 50 | 51 | def wait_loading(self, driver): 52 | WebDriverWait(driver, 600).until_not( 53 | lambda x: x.find_element(By.ID, "start_wait")) 54 | 55 | def get_imgdata(self, driver, now_page): 56 | image_elements = driver.find_element( 57 | By.ID, 'content-p%d' % 
now_page).find_elements(By.CSS_SELECTOR, 'img') 58 | 59 | imgs_arr = [] 60 | imgs_height = [0] 61 | mmset = 4 62 | for i in image_elements: 63 | blob_url = i.get_attribute('src') 64 | image_data = self.get_file_content_chrome(driver, blob_url) 65 | part_img = pil_image.open(BytesIO(image_data)) 66 | imgs_arr.append(part_img) 67 | width, height = part_img.size 68 | imgs_height.append(height + imgs_height[-1] - mmset) 69 | 70 | last_img_height = imgs_height.pop() + mmset 71 | 72 | final_img = pil_image.new('RGB', (width, last_img_height)) 73 | 74 | for i in range(len(imgs_arr)): 75 | final_img.paste(imgs_arr[i], (0, imgs_height[i])) 76 | 77 | final_data = BytesIO() 78 | final_img.save(final_data, format='PNG') 79 | return final_data.getbuffer() 80 | 81 | def get_now_page(self, driver): 82 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[0]) 83 | 84 | def before_download(self, driver): 85 | driver.execute_script('parent.closeTips()') 86 | -------------------------------------------------------------------------------- /website_actions/coma_jp_novel.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for coma.jp novels 3 | ''' 4 | import base64 5 | import time 6 | 7 | from selenium.webdriver.common.by import By 8 | from selenium.webdriver.support.ui import WebDriverWait 9 | 10 | try: 11 | from abstract_website_actions import WebsiteActions 12 | except: 13 | from website_actions.abstract_website_actions import WebsiteActions 14 | 15 | 16 | class CmoaJPNovels(WebsiteActions): 17 | ''' 18 | coma.jp novels 19 | ''' 20 | login_url = 'https://www.cmoa.jp/' 21 | 22 | def next_page(self, driver): 23 | driver.execute_script('moveNextPageSpeech()') 24 | 25 | def prev_page(self, driver): 26 | driver.execute_script('movePrevPageSpeech()') 27 | 28 | def is_finished(self, driver): 29 | return driver.execute_script('return ZHL0PP.Z060JL()') 30 | 31 | 
@staticmethod 32 | def check_url(manga_url): 33 | return manga_url.find('cmoa.jp/bib/reader') != -1 34 | 35 | def get_sum_page_count(self, driver): 36 | self.now_page = 0 37 | sum_page = 0 38 | while not self.is_finished(driver): 39 | sum_page += 1 40 | self.next_page(driver) 41 | time.sleep(0.5) 42 | 43 | for _ in range(sum_page): 44 | self.prev_page(driver) 45 | time.sleep(0.5) 46 | 47 | return sum_page 48 | 49 | def move_to_page(self, driver, page): 50 | if page == self.now_page: 51 | return 52 | 53 | f_to_use = self.next_page 54 | if page < self.now_page: 55 | f_to_use = self.prev_page 56 | 57 | for _ in range(abs(page - self.now_page)): 58 | f_to_use(driver) 59 | 60 | self.now_page = page 61 | 62 | def wait_loading(self, driver): 63 | WebDriverWait(driver, 600).until_not( 64 | lambda x: x.find_element(By.ID, 'ctmble_menu_notification_overlay_span').is_displayed()) 65 | 66 | def get_imgdata(self, driver, now_page): 67 | return driver.get_screenshot_as_png() 68 | 69 | def get_now_page(self, driver): 70 | return self.now_page + 1 71 | 72 | def before_download(self, driver): 73 | WebDriverWait(driver, 600).until_not( 74 | lambda x: x.find_element(By.ID, 'preMessage')) 75 | driver.switch_to.frame(driver.find_element(By.ID, 'binb')) 76 | WebDriverWait(driver, 600).until_not( 77 | lambda x: x.find_element(By.ID, 'msg_outer_div').is_displayed()) 78 | -------------------------------------------------------------------------------- /website_actions/takeshobo_co_jp_actions.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Website actions for takeshobo.co.jp 3 | ''' 4 | import base64 5 | from io import BytesIO 6 | 7 | import PIL.Image as pil_image 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.support.ui import WebDriverWait 10 | 11 | try: 12 | from abstract_website_actions import WebsiteActions 13 | except: 14 | from website_actions.abstract_website_actions import WebsiteActions 15 | 16 | 17 | 
class TakeshoboJP(WebsiteActions): 18 | ''' 19 | takeshobo.co.jp 20 | ''' 21 | login_url = 'https://gammaplus.takeshobo.co.jp/' 22 | 23 | @staticmethod 24 | def get_file_content_chrome(driver, uri): 25 | result = driver.execute_async_script(""" 26 | var uri = arguments[0]; 27 | var callback = arguments[1]; 28 | var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)}; 29 | var xhr = new XMLHttpRequest(); 30 | xhr.responseType = 'arraybuffer'; 31 | xhr.onload = function(){ callback(toBase64(xhr.response)) }; 32 | xhr.onerror = function(){ callback(xhr.status) }; 33 | xhr.open('GET', uri); 34 | xhr.send(); 35 | """, uri) 36 | if type(result) == int: 37 | raise Exception("Request failed with status %s" % result) 38 | return base64.b64decode(result) 39 | 40 | @staticmethod 41 | def check_url(manga_url): 42 | return manga_url.find('takeshobo.co.jp/manga/') != -1 43 | 44 | def get_sum_page_count(self, driver): 45 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[1]) 46 | 47 | def move_to_page(self, driver, page): 48 | driver.execute_script( 49 | 'SpeedBinb.getInstance("content").moveTo(%d)' % page) 50 | 51 | def wait_loading(self, driver): 52 | WebDriverWait(driver, 600).until_not( 53 | lambda x: x.find_element(By.ID, "start_wait")) 54 | 55 | def get_imgdata(self, driver, now_page): 56 | image_elements = driver.find_element( 57 | By.ID, 'content-p%d' % now_page).find_elements(By.CSS_SELECTOR, 'img') 58 | 59 | 
imgs_arr = [] 60 | imgs_height = [0] 61 | mmset = 4 62 | for i in image_elements: 63 | blob_url = i.get_attribute('src') 64 | image_data = self.get_file_content_chrome(driver, blob_url) 65 | part_img = pil_image.open(BytesIO(image_data)) 66 | imgs_arr.append(part_img) 67 | width, height = part_img.size 68 | imgs_height.append(height + imgs_height[-1] - mmset) 69 | 70 | last_img_height = imgs_height.pop() + mmset 71 | 72 | final_img = pil_image.new('RGB', (width, last_img_height)) 73 | 74 | for i in range(len(imgs_arr)): 75 | final_img.paste(imgs_arr[i], (0, imgs_height[i])) 76 | 77 | final_data = BytesIO() 78 | final_img.save(final_data, format='PNG') 79 | return final_data.getbuffer() 80 | 81 | def get_now_page(self, driver): 82 | return int(str(driver.execute_script("return document.getElementById('menu_slidercaption').innerHTML")).split('/')[0]) 83 | 84 | def before_download(self, driver): 85 | driver.execute_script('parent.closeTips()') 86 | --------------------------------------------------------------------------------