├── .editorconfig
├── .env.example
├── .gitattributes
├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── 1_general.md
│ ├── 2_new-source.md
│ ├── 3_source-issue.md
│ └── 4_bug_report.md
├── contribs.json
├── dependabot.yml
└── workflows
│ ├── index-gen.yml
│ ├── lint.yml
│ └── release.yml
├── .gitignore
├── .python-version
├── Aptfile
├── LICENSE
├── Procfile
├── README.md
├── README.pip
├── app.json
├── compose.yml
├── etc
└── wuxiaworld.com
│ ├── help.md
│ ├── wuxia.proto
│ └── wuxia.proto.json
├── lncrawl
├── VERSION
├── __init__.py
├── __main__.py
├── assets
│ ├── __init__.py
│ ├── banner.py
│ ├── chars.py
│ ├── colors.py
│ ├── epub
│ │ ├── __init__.py
│ │ ├── chapter.xhtml
│ │ ├── cover.xhtml
│ │ └── style.css
│ ├── languages.py
│ ├── user_agents.py
│ ├── version.py
│ └── web
│ │ ├── __init__.py
│ │ ├── script.js
│ │ └── style.css
├── binders
│ ├── __init__.py
│ ├── calibre.py
│ ├── epub.py
│ ├── json.py
│ ├── text.py
│ └── web.py
├── bots
│ ├── __init__.py
│ ├── _sample.py
│ ├── console
│ │ ├── __init__.py
│ │ ├── get_crawler.py
│ │ ├── integration.py
│ │ ├── login_info.py
│ │ ├── open_folder_prompt.py
│ │ ├── output_style.py
│ │ ├── range_selection.py
│ │ └── resume_download.py
│ ├── discord
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── discord_bot.py
│ │ └── message_handler.py
│ ├── lookup
│ │ ├── __init__.py
│ │ ├── analyze.py
│ │ ├── generator.py
│ │ └── prompts.py
│ ├── server
│ │ ├── __init__.py
│ │ ├── api
│ │ │ ├── __init__.py
│ │ │ ├── artifacts.py
│ │ │ ├── auth.py
│ │ │ ├── jobs.py
│ │ │ ├── novels.py
│ │ │ ├── runner.py
│ │ │ └── users.py
│ │ ├── app.py
│ │ ├── config.py
│ │ ├── context.py
│ │ ├── db.py
│ │ ├── exceptions.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── _base.py
│ │ │ ├── job.py
│ │ │ ├── pagination.py
│ │ │ └── user.py
│ │ ├── security.py
│ │ ├── services
│ │ │ ├── __init__.py
│ │ │ ├── artifacts.py
│ │ │ ├── jobs.py
│ │ │ ├── novels.py
│ │ │ ├── runner.py
│ │ │ ├── scheduler.py
│ │ │ ├── tier.py
│ │ │ └── users.py
│ │ ├── ui
│ │ │ └── __index__.py
│ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── aborter.py
│ │ │ ├── decorators.py
│ │ │ ├── json_tools.py
│ │ │ ├── text_tools.py
│ │ │ └── time_utils.py
│ └── telegram
│ │ └── __init__.py
├── constants.py
├── core
│ ├── __init__.py
│ ├── app.py
│ ├── arguments.py
│ ├── browser.py
│ ├── cleaner.py
│ ├── crawler.py
│ ├── display.py
│ ├── download_chapters.py
│ ├── download_images.py
│ ├── exeptions.py
│ ├── logconfig.py
│ ├── metadata.py
│ ├── novel_info.py
│ ├── novel_search.py
│ ├── proxy.py
│ ├── scraper.py
│ ├── soup.py
│ ├── sources.py
│ └── taskman.py
├── models
│ ├── __init__.py
│ ├── chapter.py
│ ├── formats.py
│ ├── meta.py
│ ├── novel.py
│ ├── search_result.py
│ ├── session.py
│ └── volume.py
├── templates
│ ├── __init__.py
│ ├── browser
│ │ ├── __init__.py
│ │ ├── basic.py
│ │ ├── chapter_only.py
│ │ ├── general.py
│ │ ├── login.py
│ │ ├── optional_volume.py
│ │ ├── searchable.py
│ │ └── with_volume.py
│ ├── madara.py
│ ├── mangastream.py
│ ├── novelfull.py
│ ├── novelmtl.py
│ ├── novelpub.py
│ └── soup
│ │ ├── __init__.py
│ │ ├── chapter_only.py
│ │ ├── general.py
│ │ ├── optional_volume.py
│ │ ├── searchable.py
│ │ └── with_volume.py
├── utils
│ ├── __init__.py
│ ├── common.py
│ ├── imgen.py
│ ├── kindlegen_download.py
│ ├── material_colors.py
│ ├── pbincli.py
│ ├── platforms.py
│ ├── ratelimit.py
│ ├── sockets.py
│ ├── ssl_no_verify.py
│ ├── tilings.py
│ └── uploader
│ │ ├── __init__.py
│ │ ├── anonfiles.py
│ │ ├── gofile.py
│ │ └── google_drive.py
└── webdriver
│ ├── __init__.py
│ ├── elements.py
│ ├── job_queue.py
│ ├── local.py
│ ├── remote.py
│ └── scripts.py
├── requirements-app.txt
├── requirements-bot.txt
├── requirements-dev.txt
├── requirements.txt
├── res
├── lncrawl-icon.png
├── lncrawl-web.png
└── lncrawl.ico
├── scripts
├── Dockerfile
├── bitanon.sh
├── build.bat
├── build.sh
├── check_sources.py
├── entry_point.sh
├── index_gen.py
├── lint.bat
├── lint.sh
├── lncrawl.service
├── publish.bat
├── publish.sh
├── push_tag.bat
├── push_tag.sh
├── push_tag_force.bat
├── push_tag_force.sh
├── rebrandly.sh
├── start.sh
└── stop.sh
├── setup.cfg
├── setup.py
├── setup_pyi.py
└── sources
├── __init__.py
├── _examples
├── _00_basic.py
├── _01_general_soup.py
├── _02_searchable_soup.py
├── _03_chapter_only_soup.py
├── _04_searchable_chapter_only_soup.py
├── _05_with_volume_soup.py
├── _06_searchable_with_volume_soup.py
├── _07_optional_volume_soup.py
├── _08_searchable_optional_volume_soup.py
├── _09_basic_browser.py
├── _10_general_browser.py
├── _11_searchable_browser.py
├── _12_chapter_only_browser.py
├── _13_searchable_chapter_only_browser.py
├── _14_with_volume_browser.py
├── _15_searchable_with_volume_browser.py
├── _16_optional_volume_browser.py
└── _17_searchable_optional_volume_browser.py
├── _index.json
├── _rejected.json
├── ar
├── arnovel.py
├── kolnovel.py
└── rewayatclub.py
├── en
├── 1
│ └── 1stkissnovel.py
├── 4
│ └── 4scanlation.py
├── 8
│ ├── 888novel.py
│ └── 88tang.py
├── a
│ ├── allnovel.py
│ ├── allnovelfull.py
│ ├── americanfaux.py
│ ├── amnesiactl.py
│ ├── ancientheartloss.py
│ ├── anythingnovel.py
│ ├── aquamanga.py
│ ├── arangscans.py
│ ├── arcanetranslations.py
│ ├── asadatrans.py
│ ├── asianhobbyist.py
│ ├── asianovel.py
│ ├── asianovel_net.py
│ └── automtl.py
├── b
│ ├── babelnovel.py
│ ├── bakapervert.py
│ ├── bato.py
│ ├── beautymanga.py
│ ├── bestlightnovel.py
│ ├── blackboxtl.py
│ ├── bonnovel.py
│ ├── booknet.py
│ ├── boxnovel.py
│ ├── boxnovelcom.py
│ ├── boxnovelonline.py
│ ├── boxnovelorg.py
│ └── bronovel.py
├── c
│ ├── centinni.py
│ ├── chereads.py
│ ├── chickengege.py
│ ├── chrysanthemumgarden.py
│ ├── ckandawrites.online.py
│ ├── clicknovel.py
│ ├── coffeemanga.py
│ ├── creativenovels.py
│ ├── crescentmoon.py
│ └── fu_kemao.py
├── d
│ ├── daonovel.py
│ ├── daotranslate.py
│ ├── demontrans.py
│ ├── divinedaolibrary.py
│ ├── dmtrans.py
│ ├── dobelyuwai.py
│ ├── dragon_tea.py
│ ├── dsrealmtrans.py
│ └── dummynovels.py
├── e
│ ├── ebotnovel.py
│ ├── engnovel.py
│ └── exiledrebels.py
├── f
│ ├── fanfiction.py
│ ├── fanmtl.py
│ ├── fanstrans.py
│ ├── fantasyworldonline.py
│ ├── faqwiki.py
│ ├── fenrirealm.py
│ ├── fenrirtranslations.py
│ ├── fictionpress.py
│ ├── flyinglines.py
│ ├── foxteller.py
│ ├── freefullnovel.py
│ ├── freelightnovel.py
│ ├── freemanga.py
│ ├── freewebnovel.py
│ ├── fringecapybara.py
│ ├── fsapk.py
│ ├── fujitrans.py
│ ├── fullnovellive.py
│ └── fuyuneko.py
├── g
│ ├── genesistls.py
│ └── gravitytales.py
├── h
│ ├── hanyunovels.py
│ ├── harimanga.py
│ ├── hostednovel.py
│ ├── hotnovelfull.py
│ └── hui3r.py
├── i
│ ├── imperfectcomic.py
│ ├── inadequatetrans.py
│ ├── infinitetrans.py
│ ├── inkitt.py
│ ├── instadoses.py
│ ├── isekaiscan.py
│ ├── isekaiscaneu.py
│ ├── isotls.py
│ └── snowycodex.py
├── j
│ ├── jpmtl.py
│ └── justatrans.py
├── k
│ ├── katreadingcafe.py
│ ├── kingmanga.py
│ ├── kissmanga.py
│ ├── kissnovel.py
│ ├── kitenovel.py
│ ├── kolnovelnewsite.py
│ └── koreanmtl.py
├── l
│ ├── ladybirdtrans.py
│ ├── latestnovel.py
│ ├── lazygirltranslations.py
│ ├── leafstudio.py
│ ├── lemontree.py
│ ├── librarynovel.py
│ ├── lightnovelbastion.py
│ ├── lightnovelheaven.py
│ ├── lightnovelkiss.py
│ ├── lightnovelme.py
│ ├── lightnovelmeta.py
│ ├── lightnovelonline.py
│ ├── lightnovelpub.py
│ ├── lightnovelreader.py
│ ├── lightnovelshub.py
│ ├── lightnovelsonl.py
│ ├── lightnoveltv.py
│ ├── lightnovelworld.com.py
│ ├── lightnovelworld.py
│ ├── lightnovetrans.py
│ ├── listnovel.py
│ ├── literotica.py
│ ├── lnmtl.py
│ ├── ltnovel.py
│ ├── luminarynovels.py
│ └── lunarletters.py
├── m
│ ├── machinetransorg.py
│ ├── manga-tx.py
│ ├── mangabuddy.py
│ ├── mangachilllove.py
│ ├── mangaread.py
│ ├── mangarockteam.py
│ ├── mangarosie.py
│ ├── mangastic.py
│ ├── mangatoon.py
│ ├── mangatx.py
│ ├── mangaweebs.py
│ ├── manhuaplus.py
│ ├── manhwachill.py
│ ├── meownovel.py
│ ├── miraslation.py
│ ├── mixednovel.py
│ ├── mltnovels.py
│ ├── mostnovel.py
│ ├── mtlednovels.py
│ ├── mtlnation.py
│ ├── mtlreader.py
│ ├── myboxnovel.py
│ ├── mydramanovel.py
│ ├── myoniyonitrans.py
│ └── mysticalmerries.py
├── n
│ ├── neosekaitranslations.py
│ ├── newnovelorg.py
│ ├── newsnovel.py
│ ├── noblemtl.py
│ ├── noobchan.py
│ ├── novel-bin.net.py
│ ├── novel-bin.py
│ ├── novel27.py
│ ├── novel35.py
│ ├── novelall.py
│ ├── novelbin.net.py
│ ├── novelbin.py
│ ├── novelcake.py
│ ├── novelcool.py
│ ├── novelcrush.py
│ ├── novelfull.py
│ ├── novelfullme.py
│ ├── novelfullplus.py
│ ├── novelgate.py
│ ├── novelhall.py
│ ├── novelhard.py
│ ├── novelhi.py
│ ├── novelhulk.py
│ ├── novelhunters.py
│ ├── novelight.py
│ ├── novelmao.py
│ ├── novelmic.py
│ ├── novelmt.py
│ ├── novelmtl.py
│ ├── novelmultiverse.py
│ ├── novelnext.py
│ ├── novelnextz.py
│ ├── novelonlinefree.py
│ ├── novelonlinefull.py
│ ├── novelpassion.py
│ ├── novelplanet.py
│ ├── novelpub.py
│ ├── novelrare.py
│ ├── novelraw.py
│ ├── novelsala.py
│ ├── novelsemperor.py
│ ├── novelsite.py
│ ├── novelsonline.py
│ ├── novelspl.py
│ ├── novelspread.py
│ ├── novelsrock.py
│ ├── noveltranslate.py
│ ├── noveluniverse.py
│ ├── novelupdatescc.py
│ ├── novelv.py
│ ├── novelww.py
│ ├── novelzec.py
│ ├── novlove.py
│ └── nyxtranslation.py
├── o
│ ├── omgnovels.py
│ ├── oppatrans.py
│ ├── oppatranslations.py
│ ├── ornovel.py
│ └── overabook.py
├── p
│ ├── pandamanga.py
│ ├── pandanovelco.py
│ ├── pandanovelorg.py
│ ├── peryinfo.py
│ ├── pianmanga.py
│ └── puretl.py
├── q
│ └── qidianunderground.py
├── r
│ ├── raeitranslations.py
│ ├── randomnovel.py
│ ├── ranobes.py
│ ├── readlightnovelcc.py
│ ├── readlightnovelorg.py
│ ├── readlightnovelsnet.py
│ ├── readmanganato.py
│ ├── readmtl.py
│ ├── readnovelfull.py
│ ├── readnovelz.py
│ ├── readonlinenovels.py
│ ├── readwebnovels.py
│ ├── readwn.py
│ ├── reaperscans.py
│ ├── rebirthonline.py
│ ├── reincarnationpalace.py
│ ├── relibrary.py
│ ├── royalroad.py
│ └── rpgnovels.py
├── s
│ ├── scribblehub.py
│ ├── secondlifetranslations.py
│ ├── shalvation.py
│ ├── shanghaifantasy.py
│ ├── shinsori.py
│ ├── skydemonorder.py
│ ├── skynovel.py
│ ├── sleepytrans.py
│ ├── smnovels.py
│ ├── sonicmtl.py
│ ├── steambun.py
│ ├── supernovel.py
│ └── systemtranslation.py
├── t
│ ├── tamagotl.py
│ ├── tapread.py
│ ├── teanovel.py
│ ├── tigertranslations.py
│ ├── tipnovel.py
│ ├── tomotrans.py
│ ├── toonily.py
│ ├── topmanhua.py
│ ├── totallytranslations.py
│ ├── translateindo.py
│ ├── travistranslations.py
│ └── tunovelaligera.py
├── u
│ └── usefulnovel.py
├── v
│ ├── veratales.py
│ ├── viewnovel.py
│ ├── vipnovel.py
│ ├── virlyce.py
│ ├── vistrans.py
│ └── volarenovels.py
├── w
│ ├── wanderinginn.py
│ ├── webnovel.py
│ ├── webnovelonlinecom.py
│ ├── webnovelonlinenet.py
│ ├── webnovelpub.py
│ ├── webtoon.py
│ ├── whatsawhizzerwebnovels.py
│ ├── whitemoonlightnovels.py
│ ├── wnmtl.py
│ ├── wondernovels.py
│ ├── woopread.py
│ ├── wordexcerpt.py
│ ├── wordrain.py
│ ├── writerupdates.py
│ ├── wspadancewichita.py
│ ├── wujizun.py
│ ├── wuxiablog.py
│ ├── wuxiabox.py
│ ├── wuxiacity.py
│ ├── wuxiaclick.py
│ ├── wuxiaco.py
│ ├── wuxiacom.py
│ ├── wuxiahub.py
│ ├── wuxialeague.py
│ ├── wuxiamtl.py
│ ├── wuxianovelhub.py
│ ├── wuxiaonline.py
│ ├── wuxiapub.py
│ ├── wuxiar.py
│ ├── wuxiasite.py
│ ├── wuxiaspot.py
│ ├── wuxiau.py
│ ├── wuxiav.py
│ ├── wuxiaworldio.py
│ ├── wuxiaworldlive.py
│ ├── wuxiaworldsite.py
│ ├── wuxiax.py
│ └── wuxiaz.py
├── x
│ └── xiainovel.py
└── z
│ ├── zenithnovels.py
│ ├── zetrotranslation.py
│ ├── zinmanga.py
│ └── zinnovel.py
├── es
├── domentranslations.py
└── novelasligeras.py
├── fr
├── animesama.py
├── chireads.py
├── lightnovelfr.py
├── lnmtlfr.py
├── noveldeglace.py
└── xiaowaz.py
├── id
├── darktrans.py
├── grensia_blogspot.py
├── idqidian.py
├── indomtl.py
├── indowebnovel.py
├── meionovel.py
├── morenovel.py
├── novelgo.py
├── novelku.py
├── novelringan.py
├── noveltoon.py
├── wbnovel.py
├── webnovelindonesia.py
├── webnovelover.py
├── worldnovelonline.py
├── yukinovel.py
└── zhiend.py
├── jp
└── s
│ └── syosetu.py
├── multi
├── foxaholic.py
├── mtlnovel.py
├── novelupdates.py
├── quotev.py
├── wattpad.py
├── webfic.py
└── wtrlab.py
├── pt
├── blnovels.py
├── centralnovel.py
└── ceunovel.py
├── ru
├── bestmanga.py
├── ifreedom.py
├── jaomix.py
├── litnet.py
├── ranobelib.py
├── ranobenovel.py
├── renovels.py
└── rulate.py
├── tr
└── fenrirscan.py
├── vi
├── lnhakone.py
└── truenfull.py
└── zh
├── 27k.py
├── 69shuba.cx.py
├── 69shuba.py
├── daocaorenshuwu.py
├── ddxsss.py
├── ixdzs.py
├── novel543.py
├── piaotian.py
├── powanjuan.py
├── shw5.py
├── soxs.py
├── trxs.py
├── uukanshu.py
├── uukanshu_sj.py
├── xbanxia.py
└── xnunu.py
/.editorconfig:
--------------------------------------------------------------------------------
1 | # EditorConfig is awesome: https://editorconfig.org/
2 |
3 | # top-most EditorConfig file
4 | root = true
5 |
6 | # Unix-style newlines with a newline ending every file
7 | [*]
8 | end_of_line = lf
9 | insert_final_newline = true
10 |
11 | # Set default charset
12 | [*.{js,py}]
13 | charset = utf-8
14 |
15 | [*.py]
16 | indent_style = space
17 | indent_size = 4
18 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | # Values should be the module names inside the `lncrawl/bots` folder (e.g. console, discord, telegram).
2 | # By default, the console bot will be chosen if this is left empty or invalid.
3 | BOT=console
4 |
5 | # Available levels: NOTSET, WARN, INFO, DEBUG, FATAL, ERROR
6 | # If this variable is unset or NONE, logging will not be configured.
7 | LOG_LEVEL=INFO
8 |
9 | # Configs for bots
10 | TELEGRAM_TOKEN=
11 | DISCORD_TOKEN=
12 | DISCORD_DISABLE_SEARCH=false
13 | DISCORD_SIGNAL_CHAR=!
14 |
15 | # Cloud drives for upload | Options: [ANONFILES, GOFILE, GOOGLE_DRIVE] | Default: ANONFILES
16 | CLOUD_DRIVE=ANONFILES
17 |
18 | # Google Drive Config
19 | GOOGLE_DRIVE_CREDENTIAL_FILE=mycreds.txt
20 | GOOGLE_DRIVE_FOLDER_ID=118iN1jzavVV-9flrLPZo7DOi0cuxrQ5F
21 |
22 | # Password for VNC server
23 | VNC_PASSWORD=secret
24 |
25 | # Server Config
26 | SERVER_SECRET=
27 | SERVER_ADMIN_EMAIL=
28 | SERVER_ADMIN_PASSWORD=
29 | RUNNER_INTERVAL_IN_SECOND=10
30 | DATABASE_URL=sqlite:///.server/sqlite.db
31 |
--------------------------------------------------------------------------------
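Note: a minimal sketch of how the BOT value above is picked up at startup; it mirrors the lookup in lncrawl/core/__init__.py (included later in this dump), where an empty or unknown value falls back to the console bot.

import os

# Sketch only: the real dispatch happens in lncrawl/core/__init__.py via run_bot()
bot = os.getenv("BOT", "").lower()
# run_bot(bot)  # empty or unknown values fall back to the console bot
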
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto eol=lf
2 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: ['https://paypal.me/sd1pu']
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/1_general.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: General
3 | about: Create a general issue
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/2_new-source.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Request New Source
3 | about: Want to request a new source that is not yet listed in the README.md?
4 | title: Replace this with a URL
5 | labels: source
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | - Language:
19 | - Supports Searching: Yes/No
20 | - Contains Machine Translations: Yes/No
21 | - Contains Manga/Manhua/Manhwa: Yes/No
22 | - Has CloudFlare Protection: Yes/No
23 |
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/3_source-issue.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Source Not Working
3 | about: Having trouble with a specific source? (e.g. failing to crawl or missing chapters or content)
4 | title: Fix this source
5 | labels: source-issue
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 |
12 | ## Let us know
13 |
14 |
15 |
16 | **Novel URL**:
17 |
18 |
19 |
--------------------------------------------------------------------------------
/lncrawl/assets/epub/style.css:
--------------------------------------------------------------------------------
1 | img {
2 | width: 100%;
3 | object-fit: scale-down;
4 | object-position: center;
5 | }
6 |
7 | p + br {
8 | display: none;
9 | }
10 |
11 | #intro {
12 | width: 100vw;
13 | height: calc(100% - 30px);
14 | text-align: center;
15 | position: relative;
16 | display: flex;
17 | flex-direction: column;
18 | align-items: center;
19 | justify-content: space-between;
20 | text-align: center;
21 |   letter-spacing: 0.25px;
22 | }
23 |
24 | #intro .header {
25 | height: 200px;
26 | }
27 |
28 | #intro h1 {
29 | opacity: 1;
30 | }
31 | #intro h3 {
32 | opacity: 0.6;
33 | }
34 |
35 | #intro img {
36 | width: 100%;
37 | height: calc(100% - 300px);
38 | object-fit: contain;
39 | object-position: center;
40 | }
41 |
42 | #intro .footer {
43 | height: 50px;
44 | line-height: 24px;
45 | opacity: 0.8;
46 | }
47 |
48 | #cover {
49 | object-fit: cover;
50 | }
51 |
52 | #volume {
53 | width: 100%;
54 | height: 100%;
55 | display: flex;
56 | text-align: center;
57 | align-items: center;
58 | justify-content: center;
59 | }
60 |
--------------------------------------------------------------------------------
/lncrawl/assets/version.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | ROOT = Path(__file__).parent.parent
4 |
5 | with open(str(ROOT / "VERSION"), "r", encoding="utf8") as f:
6 | version = f.read().strip()
7 |
8 |
9 | def get_version():
10 | return version
11 |
--------------------------------------------------------------------------------
/lncrawl/assets/web/__init__.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | ROOT = Path(__file__).parent
4 |
5 |
6 | def get_js_script():
7 | with open(str(ROOT / "script.js"), "r", encoding="utf8") as f:
8 | script = f.read()
9 | return script
10 |
11 |
12 | def get_css_style():
13 | with open(str(ROOT / "style.css"), "r", encoding="utf8") as f:
14 | style = f.read()
15 | return style
16 |
--------------------------------------------------------------------------------
/lncrawl/binders/json.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from pathlib import Path
3 | from typing import Generator
4 |
5 | logger = logging.getLogger(__name__)
6 |
7 |
8 | def make_jsons(app, data) -> Generator[str, None, None]:
9 | root_path = Path(app.output_path)
10 | yield str(root_path / 'meta.json')
11 | for vol in data:
12 | for chap in data[vol]:
13 | file_name = "%s.json" % str(chap["id"]).rjust(5, "0")
14 | file_path = root_path / "json" / file_name
15 | if file_path.is_file():
16 | yield str(file_path)
17 |
--------------------------------------------------------------------------------
/lncrawl/binders/text.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import re
4 | from typing import Generator
5 |
6 | from bs4 import BeautifulSoup
7 |
8 | from ..assets.chars import Chars
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | def make_texts(app, data) -> Generator[str, None, None]:
14 | for vol in data:
15 | dir_name = os.path.join(app.output_path, "text", vol)
16 | os.makedirs(dir_name, exist_ok=True)
17 | for chap in data[vol]:
18 | if not chap.get("body"):
19 | continue
20 | file_name = "%s.txt" % str(chap["id"]).rjust(5, "0")
21 | file_name = os.path.join(dir_name, file_name)
22 | with open(file_name, "w", encoding="utf8") as file:
23 |                 body = chap["body"].replace("
--------------------------------------------------------------------------------
/lncrawl/bots/console/__init__.py:
--------------------------------------------------------------------------------
10 |     def __init__(self) -> None:
11 | self.app: Optional[App] = None
12 | self.search_mode = False
13 |
14 | from .get_crawler import (
15 | choose_a_novel,
16 | confirm_guessed_novel,
17 | confirm_retry,
18 | get_crawlers_to_search,
19 | get_novel_url,
20 | )
21 | from .integration import process_chapter_range, start
22 | from .login_info import get_login_info
23 | from .output_style import (
24 | force_replace_old,
25 | get_output_formats,
26 | get_output_path,
27 | should_pack_by_volume,
28 | )
29 | from .range_selection import (
30 | get_range_from_chapters,
31 | get_range_from_volumes,
32 | get_range_selection,
33 | get_range_using_index,
34 | get_range_using_urls,
35 | )
36 |
--------------------------------------------------------------------------------
/lncrawl/bots/console/login_info.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Tuple
2 |
3 | from questionary import prompt
4 |
5 | from ...core.arguments import get_args
6 |
7 |
8 | def get_login_info(self) -> Optional[Tuple[str, str]]:
9 | """Returns the (email, password) pair for login"""
10 | args = get_args()
11 |
12 | if args.login:
13 | return args.login
14 |
15 | if args.suppress:
16 | return None
17 |
18 | answer = prompt(
19 | [
20 | {
21 | "type": "confirm",
22 | "name": "login",
23 | "message": "Do you want to log in?",
24 | "default": False,
25 | },
26 | ]
27 | )
28 |
29 | if answer["login"]:
30 | answer = prompt(
31 | [
32 | {
33 | "type": "input",
34 | "name": "email",
35 | "message": "User/Email:",
36 | "validate": lambda a: True
37 | if a
38 |                     else "User/Email should not be empty",
39 | },
40 | {
41 | "type": "password",
42 | "name": "password",
43 | "message": "Password:",
44 | "validate": lambda a: True
45 | if a
46 |                     else "Password should not be empty",
47 | },
48 | ]
49 | )
50 | return answer["email"], answer["password"]
51 |
52 | return None
53 |
--------------------------------------------------------------------------------
/lncrawl/bots/console/open_folder_prompt.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | from questionary import prompt
4 |
5 | from ...utils.platforms import Platform
6 | from ...core.arguments import get_args
7 |
8 |
9 | def display_open_folder(folder_path: str):
10 | args = get_args()
11 |
12 | if args.suppress:
13 | return
14 | if Platform.java or Platform.docker:
15 | return
16 |
17 | answer = prompt(
18 | [
19 | {
20 | "type": "confirm",
21 | "name": "exit",
22 | "message": "Open the output folder?",
23 | "default": True,
24 | },
25 | ]
26 | )
27 |
28 | if not answer["exit"]:
29 | return
30 |
31 | if Platform.windows:
32 | os.system(f'explorer.exe "{folder_path}"')
33 | elif Platform.wsl:
34 | os.system(f'cd "{folder_path}" && explorer.exe .')
35 | elif Platform.linux:
36 | os.system(f'xdg-open "{folder_path}"')
37 | elif Platform.mac:
38 | os.system(f'open "{folder_path}"')
39 | else:
40 | print(f"Output Folder: {folder_path}")
41 |
--------------------------------------------------------------------------------
/lncrawl/bots/discord/__init__.py:
--------------------------------------------------------------------------------
1 | from . import config
2 | from .discord_bot import DiscordBot
3 |
4 | __all__ = ["config", "DiscordBot"]
5 |
--------------------------------------------------------------------------------
/lncrawl/bots/lookup/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import re
3 | from urllib.parse import urlparse
4 |
5 | from slugify import slugify
6 |
7 | from ...core.sources import sources_path
8 | from .analyze import analyze_url
9 | from .generator import generate_crawler
10 | from .prompts import get_features, get_novel_url
11 |
12 |
13 | class LookupBot:
14 | log = logging.getLogger(__name__)
15 |
16 | def __init__(self) -> None:
17 | pass
18 |
19 | def start(self) -> None:
20 | novel_url = get_novel_url()
21 |
22 | _parsed = urlparse(novel_url)
23 | base_url = "%s://%s/" % (_parsed.scheme, _parsed.hostname)
24 | name = re.sub(r"(^www\.)|(\.com$)", "", _parsed.hostname)
25 |
26 | template = analyze_url(base_url, novel_url)
27 |
28 | features = get_features()
29 | language = features["language"] or "multi"
30 | has_manga = features["has_manga"]
31 | has_mtl = features["has_mtl"]
32 |
33 | filename = name + ".py"
34 | classname = slugify(
35 | name,
36 | max_length=20,
37 | separator="_",
38 | lowercase=True,
39 | word_boundary=True,
40 | ).title()
41 |
42 | folder = sources_path / language
43 | if language == "en":
44 | folder = folder / filename[0]
45 | filename = str(folder / filename)
46 |
47 | generate_crawler(
48 | template,
49 | output_file=filename,
50 | classname=classname,
51 | base_url=base_url,
52 | has_manga=has_manga,
53 | has_mtl=has_mtl,
54 | )
55 |
--------------------------------------------------------------------------------
/lncrawl/bots/lookup/generator.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Type
3 |
4 | from colorama import Style
5 |
6 | from ...assets.chars import Chars
7 | from ...core.crawler import Crawler
8 | from ...core.exeptions import LNException
9 |
10 |
11 | def generate_crawler(
12 | template: Type[Crawler],
13 | output_file: str,
14 | classname: str,
15 | base_url: str,
16 | has_manga: bool,
17 | has_mtl: bool,
18 | ):
19 | if os.path.exists(output_file):
20 | raise LNException(f"File exists: {output_file}")
21 |
22 | lines = [
23 | "import logging",
24 | "",
25 | f"from {template.__module__} import {template.__name__}",
26 | "",
27 | "logger = logging.getLogger(__name__)",
28 | "",
29 | "",
30 | f"class {classname}({template.__name__}):",
31 | f" has_mtl = {bool(has_mtl)}",
32 | f" has_manga = {bool(has_manga)}",
33 | f' base_url = ["{base_url}"]',
34 | "",
35 | ]
36 | with open(output_file, "w", encoding="utf-8") as f:
37 | f.write("\n".join(lines))
38 |
39 | print()
40 | print(
41 | Style.BRIGHT + Chars.PARTY,
42 | "Generated source file",
43 | Chars.PARTY + Style.RESET_ALL,
44 | )
45 | print(Chars.RIGHT_ARROW, output_file)
46 | print()
47 |
--------------------------------------------------------------------------------
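For illustration, a file produced by generate_crawler() looks roughly like the sketch below; the template module, class name, and URL are hypothetical placeholders, not a real source.

import logging

# Hypothetical template import; the actual module/class depends on analyze_url()'s result
from lncrawl.templates.soup.searchable import SearchableSoupTemplate

logger = logging.getLogger(__name__)


class Examplenovel(SearchableSoupTemplate):
    has_mtl = False
    has_manga = False
    base_url = ["https://examplenovel.com/"]
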
/lncrawl/bots/lookup/prompts.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from questionary import prompt
4 |
5 | from ...assets.languages import language_codes
6 | from ...core.arguments import get_args
7 | from ...core.exeptions import LNException
8 |
9 |
10 | def get_novel_url():
11 | """Return a novel page url"""
12 | args = get_args()
13 | url = args.novel_page
14 |
15 | if url:
16 | if re.match(r"^https?://.+\..+$", url):
17 | return url
18 | else:
19 | raise LNException("Invalid URL of novel page")
20 |
21 | try:
22 | answer = prompt(
23 | [
24 | {
25 | "type": "input",
26 | "name": "novel",
27 | "message": "Enter novel page url:",
28 | "validate": lambda x: (
29 | True
30 | if re.match(r"^https?://.+\..+$", x)
31 | else "Invalid URL of novel page"
32 | ),
33 | },
34 | ]
35 | )
36 | return answer["novel"].strip()
37 | except Exception:
38 | raise LNException("Novel page url or query was not given")
39 |
40 |
41 | def get_features():
42 | """Return the feature list for the crawler"""
43 | answer = prompt(
44 | [
45 | {
46 | "type": "autocomplete",
47 | "name": "language",
48 | "message": "Enter language:",
49 | "choices": list(sorted(language_codes.keys())),
50 | },
51 | {
52 | "type": "confirm",
53 | "name": "has_manga",
54 | "message": "Does it contain Manga/Manhua/Manhwa?",
55 | "default": False,
56 | },
57 | {
58 | "type": "confirm",
59 | "name": "has_mtl",
60 | "message": "Does it contain Machine Translations?",
61 | "default": False,
62 | },
63 | ]
64 | )
65 | return answer
66 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import uvicorn
4 |
5 | from ...core.arguments import get_args
6 | from .app import app
7 | from .context import ServerContext
8 |
9 |
10 | class ServerBot:
11 | log = logging.getLogger(__name__)
12 |
13 | def start(self):
14 | args = get_args()
15 |
16 | ctx = ServerContext()
17 | ctx.db.prepare()
18 | ctx.users.prepare()
19 | ctx.scheduler.start()
20 |
21 | uvicorn.run(
22 | app,
23 | log_level=logging.DEBUG,
24 | port=args.server_port or 8080,
25 | host=args.server_host or '0.0.0.0',
26 | )
27 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/__init__.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends
2 |
3 | from ..security import ensure_admin, ensure_login
4 | from .artifacts import router as artifact
5 | from .auth import router as auth
6 | from .jobs import router as job
7 | from .novels import router as novel
8 | from .runner import router as runner
9 | from .users import router as user
10 |
11 | router = APIRouter()
12 |
13 | router.include_router(
14 | auth,
15 | prefix='/auth',
16 | tags=['Auth'],
17 | )
18 |
19 | router.include_router(
20 | user,
21 | prefix='/user',
22 | tags=['Users'],
23 | dependencies=[Depends(ensure_admin)],
24 | )
25 |
26 | router.include_router(
27 | job,
28 | prefix='/job',
29 | tags=['Jobs'],
30 | dependencies=[Depends(ensure_login)],
31 | )
32 |
33 | router.include_router(
34 | novel,
35 | prefix='/novel',
36 | tags=['Novels'],
37 | dependencies=[Depends(ensure_login)],
38 | )
39 |
40 | router.include_router(
41 | artifact,
42 | prefix='/artifact',
43 | tags=['Artifacts'],
44 | dependencies=[Depends(ensure_login)],
45 | )
46 |
47 | router.include_router(
48 | runner,
49 | prefix='/runner',
50 | tags=['Runner'],
51 | dependencies=[Depends(ensure_admin)],
52 | )
53 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/artifacts.py:
--------------------------------------------------------------------------------
1 | import mimetypes
2 | import os
3 | from typing import Optional
4 |
5 | from fastapi import APIRouter, Depends, Path, Query
6 | from fastapi.responses import FileResponse
7 |
8 | from ..context import ServerContext
9 | from ..exceptions import AppErrors
10 |
11 | # The root router
12 | router = APIRouter()
13 |
14 |
15 | @router.get("s", summary='Returns a list of artifacts')  # path "s" resolves to /artifacts under the /artifact prefix
16 | def list_artifacts(
17 | ctx: ServerContext = Depends(),
18 | offset: int = Query(default=0),
19 | limit: int = Query(default=20, le=100),
20 | novel_id: Optional[str] = Query(default=None),
21 | ):
22 | return ctx.artifacts.list(
23 | limit=limit,
24 | offset=offset,
25 | novel_id=novel_id,
26 | )
27 |
28 |
29 | @router.get("/{artifact_id}", summary='Returns an artifact')
30 | def get_artifact(
31 | artifact_id: str = Path(),
32 | ctx: ServerContext = Depends(),
33 | ):
34 | return ctx.artifacts.get(artifact_id)
35 |
36 |
37 | @router.get("/{artifact_id}/download", summary='Download artifact file')
38 | def download_artifact(
39 | artifact_id: str = Path(),
40 | ctx: ServerContext = Depends(),
41 | ):
42 | artifact = ctx.artifacts.get(artifact_id)
43 | file_path = artifact.output_file
44 | if not file_path:
45 | raise AppErrors.no_artifact_file
46 |
47 | media_type, _ = mimetypes.guess_type(file_path)
48 | return FileResponse(
49 | path=file_path,
50 | filename=os.path.basename(file_path),
51 | media_type=media_type or "application/octet-stream",
52 | )
53 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/auth.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Body, Depends
2 |
3 | from ..context import ServerContext
4 | from ..models.user import (CreateRequest, LoginRequest, LoginResponse,
5 | SignupRequest, UpdateRequest, User)
6 | from ..security import ensure_user
7 |
8 | # The root router
9 | router = APIRouter()
10 |
11 |
12 | @router.post("/login", summary="Login with username or email and password")
13 | def login(
14 | ctx: ServerContext = Depends(),
15 | credentials: LoginRequest = Body(
16 | default=...,
17 | description='The login credentials',
18 | ),
19 | ):
20 | user = ctx.users.verify(credentials)
21 | token = ctx.users.generate_token(user.id)
22 | return LoginResponse(token=token, user=user)
23 |
24 |
25 | @router.post('/signup', summary='Signup as a new user')
26 | def signup(
27 | ctx: ServerContext = Depends(),
28 | body: SignupRequest = Body(
29 | default=...,
30 | description='The signup request',
31 | ),
32 | ):
33 | request = CreateRequest(
34 | password=body.password,
35 | email=body.email,
36 | name=body.name,
37 | )
38 | user = ctx.users.create(request)
39 | token = ctx.users.generate_token(user.id)
40 | return LoginResponse(token=token, user=user)
41 |
42 |
43 | @router.get('/me', summary='Get current user details')
44 | def me(
45 | user: User = Depends(ensure_user),
46 | ):
47 | return user
48 |
49 |
50 | @router.put('/me/update', summary='Update current user details')
51 | def self_update(
52 | ctx: ServerContext = Depends(),
53 | user: User = Depends(ensure_user),
54 | body: UpdateRequest = Body(
55 | default=...,
56 |         description='The update request',
57 | ),
58 | ):
59 | body.role = None
60 | body.tier = None
61 | body.is_active = None
62 | return ctx.users.update(user.id, body)
63 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/novels.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends, Path, Query
2 |
3 | from ..context import ServerContext
4 |
5 | # The root router
6 | router = APIRouter()
7 |
8 |
9 | @router.get("s", summary='Returns a list of novels')  # path "s" resolves to /novels under the /novel prefix
10 | def list_novels(
11 | ctx: ServerContext = Depends(),
12 | offset: int = Query(default=0),
13 | limit: int = Query(default=20, le=100),
14 | with_orphans: bool = Query(default=False),
15 | ):
16 | return ctx.novels.list(
17 | limit=limit,
18 | offset=offset,
19 | with_orphans=with_orphans,
20 | )
21 |
22 |
23 | @router.get("/{novel_id}", summary='Returns a novel')
24 | def get_novel(
25 | novel_id: str = Path(),
26 | ctx: ServerContext = Depends(),
27 | ):
28 | return ctx.novels.get(novel_id)
29 |
30 |
31 | @router.get("/{novel_id}/artifacts", summary='Returns cached artifacts')
32 | def get_novel_artifacts(
33 | novel_id: str = Path(),
34 | ctx: ServerContext = Depends(),
35 | ):
36 | return ctx.novels.get_artifacts(novel_id)
37 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/runner.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Depends
2 |
3 | from ..context import ServerContext
4 |
5 | # The root router
6 | router = APIRouter()
7 |
8 |
9 | @router.get("/start", summary='Start the runner')
10 | def start(ctx: ServerContext = Depends()):
11 | ctx.scheduler.start()
12 |
13 |
14 | @router.get("/stop", summary='Stops the runner')
15 | def stop(ctx: ServerContext = Depends()):
16 | ctx.scheduler.close()
17 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/api/users.py:
--------------------------------------------------------------------------------
1 | from fastapi import APIRouter, Body, Depends, Path, Query
2 |
3 | from ..context import ServerContext
4 | from ..exceptions import AppErrors
5 | from ..models.user import CreateRequest, UpdateRequest, User
6 | from ..security import ensure_user
7 |
8 | # The root router
9 | router = APIRouter()
10 |
11 |
12 | @router.get('s', summary='Get list of all users')  # path "s" resolves to /users under the /user prefix
13 | def all_users(
14 | ctx: ServerContext = Depends(),
15 | offset: int = Query(default=0),
16 | limit: int = Query(default=20, le=100),
17 | ):
18 | return ctx.users.list(offset, limit)
19 |
20 |
21 | @router.post('', summary='Create a user')
22 | def create_user(
23 | ctx: ServerContext = Depends(),
24 | body: CreateRequest = Body(
25 | default=...,
26 |         description='The create request',
27 | ),
28 | ):
29 | return ctx.users.create(body)
30 |
31 |
32 | @router.get('/{user_id}', summary='Get the user')
33 | def get_user(
34 | ctx: ServerContext = Depends(),
35 | user_id: str = Path(),
36 | ):
37 | return ctx.users.get(user_id)
38 |
39 |
40 | @router.put('/{user_id}', summary='Update the user')
41 | def update_user(
42 | ctx: ServerContext = Depends(),
43 | user: User = Depends(ensure_user),
44 | body: UpdateRequest = Body(
45 | default=...,
46 |         description='The update request',
47 | ),
48 | user_id: str = Path(),
49 | ):
50 | if user_id == user.id:
51 | body.role = None
52 | body.is_active = None
53 | return ctx.users.update(user_id, body)
54 |
55 |
56 | @router.delete('/{user_id}', summary='Delete the user')
57 | def delete_user(
58 | user: User = Depends(ensure_user),
59 | ctx: ServerContext = Depends(),
60 | user_id: str = Path(),
61 | ):
62 | if user.id == user_id:
63 | raise AppErrors.can_not_delete_self
64 | return ctx.users.remove(user_id)
65 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/app.py:
--------------------------------------------------------------------------------
1 | import traceback
2 |
3 | from fastapi import FastAPI
4 | from fastapi.middleware.cors import CORSMiddleware
5 | from fastapi.middleware.gzip import GZipMiddleware
6 |
7 | from ...assets.version import get_version
8 |
9 | app = FastAPI(
10 | version=get_version(),
11 | title="Lightnovel Crawler",
12 | description="Download novels from online sources and generate e-books",
13 | )
14 |
15 | app.add_middleware(
16 | CORSMiddleware,
17 | allow_credentials=True,
18 | allow_origins=["*"],
19 | allow_methods=["*"],
20 | allow_headers=["*"],
21 | )
22 |
23 | app.add_middleware(
24 | GZipMiddleware,
25 | minimum_size=1000,
26 | )
27 |
28 | try:
29 | from .api import router as api
30 | app.include_router(api, prefix='/api')
31 | except ImportError:
32 | traceback.print_exc()
33 |
--------------------------------------------------------------------------------
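A sketch of exercising this app with FastAPI's TestClient; it assumes the database, admin user, and scheduler were prepared beforehand (as ServerBot.start does), and the credentials shown are placeholders.

from fastapi.testclient import TestClient

from lncrawl.bots.server.app import app

client = TestClient(app)

# Placeholder credentials; real ones come from SERVER_ADMIN_EMAIL / SERVER_ADMIN_PASSWORD
res = client.post("/api/auth/login", json={"email": "admin@example.com", "password": "secret"})
token = res.json()["token"]

# Authenticated call; ensure_login reads the "Authorization" header and strips a "Bearer " prefix
res = client.get("/api/novels", headers={"Authorization": f"Bearer {token}"})
print(res.status_code, res.json())
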
/lncrawl/bots/server/context.py:
--------------------------------------------------------------------------------
1 | from functools import cached_property
2 | from typing import Optional
3 |
4 | from .utils.decorators import autoclose
5 |
6 | _cache: Optional['ServerContext'] = None
7 |
8 |
9 | class ServerContext:
10 | def __new__(cls):
11 | global _cache
12 | if _cache is None:
13 | _cache = super().__new__(cls)
14 | return _cache
15 |
16 | @cached_property
17 | def config(self):
18 | from .config import Config
19 | return Config()
20 |
21 | @cached_property
22 | @autoclose
23 | def db(self):
24 | from .db import DB
25 | return DB(self)
26 |
27 | @cached_property
28 | def users(self):
29 | from .services.users import UserService
30 | return UserService(self)
31 |
32 | @cached_property
33 | def jobs(self):
34 | from .services.jobs import JobService
35 | return JobService(self)
36 |
37 | @cached_property
38 | def novels(self):
39 | from .services.novels import NovelService
40 | return NovelService(self)
41 |
42 | @cached_property
43 | def artifacts(self):
44 | from .services.artifacts import ArtifactService
45 | return ArtifactService(self)
46 |
47 | @cached_property
48 | @autoclose
49 | def scheduler(self):
50 | from .services.scheduler import JobScheduler
51 | return JobScheduler(self)
52 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/db.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from sqlmodel import Session, SQLModel, create_engine
4 |
5 | from .context import ServerContext
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class DB:
11 | def __init__(self, ctx: ServerContext) -> None:
12 | self.engine = create_engine(
13 | ctx.config.server.database_url,
14 | echo=logger.isEnabledFor(logging.DEBUG),
15 | )
16 |
17 | def close(self):
18 | self.engine.dispose()
19 |
20 | def prepare(self):
21 | logger.info('Creating tables')
22 | SQLModel.metadata.create_all(self.engine)
23 |
24 | def session(
25 | self, *,
26 | future: bool = True,
27 | autoflush: bool = True,
28 | autocommit: bool = False,
29 | expire_on_commit: bool = True,
30 | enable_baked_queries: bool = True,
31 | ):
32 | return Session(
33 | self.engine,
34 | future=future, # type:ignore
35 | autoflush=autoflush,
36 | autocommit=autocommit, # type:ignore
37 | expire_on_commit=expire_on_commit,
38 | enable_baked_queries=enable_baked_queries,
39 | )
40 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/bots/server/models/__init__.py
--------------------------------------------------------------------------------
/lncrawl/bots/server/models/_base.py:
--------------------------------------------------------------------------------
1 | import uuid
2 |
3 | from sqlalchemy import event
4 | from sqlmodel import Field, SQLModel
5 |
6 | from ..utils.time_utils import current_timestamp
7 |
8 |
9 | def generate_uuid():
10 | return uuid.uuid4().hex
11 |
12 |
13 | class BaseModel(SQLModel):
14 | id: str = Field(
15 | default_factory=generate_uuid,
16 | primary_key=True,
17 | description="ID"
18 | )
19 | created_at: int = Field(
20 | index=True,
21 | default_factory=current_timestamp,
22 | description="Create timestamp (ms)"
23 | )
24 | updated_at: int = Field(
25 | default_factory=current_timestamp,
26 | description="Update timestamp (ms)"
27 | )
28 |
29 |
30 | @event.listens_for(BaseModel, "before_update", propagate=True)
31 | def auto_update_timestamp(mapper, connection, target: BaseModel):
32 | target.updated_at = current_timestamp()
33 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/models/pagination.py:
--------------------------------------------------------------------------------
1 | from typing import Generic, List, TypeVar
2 |
3 | from pydantic import BaseModel
4 |
5 | T = TypeVar("T")
6 |
7 |
8 | class Paginated(BaseModel, Generic[T]):
9 | total: int
10 | offset: int
11 | limit: int
12 | items: List[T]
13 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/models/user.py:
--------------------------------------------------------------------------------
1 | from enum import Enum, IntEnum
2 | from typing import Optional
3 |
4 | from pydantic import EmailStr
5 | from sqlmodel import Field, SQLModel
6 |
7 | from ._base import BaseModel
8 |
9 |
10 | class UserRole(str, Enum):
11 | USER = "user"
12 | ADMIN = "admin"
13 |
14 |
15 | class UserTier(IntEnum):
16 | BASIC = 0
17 | PREMIUM = 1
18 | VIP = 2
19 |
20 |
21 | class User(BaseModel, table=True):
22 | password: str = Field(description="Hashed password", exclude=True)
23 | email: str = Field(unique=True, index=True, description="User Email")
24 | role: UserRole = Field(default=UserRole.USER, description="User role")
25 | is_active: bool = Field(default=True, description="Active status")
26 | name: Optional[str] = Field(default=None, description="Full name")
27 | tier: UserTier = Field(default=UserTier.BASIC, description="User tier")
28 |
29 |
30 | class LoginRequest(SQLModel):
31 | email: str = Field(description="User email")
32 | password: str = Field(description="User password")
33 |
34 |
35 | class LoginResponse(SQLModel):
36 | token: str = Field(description="The authorization token")
37 | user: User = Field(description="The user")
38 |
39 |
40 | class SignupRequest(SQLModel):
41 | email: EmailStr = Field(description="User Email")
42 | password: str = Field(description="User password")
43 | name: Optional[str] = Field(default=None, description="Full name")
44 |
45 |
46 | class CreateRequest(SignupRequest):
47 | role: UserRole = Field(default=UserRole.USER, description="User role")
48 | tier: UserTier = Field(default=UserTier.BASIC, description="User tier")
49 |
50 |
51 | class UpdateRequest(SQLModel):
52 | password: Optional[str] = Field(default=None, description="User password")
53 | name: Optional[str] = Field(default=None, description="Full name")
54 | role: Optional[UserRole] = Field(default=None, description="User role")
55 | is_active: Optional[bool] = Field(default=None, description="Active status")
56 | tier: Optional[UserTier] = Field(default=None, description="User tier")
57 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/security.py:
--------------------------------------------------------------------------------
1 | from fastapi import Depends
2 | from fastapi.security import APIKeyHeader
3 | from jose import jwt
4 |
5 | from .context import ServerContext
6 | from .exceptions import AppErrors
7 | from .models.user import User, UserRole
8 |
9 | header_scheme = APIKeyHeader(
10 | name='Authorization',
11 | scheme_name='Bearer Token',
12 | )
13 |
14 |
15 | def ensure_login(
16 | ctx: ServerContext = Depends(),
17 | token: str = Depends(header_scheme),
18 | ) -> dict:
19 | try:
20 | key = ctx.config.server.token_secret
21 | algo = ctx.config.server.token_algo
22 | if token.startswith('Bearer '):
23 | token = token[len('Bearer '):]
24 | return jwt.decode(token, key, algorithms=[algo])
25 | except Exception as e:
26 | raise AppErrors.unauthorized from e
27 |
28 |
29 | def ensure_user(
30 | ctx: ServerContext = Depends(),
31 | payload: dict = Depends(ensure_login),
32 | ) -> User:
33 | user_id = payload.get('uid')
34 | if not user_id:
35 | raise AppErrors.unauthorized
36 | user = ctx.users.get(user_id)
37 | if not user.is_active:
38 | raise AppErrors.inactive_user
39 | return user
40 |
41 |
42 | def ensure_admin(user: User = Depends(ensure_user)) -> User:
43 | if user.role != UserRole.ADMIN:
44 | raise AppErrors.forbidden
45 | return user
46 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/services/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/bots/server/services/__init__.py
--------------------------------------------------------------------------------
/lncrawl/bots/server/services/artifacts.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from sqlmodel import desc, func, select
4 |
5 | from ..context import ServerContext
6 | from ..exceptions import AppErrors
7 | from ..models.job import Artifact
8 | from ..models.pagination import Paginated
9 | from ..models.user import User, UserRole
10 |
11 |
12 | class ArtifactService:
13 | def __init__(self, ctx: ServerContext) -> None:
14 | self._ctx = ctx
15 | self._db = ctx.db
16 |
17 | def list(
18 | self,
19 | offset: int = 0,
20 | limit: int = 20,
21 | novel_id: Optional[str] = None,
22 | ) -> Paginated[Artifact]:
23 | with self._db.session() as sess:
24 | stmt = select(Artifact)
25 |
26 | # Apply filters
27 |             if novel_id:
28 | stmt = stmt.where(Artifact.novel_id == novel_id)
29 |
30 | # Apply sorting
31 |             stmt = stmt.order_by(desc(Artifact.created_at))
32 |
33 | total = sess.exec(select(func.count()).select_from(Artifact)).one()
34 | items = sess.exec(stmt.offset(offset).limit(limit)).all()
35 |
36 | return Paginated(
37 | total=total,
38 | offset=offset,
39 | limit=limit,
40 | items=list(items),
41 | )
42 |
43 | def get(self, artifact_id: str) -> Artifact:
44 | with self._db.session() as sess:
45 | artifact = sess.get(Artifact, artifact_id)
46 | if not artifact:
47 | raise AppErrors.no_such_artifact
48 | return artifact
49 |
50 | def delete(self, artifact_id: str, user: User) -> bool:
51 | if user.role != UserRole.ADMIN:
52 | raise AppErrors.forbidden
53 | with self._db.session() as sess:
54 | artifact = sess.get(Artifact, artifact_id)
55 | if not artifact:
56 | raise AppErrors.no_such_artifact
57 | sess.delete(artifact)
58 | sess.commit()
59 | return True
60 |
--------------------------------------------------------------------------------
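A small usage sketch of this service through the shared ServerContext singleton; it assumes the configured database is reachable (ctx.db.prepare() creates missing tables).

from lncrawl.bots.server.context import ServerContext

ctx = ServerContext()  # singleton: every instantiation returns the same object
ctx.db.prepare()       # create tables if they do not exist yet

page = ctx.artifacts.list(offset=0, limit=10)
print(page.total, [a.id for a in page.items])
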
/lncrawl/bots/server/services/tier.py:
--------------------------------------------------------------------------------
1 | from lncrawl.models import OutputFormat
2 |
3 | from ..models.job import JobPriority
4 | from ..models.user import UserTier
5 |
6 | ##
7 | # For Job creation
8 | ##
9 |
10 | JOB_PRIORITY_LEVEL = {
11 | UserTier.BASIC: JobPriority.LOW,
12 | UserTier.PREMIUM: JobPriority.NORMAL,
13 | UserTier.VIP: JobPriority.HIGH,
14 | }
15 |
16 | ##
17 | # For JobRunner service
18 | ##
19 | ENABLED_FORMATS = {
20 | UserTier.BASIC: [
21 | OutputFormat.json,
22 | OutputFormat.epub,
23 | ],
24 | UserTier.PREMIUM: [
25 | OutputFormat.json,
26 | OutputFormat.epub,
27 | OutputFormat.text,
28 | OutputFormat.web,
29 | OutputFormat.pdf,
30 | ],
31 | UserTier.VIP: list(OutputFormat),
32 | }
33 |
34 | BATCH_DOWNLOAD_LIMIT = {
35 | UserTier.BASIC: 10,
36 | UserTier.PREMIUM: 100,
37 | UserTier.VIP: 10000,
38 | }
39 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/ui/__index__.py:
--------------------------------------------------------------------------------
1 | import reflex as rx
2 |
3 |
4 | class State(rx.State):
5 | count: int = 0
6 |
7 | def increment(self):
8 | self.count += 1
9 |
10 | def decrement(self):
11 | self.count -= 1
12 |
13 |
14 | def index():
15 | return rx.hstack(
16 | rx.button(
17 | "Decrement",
18 | color_scheme="ruby",
19 | on_click=State.decrement,
20 | ),
21 | rx.heading(State.count, font_size="2em"),
22 | rx.button(
23 | "Increment",
24 | color_scheme="grass",
25 | on_click=State.increment,
26 | ),
27 | spacing="4",
28 | )
29 |
30 |
31 | app = rx.App()
32 | app.add_page(index)
33 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/bots/server/utils/__init__.py
--------------------------------------------------------------------------------
/lncrawl/bots/server/utils/aborter.py:
--------------------------------------------------------------------------------
1 | from threading import Event
2 |
3 |
4 | class Aborter:
5 | def __init__(self) -> None:
6 | self._event = Event()
7 |
8 | @property
9 | def aborted(self):
10 | return self._event.is_set()
11 |
12 | def abort(self):
13 | self._event.set()
14 |
15 | def wait(self, timeout: float):
16 | if timeout <= 0:
17 | return
18 | self._event.wait(timeout)
19 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/utils/decorators.py:
--------------------------------------------------------------------------------
1 | import atexit
2 |
3 |
4 | def autoclose(func):
5 | def inner(*args, **kwargs):
6 | val = func(*args, **kwargs)
7 | if hasattr(val, 'close') and callable(val.close):
8 | atexit.register(val.close)
9 | return val
10 | return inner
11 |
--------------------------------------------------------------------------------
/lncrawl/bots/server/utils/json_tools.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | from typing import Any, TypeVar
4 |
5 | _log = logging.getLogger(__name__)
6 |
7 | T = TypeVar('T')
8 |
9 |
10 | def json_encode(data: Any, encoding: str = "utf-8") -> bytes:
11 | try:
12 | output = json.dumps(
13 | data,
14 | allow_nan=True,
15 | ensure_ascii=False,
16 | check_circular=True,
17 | separators=(',', ':'),
18 | )
19 | return output.encode(encoding)
20 | except Exception as err:
21 |         _log.debug('Failed encoding: %s', err)
22 | return b''
23 |
24 |
25 | def json_decode(data: str | bytes | bytearray | None, _default: T) -> T:
26 | try:
27 | if isinstance(data, bytearray):
28 | data = bytes(data)
29 | if isinstance(data, bytes):
30 | data = data.decode()
31 | if not isinstance(data, str):
32 | return _default
33 | return json.loads(data)
34 | except Exception as err:
35 |         _log.debug('Failed decoding: %s', err)
36 | return _default
37 |
--------------------------------------------------------------------------------
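A quick round trip with the helpers above; on bad input, json_decode returns the supplied default instead of raising.

from lncrawl.bots.server.utils.json_tools import json_decode, json_encode

raw = json_encode({"id": 1, "title": "Chapter 1"})  # b'{"id":1,"title":"Chapter 1"}'
data = json_decode(raw, {})                         # {'id': 1, 'title': 'Chapter 1'}
bad = json_decode(b"not json", {})                  # {} -- the fallback default
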
/lncrawl/bots/server/utils/text_tools.py:
--------------------------------------------------------------------------------
1 | import base64
2 | import hashlib
3 | import lzma
4 |
5 | from cryptography.fernet import Fernet
6 |
7 | __key_cache = {}
8 |
9 |
10 | def text_compress(plain: bytes) -> bytes:
11 | lzc = lzma.LZMACompressor()
12 | output = lzc.compress(plain)
13 | output += lzc.flush()
14 | return output
15 |
16 |
17 | def text_decompress(compressed: bytes) -> bytes:
18 | lzd = lzma.LZMADecompressor()
19 | return lzd.decompress(compressed)
20 |
21 |
22 | def text_encrypt(plain: bytes, secret: str | bytes) -> bytes:
23 | fernet = Fernet(generate_key(secret))
24 | result = fernet.encrypt(plain)
25 | return base64.urlsafe_b64decode(result)
26 |
27 |
28 | def text_decrypt(cipher: bytes, secret: str | bytes) -> bytes:
29 | fernet = Fernet(generate_key(secret))
30 | cipher = base64.urlsafe_b64encode(cipher)
31 | return fernet.decrypt(cipher)
32 |
33 |
34 | def text_compress_encrypt(plain: bytes, secret: str | bytes) -> bytes:
35 | return text_encrypt(text_compress(plain), secret)
36 |
37 |
38 | def text_decrypt_decompress(cipher: bytes, secret: str | bytes) -> bytes:
39 | return text_decompress(text_decrypt(cipher, secret))
40 |
41 |
42 | def generate_md5(*texts) -> str:
43 | md5 = hashlib.md5()
44 | for text in texts:
45 | md5.update(str(text or '').encode())
46 | return md5.hexdigest()
47 |
48 |
49 | def generate_key(secret: str | bytes) -> bytes:
50 | if isinstance(secret, str):
51 | secret = secret.encode()
52 | if secret not in __key_cache:
53 | hash = hashlib.sha3_256(secret).digest()
54 | key = base64.urlsafe_b64encode(hash)
55 | __key_cache[secret] = key
56 | return __key_cache[secret]
57 |
--------------------------------------------------------------------------------
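A round-trip sketch for the compression and encryption helpers above; the secret is a placeholder.

from lncrawl.bots.server.utils.text_tools import (
    text_compress_encrypt,
    text_decrypt_decompress,
)

secret = "change-me"              # placeholder secret
payload = b"chapter body " * 100  # repetitive data compresses well

cipher = text_compress_encrypt(payload, secret)  # LZMA compress, then Fernet encrypt
assert text_decrypt_decompress(cipher, secret) == payload
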
/lncrawl/bots/server/utils/time_utils.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 | from typing import Any
3 |
4 | from dateutil import parser
5 | from dateutil.relativedelta import relativedelta
6 | from dateutil.tz import tzutc
7 |
8 |
9 | def current_timestamp():
10 | '''Current UNIX timestamp in milliseconds'''
11 | return round(1000 * datetime.now().timestamp())
12 |
13 |
14 | def as_unix_time(time: Any) -> int | None:
15 | try:
16 | if isinstance(time, int):
17 | return time
18 | if isinstance(time, str):
19 | time = parser.parse(time)
20 | if isinstance(time, datetime):
21 | return round(1000 * time.timestamp())
22 | except Exception:
23 | pass
24 | return None
25 |
26 |
27 | def time_from_now(
28 | years=0, months=0, days=0, weeks=0,
29 | hours=0, minutes=0, seconds=0
30 | ) -> datetime:
31 | delta = relativedelta(
32 | years=years, months=months, days=days, weeks=weeks,
33 | hours=hours, minutes=minutes, seconds=seconds
34 | )
35 | return datetime.now(tzutc()).replace(microsecond=0) + delta
36 |
--------------------------------------------------------------------------------
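A few example calls for the time helpers above; the values in the comments are illustrative.

from lncrawl.bots.server.utils.time_utils import (
    as_unix_time,
    current_timestamp,
    time_from_now,
)

now_ms = current_timestamp()                   # e.g. 1718000000000 (milliseconds)
parsed = as_unix_time("2024-06-10T08:00:00Z")  # ISO string -> milliseconds, or None on failure
expiry = time_from_now(days=7)                 # timezone-aware datetime one week ahead
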
/lncrawl/constants.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | DEFAULT_OUTPUT_PATH = os.getenv('OUTPUT_PATH') or os.path.abspath("Lightnovels")
4 | META_FILE_NAME = "meta.json"
5 |
--------------------------------------------------------------------------------
/lncrawl/core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Interactive application to take user inputs
3 | """
4 |
5 | import logging
6 | import os
7 | import sys
8 |
9 | import colorama # type:ignore
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
14 | def init():
15 | from ..assets.version import get_version
16 | from .arguments import get_args
17 | from .display import description, input_suppression
18 | from .logconfig import configure_logging
19 |
20 | os.environ["version"] = get_version()
21 |
22 | colorama.init(wrap=True)
23 | description()
24 |
25 | configure_logging()
26 |
27 | args = get_args()
28 | logger.debug("Arguments: %s", args)
29 |
30 | if args.suppress:
31 | input_suppression()
32 | print(args)
33 |
34 | if args.bot:
35 | os.environ["BOT"] = args.bot
36 |
37 | for key, val in args.extra.items():
38 | os.environ[key] = val[0]
39 |
40 |
41 | def start_app():
42 | from ..bots import run_bot
43 | from .arguments import get_args
44 | from .display import cancel_method, error_message
45 | from .proxy import load_proxies, start_proxy_fetcher, stop_proxy_fetcher
46 | from .sources import load_sources
47 |
48 | init()
49 |
50 | load_sources()
51 | cancel_method()
52 |
53 | args = get_args()
54 | if args.proxy_file:
55 | os.environ["use_proxy"] = "file"
56 | load_proxies(args.proxy_file)
57 |
58 | if args.auto_proxy:
59 | os.environ["use_proxy"] = "auto"
60 | start_proxy_fetcher()
61 |
62 | try:
63 | bot = os.getenv("BOT", "").lower()
64 | run_bot(bot)
65 | except KeyboardInterrupt:
66 | pass
67 | except Exception:
68 | error_message(*sys.exc_info())
69 |
70 | if args.auto_proxy:
71 | stop_proxy_fetcher()
72 |
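For orientation, a minimal launcher might look like the sketch below (assumed here; the package's actual __main__.py is not shown in this section). start_app() runs init(), loads sources and proxies, and dispatches to the bot named by the BOT environment variable.

# Hypothetical equivalent of `python -m lncrawl`
from lncrawl.core import start_app

if __name__ == "__main__":
    start_app()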
--------------------------------------------------------------------------------
/lncrawl/core/exeptions.py:
--------------------------------------------------------------------------------
1 | from urllib.error import URLError
2 |
3 | from cloudscraper.exceptions import CloudflareException
4 | from PIL import UnidentifiedImageError
5 | from requests.exceptions import RequestException
6 | from urllib3.exceptions import HTTPError
7 |
8 |
9 | class LNException(Exception):
10 | pass
11 |
12 |
13 | class FallbackToBrowser(Exception):
14 | pass
15 |
16 |
17 | ScraperErrorGroup = (
18 | URLError,
19 | HTTPError,
20 | CloudflareException,
21 | RequestException,
22 | FallbackToBrowser,
23 | UnidentifiedImageError,
24 | )
25 |
26 | RetryErrorGroup = (
27 | URLError,
28 | HTTPError,
29 | CloudflareException,
30 | RequestException,
31 | UnidentifiedImageError,
32 | )
33 |
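A hedged sketch of how these exception groups are intended to be used (illustrative only; the fetch helpers below are stand-ins, not project functions): scraper-level failures are caught as one tuple, and FallbackToBrowser lets a scraper path hand control over to a browser path.

def fetch_with_scraper(url: str) -> str:
    raise FallbackToBrowser()            # stand-in: pretend the scraper path gave up

def fetch_with_browser(url: str) -> str:
    return "<html>...</html>"            # stand-in for a browser-rendered page

def fetch(url: str) -> str:
    try:
        return fetch_with_scraper(url)
    except ScraperErrorGroup:
        # any scraper failure, including an explicit FallbackToBrowser,
        # falls through to the browser-based path
        return fetch_with_browser(url)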
--------------------------------------------------------------------------------
/lncrawl/core/soup.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABC
3 | from typing import Optional, Union
4 |
5 | from bs4 import BeautifulSoup, Tag
6 | from requests import Response
7 |
8 | from .exeptions import LNException
9 |
10 | logger = logging.getLogger(__name__)
11 |
12 |
13 | DEFAULT_PARSER = "lxml"
14 |
15 |
16 | class SoupMaker(ABC):
17 | def __init__(
18 | self,
19 | parser: Optional[str] = None,
20 | ) -> None:
21 | """This is a helper for Beautiful Soup. It is being used as a superclass of the Crawler.
22 |
23 | Args:
24 | - parser (Optional[str], optional): Desirable features of the parser. This can be the name of a specific parser
25 | ("lxml", "lxml-xml", "html.parser", or "html5lib") or it may be the type of markup to be used ("html", "html5", "xml").
26 | """
27 | self._parser = parser or DEFAULT_PARSER
28 |
29 | def close(self) -> None:
30 | pass
31 |
32 | def make_soup(
33 | self,
34 | data: Union[Response, bytes, str],
35 | encoding: Optional[str] = None,
36 | ) -> BeautifulSoup:
37 | if isinstance(data, Response):
38 | return self.make_soup(data.content, encoding)
39 | elif isinstance(data, bytes):
40 | html = data.decode(encoding or "utf8", "ignore")
41 | elif isinstance(data, str):
42 | html = data
43 | else:
44 | raise LNException("Could not parse response")
45 | return BeautifulSoup(html, features=self._parser)
46 |
47 | def make_tag(
48 | self,
49 | data: Union[Response, bytes, str],
50 | encoding: Optional[str] = None,
51 | ) -> Tag:
52 | soup = self.make_soup(data, encoding)
53 | return next(soup.find("body").children)
54 |
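Usage sketch (illustrative, not part of the file above), assuming requests is installed: make_soup accepts a Response, raw bytes, or a string and always returns a BeautifulSoup built with the configured parser.

import requests

maker = SoupMaker(parser="html.parser")
resp = requests.get("https://example.com")            # example URL
soup = maker.make_soup(resp)                           # Response -> BeautifulSoup
print(soup.title.string if soup.title else "no title")

tag = maker.make_tag("<html><body><div>hi</div></body></html>")  # first child of <body>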
--------------------------------------------------------------------------------
/lncrawl/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .chapter import Chapter
2 | from .formats import OutputFormat
3 | from .meta import MetaInfo
4 | from .novel import Novel
5 | from .search_result import CombinedSearchResult, SearchResult
6 | from .session import Session
7 | from .volume import Volume
8 |
9 | __all__ = [
10 | "Chapter",
11 | "CombinedSearchResult",
12 | "SearchResult",
13 | "OutputFormat",
14 | "Novel",
15 | "MetaInfo",
16 | "Session",
17 | "Volume",
18 | ]
19 |
--------------------------------------------------------------------------------
/lncrawl/models/chapter.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 |
3 | from box import Box
4 |
5 |
6 | class Chapter(Box):
7 | def __init__(
8 | self,
9 | id: int,
10 | url: str = "",
11 | title: str = "",
12 | volume: Optional[int] = None,
13 | volume_title: Optional[str] = None,
14 | body: Optional[str] = None,
15 | images: Dict[str, str] = dict(),
16 | success: bool = False,
17 | **kwargs,
18 | ) -> None:
19 | self.id = id
20 | self.url = url
21 | self.title = title
22 | self.volume = volume
23 | self.volume_title = volume_title
24 | self.body = body
25 | self.images = images
26 | self.success = success
27 | self.update(kwargs)
28 |
29 | @staticmethod
30 | def without_body(item: "Chapter") -> "Chapter":
31 | result = item.copy()
32 | result.body = None
33 | return result
34 |
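Brief illustration (not part of the file above): Chapter is a Box, so fields are reachable both as attributes and as keys, and without_body returns a copy that is safe to dump into a chapter list without the full text.

chapter = Chapter(id=1, url="https://example.com/c1", title="Chapter 1")
chapter.body = "<p>...</p>"
assert chapter["title"] == chapter.title     # key and attribute access are equivalent

slim = Chapter.without_body(chapter)         # copy with the body stripped
assert slim.body is None and chapter.body is not None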
--------------------------------------------------------------------------------
/lncrawl/models/formats.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
4 | class OutputFormat(str, Enum):
5 | json = "json"
6 | epub = "epub"
7 | text = "text"
8 | web = "web"
9 | docx = "docx"
10 | mobi = "mobi"
11 | pdf = "pdf"
12 | rtf = "rtf"
13 | txt = "txt"
14 | azw3 = "azw3"
15 | fb2 = "fb2"
16 | lit = "lit"
17 | lrf = "lrf"
18 | oeb = "oeb"
19 | pdb = "pdb"
20 | rb = "rb"
21 | snb = "snb"
22 | tcr = "tcr"
23 |
24 | def __str__(self) -> str:
25 | return self.value
26 |
--------------------------------------------------------------------------------
/lncrawl/models/meta.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from box import Box
4 |
5 | from .novel import Novel
6 | from .session import Session
7 |
8 |
9 | class MetaInfo(Box):
10 | def __init__(
11 | self,
12 | session: Optional[Session] = None,
13 | novel: Optional[Novel] = None,
14 | **kwargs,
15 | ) -> None:
16 | self.session = session
17 | self.novel = novel
18 | self.update(kwargs)
19 |
--------------------------------------------------------------------------------
/lncrawl/models/novel.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional
2 |
3 | from box import Box
4 |
5 | from ..assets.languages import find_code
6 | from .chapter import Chapter
7 | from .volume import Volume
8 |
9 |
10 | class Novel(Box):
11 | def __init__(
12 | self,
13 | url: str,
14 | title: str,
15 | authors: List[str] = [],
16 | cover_url: Optional[str] = None,
17 | chapters: List[Chapter] = [],
18 | volumes: List[Volume] = [],
19 | is_rtl: bool = False,
20 | synopsis: str = "",
21 | language: Optional[str] = None,
22 | tags: List[str] = [],
23 | has_manga: Optional[bool] = None,
24 | has_mtl: Optional[bool] = None,
25 | **kwargs,
26 | ) -> None:
27 | self.url = url
28 | self.title = title
29 | self.authors = authors
30 | self.cover_url = cover_url
31 | self.chapters = chapters
32 | self.volumes = volumes
33 | self.is_rtl = is_rtl
34 | self.synopsis = synopsis
35 | self.has_manga = has_manga
36 | self.has_mtl = has_mtl
37 | self.language = find_code(language)
38 | self.tags = tags
39 | self.update(kwargs)
40 |
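Illustrative construction (not part of the file above), assuming Chapter and Volume from this package are in scope; the URL and title are examples, and the language string is normalized to a code by find_code().

novel = Novel(
    url="https://example.com/my-novel",
    title="My Novel",
    authors=["Anonymous"],
    language="English",                      # passed through find_code()
    volumes=[Volume(id=1)],
    chapters=[Chapter(id=1, title="Chapter 1", volume=1)],
)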
--------------------------------------------------------------------------------
/lncrawl/models/search_result.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | from box import Box
4 |
5 |
6 | class SearchResult(Box):
7 | def __init__(
8 | self,
9 | title: str,
10 | url: str,
11 | info: str = "",
12 | **kwargs,
13 | ) -> None:
14 | self.title = str(title)
15 | self.url = str(url)
16 | self.info = str(info)
17 | self.update(kwargs)
18 |
19 |
20 | class CombinedSearchResult(Box):
21 | def __init__(
22 | self,
23 | id: str,
24 | title: str,
25 | novels: List[SearchResult] = [],
26 | **kwargs,
27 | ) -> None:
28 | self.id = id
29 | self.title = str(title)
30 | self.novels = novels
31 | self.update(kwargs)
32 |
--------------------------------------------------------------------------------
/lncrawl/models/volume.py:
--------------------------------------------------------------------------------
1 | from typing import Optional
2 |
3 | from box import Box
4 |
5 |
6 | class Volume(Box):
7 | def __init__(
8 | self,
9 | id: int,
10 | title: str = "",
11 | start_chapter: Optional[int] = None,
12 | final_chapter: Optional[int] = None,
13 | chapter_count: Optional[int] = None,
14 | **kwargs,
15 | ) -> None:
16 | self.id = id
17 | self.title = title
18 | self.start_chapter = start_chapter
19 | self.final_chapter = final_chapter
20 | self.chapter_count = chapter_count
21 | self.update(kwargs)
22 |
--------------------------------------------------------------------------------
/lncrawl/templates/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/templates/__init__.py
--------------------------------------------------------------------------------
/lncrawl/templates/browser/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/templates/browser/__init__.py
--------------------------------------------------------------------------------
/lncrawl/templates/browser/chapter_only.py:
--------------------------------------------------------------------------------
1 | from typing import Generator
2 |
3 | from bs4 import Tag
4 |
5 | from ...models import Chapter
6 | from ..soup.chapter_only import ChapterOnlySoupTemplate
7 | from .general import GeneralBrowserTemplate
8 |
9 |
10 | class ChapterOnlyBrowserTemplate(GeneralBrowserTemplate, ChapterOnlySoupTemplate):
11 | """Attempts to crawl using cloudscraper first, if failed use the browser."""
12 |
13 | def parse_chapter_list_in_browser(self) -> Generator[Chapter, None, None]:
14 | chap_id = 0
15 | for tag in self.select_chapter_tags_in_browser():
16 | if not isinstance(tag, Tag):
17 | continue
18 | chap_id += 1
19 | yield self.parse_chapter_item(tag, chap_id)
20 |
21 | def select_chapter_tags_in_browser(self) -> Generator[Tag, None, None]:
22 | """Select chapter list item tags from the browser"""
23 | yield from self.select_chapter_tags(self.browser.soup)
24 |
--------------------------------------------------------------------------------
/lncrawl/templates/browser/login.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import abstractmethod
3 |
4 | from ...core.exeptions import FallbackToBrowser, ScraperErrorGroup
5 | from .general import GeneralBrowserTemplate
6 |
7 | logger = logging.getLogger(__name__)
8 |
9 |
10 | class LoginBrowserTemplate(GeneralBrowserTemplate):
11 | """Attempts to crawl using cloudscraper first, if failed use the browser."""
12 |
13 | def login(self, email: str, password: str) -> None:
14 | try:
15 | return self.login_in_soup(email, password)
16 | except ScraperErrorGroup:
17 | return self.login_in_browser(email, password)
18 |
19 | def login_in_soup(self, email: str, password: str) -> None:
20 | """Login to the website using the scraper"""
21 | raise FallbackToBrowser()
22 |
23 | @abstractmethod
24 | def login_in_browser(self, email: str, password: str) -> None:
25 | """Login to the website using the browser"""
26 | raise NotImplementedError()
27 |
--------------------------------------------------------------------------------
/lncrawl/templates/browser/searchable.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Generator, List
3 |
4 | from bs4 import Tag
5 |
6 | from ...core.exeptions import FallbackToBrowser
7 | from ...models import SearchResult
8 | from ..soup.searchable import SearchableSoupTemplate
9 | from .general import GeneralBrowserTemplate
10 |
11 |
12 | class SearchableBrowserTemplate(GeneralBrowserTemplate, SearchableSoupTemplate):
13 | """Attempts to crawl using cloudscraper first, if failed use the browser."""
14 |
15 | def search_novel_in_soup(self, query: str) -> List[SearchResult]:
16 | tags = self.select_search_items(query)
17 | return list(self.process_search_results(tags))
18 |
19 | def search_novel_in_browser(self, query: str) -> List[SearchResult]:
20 | tags = self.select_search_items_in_browser(query)
21 | return list(self.process_search_results_in_browser(tags))
22 |
23 | def process_search_results_in_browser(
24 | self, tags: Generator[Tag, None, None]
25 | ) -> Generator[SearchResult, None, None]:
26 | """Process novel item tags and generate search results from the browser"""
27 | count = 0
28 | for tag in tags:
29 | if not isinstance(tag, Tag):
30 | continue
31 | count += 1
32 | if count == 10:
33 | break
34 | yield self.parse_search_item_in_browser(tag)
35 |
36 | @abstractmethod
37 | def select_search_items(self, query: str) -> Generator[Tag, None, None]:
38 | raise FallbackToBrowser()
39 |
40 | def select_search_items_in_browser(self, query: str) -> Generator[Tag, None, None]:
41 | """Select novel items found by the query using the browser"""
42 | yield from self.select_search_items(self.browser.soup)
43 |
44 | def parse_search_item_in_browser(self, tag: Tag) -> SearchResult:
45 | """Parse a tag and return single search result"""
46 | return self.parse_search_item(tag)
47 |
--------------------------------------------------------------------------------
/lncrawl/templates/browser/with_volume.py:
--------------------------------------------------------------------------------
1 | from typing import Generator, Union
2 |
3 | from bs4 import Tag
4 |
5 | from ...models import Chapter, Volume
6 | from ..soup.with_volume import ChapterWithVolumeSoupTemplate
7 | from .general import GeneralBrowserTemplate
8 |
9 |
10 | class ChapterWithVolumeBrowserTemplate(
11 | GeneralBrowserTemplate, ChapterWithVolumeSoupTemplate
12 | ):
13 | """Attempts to crawl using cloudscraper first, if failed use the browser."""
14 |
15 | def parse_chapter_list_in_browser(
16 | self,
17 | ) -> Generator[Union[Chapter, Volume], None, None]:
18 | vol_id = 0
19 | chap_id = 0
20 | for vol in self.select_volume_tags_in_browser():
21 | if not isinstance(vol, Tag):
22 | continue
23 | vol_id += 1
24 | vol_item = self.parse_volume_item_in_browser(vol, vol_id)
25 | yield vol_item
26 | for tag in self.select_chapter_tags_in_browser(vol, vol_item):
27 | if not isinstance(tag, Tag):
28 | continue
29 | chap_id += 1
30 | item = self.parse_chapter_item_in_browser(tag, chap_id, vol_item)
31 | item.volume = vol_id
32 | yield item
33 |
34 | def select_volume_tags_in_browser(self) -> Generator[Tag, None, None]:
35 | """Select volume list item tags from the browser"""
36 | return self.select_volume_tags(self.browser.soup)
37 |
38 | def parse_volume_item_in_browser(self, tag: Tag, id: int) -> Volume:
39 | """Parse a single volume from volume list item tag from the browser"""
40 | return self.parse_volume_item(tag, id)
41 |
42 | def select_chapter_tags_in_browser(
43 | self, tag: Tag, vol: Volume
44 | ) -> Generator[Tag, None, None]:
45 | """Select chapter list item tags from volume tag from the browser"""
46 | return self.select_chapter_tags(tag, vol)
47 |
48 | def parse_chapter_item_in_browser(self, tag: Tag, id: int, vol: Volume) -> Chapter:
49 | """Parse a single chapter from chapter list item tag from the browser"""
50 | return self.parse_chapter_item(tag, id, vol)
51 |
--------------------------------------------------------------------------------
/lncrawl/templates/soup/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/templates/soup/__init__.py
--------------------------------------------------------------------------------
/lncrawl/templates/soup/chapter_only.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Generator
3 |
4 | from bs4 import BeautifulSoup, Tag
5 |
6 | from ...models import Chapter
7 | from .general import GeneralSoupTemplate
8 |
9 |
10 | class ChapterOnlySoupTemplate(GeneralSoupTemplate):
11 | def parse_chapter_list(self, soup: BeautifulSoup) -> Generator[Chapter, None, None]:
12 | chap_id = 0
13 | for tag in self.select_chapter_tags(soup):
14 | if not isinstance(tag, Tag):
15 | continue
16 | chap_id += 1
17 | yield self.parse_chapter_item(tag, chap_id)
18 |
19 | @abstractmethod
20 | def select_chapter_tags(self, soup: BeautifulSoup) -> Generator[Tag, None, None]:
21 | """Select chapter list item tags from the page soup"""
22 | raise NotImplementedError()
23 |
24 | @abstractmethod
25 | def parse_chapter_item(self, tag: Tag, id: int) -> Chapter:
26 | """Parse a single chapter from chapter list item tag"""
27 | raise NotImplementedError()
28 |
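A hedged sketch of a concrete subclass (the site, URL, and CSS selectors are hypothetical, and only the two hooks defined above are shown; a real source also implements the remaining hooks of the general template):

class ExampleChapterOnlyCrawler(ChapterOnlySoupTemplate):
    base_url = ["https://example-novel-site.com/"]      # hypothetical source

    def select_chapter_tags(self, soup):
        # hypothetical selector for the chapter list items
        yield from soup.select("ul.chapter-list li a")

    def parse_chapter_item(self, tag, id):
        return Chapter(
            id=id,
            url=self.absolute_url(tag["href"]),
            title=tag.text.strip(),
        )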
--------------------------------------------------------------------------------
/lncrawl/templates/soup/optional_volume.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Generator, Union
3 |
4 | from bs4 import BeautifulSoup, Tag
5 |
6 | from ...models import Chapter, Volume
7 | from .general import GeneralSoupTemplate
8 |
9 |
10 | class OptionalVolumeSoupTemplate(GeneralSoupTemplate):
11 | def parse_chapter_list(
12 | self, soup: BeautifulSoup
13 | ) -> Generator[Union[Chapter, Volume], None, None]:
14 | vol_id = 0
15 | chap_id = 0
16 | for vol in self.select_volume_tags(soup):
17 | if not isinstance(vol, Tag):
18 | continue
19 | vol_id += 1
20 | vol_item = self.parse_volume_item(vol, vol_id)
21 | yield vol_item
22 | for tag in self.select_chapter_tags(vol):
23 | if not isinstance(tag, Tag):
24 | continue
25 | chap_id += 1
26 | item = self.parse_chapter_item(tag, chap_id, vol_item)
27 | item.volume = vol_id
28 | yield item
29 |
30 | if chap_id > 0:
31 | return
32 |
33 | vol_id = 0
34 | chap_id = 0
35 | parent = soup.select_one("html")
36 | for tag in self.select_chapter_tags(parent):
37 | if not isinstance(tag, Tag):
38 | continue
39 | if chap_id % 100 == 0:
40 | vol_id = chap_id // 100 + 1
41 | vol_item = self.parse_volume_item(parent, vol_id)
42 | yield vol_item
43 | chap_id += 1
44 | item = self.parse_chapter_item(tag, chap_id, vol_item)
45 | item.volume = vol_id
46 | yield item
47 |
48 | def select_volume_tags(self, soup: BeautifulSoup):
49 | return []
50 |
51 | def parse_volume_item(self, tag: Tag, id: int) -> Volume:
52 | return Volume(id=id)
53 |
54 | @abstractmethod
55 | def select_chapter_tags(self, parent: Tag) -> Generator[Tag, None, None]:
56 | raise NotImplementedError()
57 |
58 | @abstractmethod
59 | def parse_chapter_item(self, tag: Tag, id: int, vol: Volume) -> Chapter:
60 | raise NotImplementedError()
61 |
--------------------------------------------------------------------------------
/lncrawl/templates/soup/searchable.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Generator, List
3 |
4 | from bs4 import Tag
5 |
6 | from ...models import SearchResult
7 | from .general import GeneralSoupTemplate
8 |
9 |
10 | class SearchableSoupTemplate(GeneralSoupTemplate):
11 | def search_novel(self, query) -> List[SearchResult]:
12 | tags = self.select_search_items(query)
13 | return list(self.process_search_results(tags))
14 |
15 | def process_search_results(
16 | self, tags: Generator[Tag, None, None]
17 | ) -> Generator[SearchResult, None, None]:
18 | """Process novel item tags and generate search results"""
19 | count = 0
20 | for tag in tags:
21 | if not isinstance(tag, Tag):
22 | continue
23 | count += 1
24 | if count == 10:
25 | break
26 | yield self.parse_search_item(tag)
27 |
28 | @abstractmethod
29 | def select_search_items(self, query: str) -> Generator[Tag, None, None]:
30 | """Select novel items found on the search page by the query"""
31 | raise NotImplementedError()
32 |
33 | @abstractmethod
34 | def parse_search_item(self, tag: Tag) -> SearchResult:
35 | """Parse a tag and return single search result"""
36 | raise NotImplementedError()
37 |
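A hedged sketch of a searchable source built on this template (the endpoint and selectors are hypothetical; other abstract hooks of the general template are omitted): select_search_items performs the query and yields result tags, and parse_search_item turns each tag into a SearchResult.

class ExampleSearchableCrawler(SearchableSoupTemplate):
    base_url = ["https://example-novel-site.com/"]       # hypothetical source

    def select_search_items(self, query):
        soup = self.get_soup(f"{self.home_url}search?q={query}")   # hypothetical endpoint
        yield from soup.select(".search-results .novel-item a")

    def parse_search_item(self, tag):
        return SearchResult(
            title=tag.text.strip(),
            url=self.absolute_url(tag["href"]),
        )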
--------------------------------------------------------------------------------
/lncrawl/templates/soup/with_volume.py:
--------------------------------------------------------------------------------
1 | from abc import abstractmethod
2 | from typing import Generator, Union
3 |
4 | from bs4 import BeautifulSoup, Tag
5 |
6 | from ...models import Chapter, Volume
7 | from .general import GeneralSoupTemplate
8 |
9 |
10 | class ChapterWithVolumeSoupTemplate(GeneralSoupTemplate):
11 | def parse_chapter_list(
12 | self, soup: BeautifulSoup
13 | ) -> Generator[Union[Chapter, Volume], None, None]:
14 | vol_id = 0
15 | chap_id = 0
16 | for vol in self.select_volume_tags(soup):
17 | if not isinstance(vol, Tag):
18 | continue
19 | vol_id += 1
20 | vol_item = self.parse_volume_item(vol, vol_id)
21 | yield vol_item
22 | for tag in self.select_chapter_tags(vol, vol_item):
23 | if not isinstance(tag, Tag):
24 | continue
25 | chap_id += 1
26 | item = self.parse_chapter_item(tag, chap_id, vol_item)
27 | item.volume = vol_id
28 | yield item
29 |
30 | @abstractmethod
31 | def select_volume_tags(self, soup: BeautifulSoup) -> Generator[Tag, None, None]:
32 | """Select volume list item tags from the page soup"""
33 | raise NotImplementedError()
34 |
35 | @abstractmethod
36 | def parse_volume_item(self, tag: Tag, id: int) -> Volume:
37 | """Parse a single volume from volume list item tag"""
38 | raise NotImplementedError()
39 |
40 | @abstractmethod
41 | def select_chapter_tags(self, tag: Tag, vol: Volume) -> Generator[Tag, None, None]:
42 | """Select chapter list item tags from volume tag"""
43 | raise NotImplementedError()
44 |
45 | @abstractmethod
46 | def parse_chapter_item(self, tag: Tag, id: int, vol: Volume) -> Chapter:
47 | """Parse a single chapter from chapter list item tag"""
48 | raise NotImplementedError()
49 |
--------------------------------------------------------------------------------
/lncrawl/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/lncrawl/utils/__init__.py
--------------------------------------------------------------------------------
/lncrawl/utils/common.py:
--------------------------------------------------------------------------------
1 | from typing import TypeVar, Generic, Callable, Type
2 |
3 | T = TypeVar('T')
4 |
5 |
6 | class static_cached_property(Generic[T]):
7 | def __init__(self, func: Callable[..., T]):
8 | self._initialized = False
9 | if isinstance(func, staticmethod):
10 | self.func = func.__func__
11 | else:
12 | self.func = func
13 |
14 | def __get__(self, instance: None, owner: Type) -> T:
15 | if not self._initialized:
16 | self._value = self.func()
17 | self._initialized = True
18 | return self._value
19 |
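Usage sketch (illustrative, not part of the file above): static_cached_property calls the wrapped zero-argument callable once, on first access through the class, and then serves the cached value.

class Config:
    @static_cached_property
    def sources_index() -> dict:
        print("loading index...")        # runs only once
        return {"count": 0}

a = Config.sources_index                 # triggers the load and caches it
b = Config.sources_index                 # served from the cache
assert a is b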
--------------------------------------------------------------------------------
/lncrawl/utils/imgen.py:
--------------------------------------------------------------------------------
1 | # https://github.com/alexwlchan/specktre
2 |
3 | import random
4 | from typing import List, Optional
5 |
6 | from PIL import Image, ImageDraw
7 |
8 | from .material_colors import ColorName, ColorWeight, generate_colors
9 | from .tilings import TileGenerator, generate_tiles
10 |
11 |
12 | def generate_image(
13 | filename: Optional[str] = None,
14 | width: int = 512,
15 | height: int = 512,
16 | color_names: List[ColorName] = [],
17 | color_weights: List[ColorWeight] = [],
18 | generator: Optional[TileGenerator] = None,
19 | side_length: int = 50,
20 | ) -> Image:
21 | tiles = generate_tiles(
22 | generator,
23 | width,
24 | height,
25 | side_length,
26 | )
27 | colors = generate_colors(
28 | color_names,
29 | color_weights,
30 | )
31 | im = Image.new(
32 | mode="RGB",
33 | size=(width, height),
34 | )
35 | for tile, color in zip(tiles, colors):
36 | ImageDraw.Draw(im).polygon(tile, fill=color)
37 |
38 | if filename:
39 | im.save(filename)
40 |
41 | return im
42 |
43 |
44 | good_color_names = set(ColorName).difference(
45 | [
46 | ColorName.black,
47 | ColorName.white,
48 | ColorName.light_blue,
49 | ColorName.light_green,
50 | ]
51 | )
52 | good_color_weights = set(ColorWeight).difference(
53 | [
54 | ColorWeight.main,
55 | ColorWeight.w50,
56 | ColorWeight.w100,
57 | ColorWeight.w200,
58 | ColorWeight.w800,
59 | ColorWeight.w900,
60 | ColorWeight.a100,
61 | ColorWeight.a200,
62 | ]
63 | )
64 |
65 |
66 | def generate_cover_image(
67 | filename: Optional[str] = None,
68 | width: int = 800,
69 | height: int = 1032,
70 | ) -> Image:
71 | return generate_image(
72 | filename=filename,
73 | width=width,
74 | height=height,
75 | color_names=good_color_names,
76 | color_weights=good_color_weights,
77 | side_length=random.randint(300, 750),
78 | )
79 |
--------------------------------------------------------------------------------
/lncrawl/utils/ratelimit.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import time
3 |
4 | logger = logging.getLogger(__name__)
5 |
6 |
7 | class RateLimiter(object):
8 | """A helper class for a controlling number of requests per seconds.
9 | It is being used along with the TaskManager class.
10 |
11 | Args:
12 | - ratelimit (float, optional): Number of requests per seconds.
13 | """
14 |
15 | def __init__(self, ratelimit: float):
16 | if ratelimit <= 0:
17 | raise ValueError("ratelimit should be a non-zero positive number")
18 | self.period = 1 / ratelimit
19 | self._closed = False
20 |
21 | def _now(self):
22 | if hasattr(time, "monotonic"):
23 | return time.monotonic()
24 | return time.time()
25 |
26 | def __enter__(self):
27 | self._time = self._now()
28 |
29 | def __exit__(self, type, value, traceback):
30 | if self._closed:
31 | return
32 | d = (self._time + self.period) - self._now()
33 | self._time = self._now()
34 | if d > 0:
35 | time.sleep(d)
36 |
37 | def shutdown(self):
38 | self._closed = True
39 |
40 | def wrap(self, fn):
41 | def inner(*args, **kwargs):
42 | with self:
43 | return fn(*args, **kwargs)
44 |
45 | return inner
46 |
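Usage sketch (illustrative; fetch below is a stand-in, not a project function): entering the limiter records a start time and exiting sleeps for whatever remains of the per-request period, so the loop issues at most about two requests per second.

def fetch(url: str) -> None:
    print("fetching", url)               # stand-in for a real request

limiter = RateLimiter(ratelimit=2.0)     # ~2 requests per second
for url in ["https://example.com/a", "https://example.com/b", "https://example.com/c"]:
    with limiter:
        fetch(url)

limited_fetch = limiter.wrap(fetch)      # or wrap a callable once and reuse it
limiter.shutdown()                       # disables further sleeping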
--------------------------------------------------------------------------------
/lncrawl/utils/sockets.py:
--------------------------------------------------------------------------------
1 | import socket
2 |
3 |
4 | def free_port(host="127.0.0.1") -> int:
5 | """
6 | Determines a free port using sockets.
7 | """
8 | free_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
9 | free_socket.bind((host, 0))
10 | free_socket.listen(5)
11 | port: int = free_socket.getsockname()[1]
12 | free_socket.close()
13 | return port
14 |
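Quick illustration (not part of the file above): the returned port was bound briefly and released, so it is very likely, though not guaranteed, to still be free when the caller binds it.

port = free_port()
print(f"starting a local preview server on 127.0.0.1:{port}")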
--------------------------------------------------------------------------------
/lncrawl/utils/ssl_no_verify.py:
--------------------------------------------------------------------------------
1 | """
2 | https://stackoverflow.com/a/15445989/1583052
3 | """
4 | import warnings
5 | import contextlib
6 |
7 | import requests
8 | from urllib3.exceptions import InsecureRequestWarning
9 |
10 |
11 | old_merge_environment_settings = requests.Session.merge_environment_settings
12 |
13 |
14 | @contextlib.contextmanager
15 | def no_ssl_verification():
16 | opened_adapters = set()
17 |
18 | def merge_environment_settings(self, url, proxies, stream, verify, cert):
19 | # Verification happens only once per connection so we need to close
20 | # all the opened adapters once we're done. Otherwise, the effects of
21 | # verify=False persist beyond the end of this context manager.
22 | opened_adapters.add(self.get_adapter(url))
23 |
24 | settings = old_merge_environment_settings(
25 | self, url, proxies, stream, verify, cert
26 | )
27 | settings["verify"] = False
28 |
29 | return settings
30 |
31 | requests.Session.merge_environment_settings = merge_environment_settings
32 |
33 | try:
34 | with warnings.catch_warnings():
35 | warnings.simplefilter("ignore", InsecureRequestWarning)
36 | yield
37 | finally:
38 | requests.Session.merge_environment_settings = old_merge_environment_settings
39 |
40 | for adapter in opened_adapters:
41 | try:
42 | adapter.close()
43 | except Exception:
44 | pass
45 |
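Usage sketch (illustrative; the host is hypothetical): inside the context manager every requests session skips certificate verification and InsecureRequestWarning is silenced; the original behaviour is restored on exit.

import requests

with no_ssl_verification():
    resp = requests.get("https://self-signed.example.invalid/data")   # hypothetical host

requests.get("https://example.com")      # verification is back to normal here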
--------------------------------------------------------------------------------
/lncrawl/utils/uploader/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | cloud_drive = os.getenv("CLOUD_DRIVE", "ANONFILES")
4 |
5 |
6 | def upload(file_path, description=None):
7 | if cloud_drive == "GOOGLE_DRIVE":
8 | from .google_drive import upload
9 |
10 | return upload(file_path, description)
11 | elif cloud_drive == "GOFILE":
12 | from .gofile import upload
13 |
14 | return upload(file_path, description)
15 | else:
16 | from .anonfiles import upload
17 |
18 | return upload(file_path, description)
19 |
--------------------------------------------------------------------------------
/lncrawl/utils/uploader/anonfiles.py:
--------------------------------------------------------------------------------
1 | from requests import Session
2 |
3 |
4 | # API Docs: https://anonfiles.com/docs/api
5 | def upload(file_path, description):
6 | with Session() as sess:
7 | with open(file_path, "rb") as fp:
8 | response = sess.post(
9 | "https://api.anonfiles.com/upload",
10 | files={"file": fp},
11 | stream=True,
12 | )
13 | response.raise_for_status()
14 | return response.json()["data"]["file"]["url"]["full"]
15 |
--------------------------------------------------------------------------------
/lncrawl/utils/uploader/gofile.py:
--------------------------------------------------------------------------------
1 | from requests import Session
2 |
3 |
4 | # API Docs: https://gofile.io/api
5 | def upload(file_path, description=""):
6 | with Session() as sess:
7 | response = sess.get("https://api.gofile.io/getServer")
8 | response.raise_for_status()
9 | server_name = response.json()["data"]["server"]
10 |
11 | with open(file_path, "rb") as fp:
12 | response = sess.post(
13 | f"https://{server_name}.gofile.io/uploadFile",
14 | files={"file": fp},
15 | stream=True,
16 | )
17 | response.raise_for_status()
18 | return response.json()["data"]["downloadPage"]
19 |
--------------------------------------------------------------------------------
/lncrawl/utils/uploader/google_drive.py:
--------------------------------------------------------------------------------
1 | """[DEPRECATED] Uploader for google drive"""
2 | import logging
3 | import os
4 |
5 | logger = logging.getLogger(__name__)
6 |
7 |
8 | try:
9 | from pydrive.auth import GoogleAuth
10 | from pydrive.drive import GoogleDrive
11 | except Exception:
12 | logger.error("`pydrive` was not setup properly")
13 |
14 |
15 | def upload(file_path, description=None) -> str:
16 | gauth = GoogleAuth()
17 | # gauth.LocalWebserverAuth()
18 |
19 | # Try to load saved client credentials
20 | credential_file = os.getenv("GOOGLE_DRIVE_CREDENTIAL_FILE")
21 | gauth.LoadCredentialsFile(credential_file)
22 | if gauth.credentials is None:
23 | # Authenticate if they're not there
24 | gauth.LocalWebserverAuth()
25 | elif gauth.access_token_expired:
26 | # Refresh them if expired
27 | gauth.Refresh()
28 | else:
29 | # Initialize the saved creds
30 | gauth.Authorize()
31 |
32 | # Save the current credentials to a file
33 | gauth.SaveCredentialsFile(credential_file)
34 |
35 | drive = GoogleDrive(gauth)
36 | folder_id = os.getenv("GOOGLE_DRIVE_FOLDER_ID")
37 | filename_w_ext = os.path.basename(file_path)
38 | filename, file_extension = os.path.splitext(filename_w_ext)
39 |
40 | # Upload file to folder
41 | f = drive.CreateFile({"parents": [{"kind": "drive#fileLink", "id": folder_id}]})
42 | f["title"] = filename_w_ext
43 |
44 | # Make sure to add the path to the file to upload below.
45 | f.SetContentFile(file_path)
46 | f.Upload()
47 |
48 | logger.info("Uploaded file id: {}", f["id"])
49 | return "https://drive.google.com/open?id=" + f["id"]
50 |
--------------------------------------------------------------------------------
/lncrawl/webdriver/__init__.py:
--------------------------------------------------------------------------------
1 | # https://cloudbytes.dev/snippets/run-selenium-and-chrome-on-wsl2
2 | # https://github.com/ultrafunkamsterdam/undetected-chromedriver
3 |
4 | import logging
5 | from typing import Optional
6 |
7 | from selenium.webdriver import ChromeOptions
8 | from selenium.webdriver.remote.webdriver import WebDriver
9 |
10 | from ..core.arguments import get_args
11 | from ..core.soup import SoupMaker
12 | from .local import create_local
13 | from .remote import create_remote
14 |
15 | logger = logging.getLogger(__name__)
16 |
17 |
18 | def create_new(
19 | options: Optional["ChromeOptions"] = None,
20 | timeout: Optional[float] = None,
21 | user_data_dir: Optional[str] = None,
22 | soup_maker: Optional[SoupMaker] = None,
23 | headless: bool = False,
24 | **kwargs,
25 | ) -> WebDriver:
26 | args = get_args()
27 | if args.selenium_grid:
28 | return create_remote(
29 | address=args.selenium_grid,
30 | options=options,
31 | timeout=timeout,
32 | soup_maker=soup_maker,
33 | )
34 | else:
35 | return create_local(
36 | options=options,
37 | timeout=timeout,
38 | soup_maker=soup_maker,
39 | user_data_dir=user_data_dir,
40 | headless=headless,
41 | )
42 |
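Illustrative call (not part of the file above): create_new returns a remote driver when --selenium-grid is configured and a local one otherwise; note that in the code above the headless flag is only forwarded to the local driver.

driver = create_new(timeout=30, headless=True)    # ChromeOptions may also be passed
try:
    driver.get("https://example.com")
    html = driver.page_source
finally:
    driver.quit()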
--------------------------------------------------------------------------------
/lncrawl/webdriver/job_queue.py:
--------------------------------------------------------------------------------
1 | import atexit
2 | import logging
3 | from threading import Semaphore, Thread
4 | from typing import List, Optional
5 |
6 | from selenium.webdriver.remote.webdriver import WebDriver
7 |
8 | logger = logging.getLogger(__name__)
9 |
10 | MAX_BROWSER_INSTANCES = 8
11 |
12 | __open_browsers: List[WebDriver] = []
13 | __semaphore = Semaphore(MAX_BROWSER_INSTANCES)
14 |
15 |
16 | def __override_quit(driver: WebDriver):
17 | __open_browsers.append(driver)
18 | original = Thread(target=driver.quit, daemon=True)
19 |
20 | def override():
21 | if driver in __open_browsers:
22 | __semaphore.release()
23 | __open_browsers.remove(driver)
24 | logger.info("Destroyed instance: %s", driver.session_id)
25 | if not original._started.is_set(): # type:ignore
26 | original.start()
27 |
28 | driver.quit = override # type:ignore
29 |
30 |
31 | def _acquire_queue(timeout: Optional[float] = None):
32 | acquired = __semaphore.acquire(True, timeout)
33 | if not acquired:
34 | raise TimeoutError("Failed to acquire semaphore")
35 |
36 |
37 | def _release_queue(driver: WebDriver):
38 | __override_quit(driver)
39 |
40 |
41 | def check_active(driver: WebDriver) -> bool:
42 | if not isinstance(driver, WebDriver):
43 | return False
44 | return driver in __open_browsers
45 |
46 |
47 | def cleanup_drivers():
48 | for driver in list(__open_browsers):  # iterate a copy; the overridden quit() removes entries
49 | driver.close()
50 | driver.quit()
51 |
52 |
53 | atexit.register(cleanup_drivers)
54 |
--------------------------------------------------------------------------------
/requirements-app.txt:
--------------------------------------------------------------------------------
1 | # app requirements
2 | typer
3 | ascii
4 | regex
5 | packaging
6 | lxml[html-clean]
7 | pyease-grpc>=1.6.0
8 | python-dotenv>=0.15.0,<2.0.0
9 | beautifulsoup4>=4.8.0,<5.0.0
10 | requests>=2.20.0,<2.33.0
11 | python-slugify>=4.0.0,<9.0.0
12 | colorama>=0.4.0,<0.5.0
13 | tqdm>=4.60,<5.0
14 | PyExecJS>=1.5.1,<2.0.0
15 | ebooklib>=0.17.0,<1.0.0
16 | pillow>=6.0.0
17 | cloudscraper>=1.2.71
18 | readability-lxml>=0.8.0,<1.0.0
19 | questionary>=1.6.0
20 | prompt-toolkit~=3.0
21 | html5lib~=1.1
22 | base58~=2.1.1
23 | python-box>=6.0.0,<8.0.0
24 | pycryptodome>=3.0.0,<4.0.0
25 | selenium>=3.141.0
26 | tenacity>=9.0.0
27 |
--------------------------------------------------------------------------------
/requirements-bot.txt:
--------------------------------------------------------------------------------
1 | # bot requirements
2 | discord.py>=2.0.0
3 | python-telegram-bot[job-queue]~=20.0
4 | # pydrive>=1.3.1,<2.0.0
5 |
6 | uvicorn
7 | fastapi[standard]
8 | cachetools
9 | sqlmodel
10 | passlib[argon2]
11 | python-jose[cryptography]
12 | python-dateutil
13 | reflex
14 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # dev requirements
2 | wheel
3 | black
4 | flake8
5 | setuptools
6 | pyinstaller
7 | pycryptodome>=3.0.0,<4.0.0
8 |
9 | types-tqdm
10 | types-colorama
11 | types-cachetools
12 | types-python-dateutil
13 | types-passlib
14 | types-python-jose
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # app requirements
2 | typer
3 | ascii
4 | regex
5 | packaging
6 | lxml[html-clean]
7 | pyease-grpc>=1.6.0
8 | python-dotenv>=0.15.0,<2.0.0
9 | beautifulsoup4>=4.8.0,<5.0.0
10 | requests>=2.20.0,<2.33.0
11 | python-slugify>=4.0.0,<9.0.0
12 | colorama>=0.4.0,<0.5.0
13 | tqdm>=4.60,<5.0
14 | PyExecJS>=1.5.1,<2.0.0
15 | ebooklib>=0.17.0,<1.0.0
16 | pillow>=6.0.0
17 | cloudscraper>=1.2.71
18 | readability-lxml>=0.8.0,<1.0.0
19 | questionary>=1.6.0
20 | prompt-toolkit~=3.0
21 | html5lib~=1.1
22 | base58~=2.1.1
23 | python-box>=6.0.0,<8.0.0
24 | pycryptodome>=3.0.0,<4.0.0
25 | selenium>=3.141.0
26 | tenacity>=9.0.0
27 |
28 | # bot requirements
29 | discord.py>=2.0.0
30 | python-telegram-bot[job-queue]~=20.0
31 | uvicorn
32 | fastapi[standard]
33 | cachetools
34 | sqlmodel
35 | passlib[argon2]
36 | python-jose[cryptography]
37 | python-dateutil
38 | reflex
39 |
40 | # dev requirements
41 | wheel
42 | black
43 | flake8
44 | tk-tools
45 | setuptools
46 | pyinstaller
47 | types-tqdm
48 | types-colorama
49 | types-cachetools
50 | types-python-dateutil
51 | types-passlib
52 | types-python-jose
--------------------------------------------------------------------------------
/res/lncrawl-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/res/lncrawl-icon.png
--------------------------------------------------------------------------------
/res/lncrawl-web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/res/lncrawl-web.png
--------------------------------------------------------------------------------
/res/lncrawl.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dipu-bd/lightnovel-crawler/1ed455e6436ec7d9d6c7a497f621c6ba58f1a1b7/res/lncrawl.ico
--------------------------------------------------------------------------------
/scripts/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.10-slim-bookworm
2 |
3 | USER root
4 | # Install general dependencies
5 | RUN apt-get update -yq \
6 | && apt-get install -yq \
7 | wget tar xz-utils make cmake g++ libffi-dev libegl1 libopengl0 libxcb-cursor0 \
8 | libnss3 libgl1-mesa-glx libxcomposite1 libxrandr2 libxi6 fontconfig \
9 | libxkbcommon-x11-0 libxtst6 libxkbfile1 libxcomposite-dev libxdamage-dev \
10 | && rm -rf /var/lib/apt/lists/* \
11 | && apt-get clean autoclean \
12 | && apt-get autoremove -yq
13 |
14 | # Install calibre
15 | RUN wget -nv -O- https://download.calibre-ebook.com/linux-installer.sh | sh /dev/stdin \
16 | && ln -s /opt/calibre/ebook-convert /usr/local/bin/ebook-convert
17 |
18 | # Add app user
19 | RUN useradd -ms /bin/bash lncrawl
20 | USER lncrawl
21 |
22 | # Install global requirements
23 | RUN alias python=python3
24 | RUN alias pip=pip3
25 | RUN export PATH="/home/lncrawl/.local/bin:$PATH"
26 | RUN pip install -U pip wheel
27 |
28 | WORKDIR /app
29 |
30 | # Install app requirements
31 | COPY --chown=lncrawl:lncrawl requirements.txt .
32 | RUN pip install -r requirements.txt
33 |
34 | COPY .env .env
35 | COPY lncrawl lncrawl
36 | COPY sources sources
37 |
38 | ENV OUTPUT_PATH=/home/lncrawl/output
39 | RUN mkdir -p $OUTPUT_PATH
40 |
41 | ENTRYPOINT [ "python", "-m", "lncrawl" ]
42 |
--------------------------------------------------------------------------------
/scripts/bitanon.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 |
3 | VERSION=$(head -n 1 lncrawl/VERSION)
4 |
5 | # SHLINK_API_KEY=
6 |
7 |
8 | EXE_LINK="https://github.com/dipu-bd/lightnovel-crawler/releases/download/v$VERSION/lncrawl.exe"
9 | EXE_TITLE="Lightnovel Crawler v$VERSION (Windows)"
10 |
11 | LINUX_LINK="https://github.com/dipu-bd/lightnovel-crawler/releases/download/v$VERSION/lncrawl-linux"
12 | LINUX_TITLE="Lightnovel Crawler v$VERSION (Linux)"
13 |
14 | MAC_LINK="https://github.com/dipu-bd/lightnovel-crawler/releases/download/v$VERSION/lncrawl-mac"
15 | MAC_TITLE="Lightnovel Crawler v$VERSION (Mac)"
16 |
17 | set -ex
18 |
19 | curl -X 'PATCH' \
20 | 'https://go.bitanon.dev/rest/v3/short-urls/lncrawl-windows' \
21 | -H 'accept: application/json' \
22 | -H 'Content-Type: application/json' \
23 | -H "X-Api-Key: $SHLINK_API_KEY" \
24 | -d '{"title": "'"$EXE_TITLE"'","longUrl": "'"$EXE_LINK"'"}'
25 |
26 | curl -X 'PATCH' \
27 | 'https://go.bitanon.dev/rest/v3/short-urls/lncrawl-linux' \
28 | -H 'accept: application/json' \
29 | -H 'Content-Type: application/json' \
30 | -H "X-Api-Key: $SHLINK_API_KEY" \
31 | -d '{"title": "'"$LINUX_TITLE"'","longUrl": "'"$LINUX_LINK"'"}'
32 |
33 | curl -X 'PATCH' \
34 | 'https://go.bitanon.dev/rest/v3/short-urls/lncrawl-mac' \
35 | -H 'accept: application/json' \
36 | -H 'Content-Type: application/json' \
37 | -H "X-Api-Key: $SHLINK_API_KEY" \
38 | -d '{"title": "'"$MAC_TITLE"'","longUrl": "'"$MAC_LINK"'"}'
39 |
--------------------------------------------------------------------------------
/scripts/build.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | SET /P VERSION= " + " ".join(body) + " %s
".join(body) 57 | -------------------------------------------------------------------------------- /sources/en/o/ornovel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from lncrawl.core.crawler import Crawler 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class OrNovel(Crawler): 9 | base_url = "https://www.ornovel.com/" 10 | 11 | def read_novel_info(self): 12 | logger.debug("Visiting %s", self.novel_url) 13 | soup = self.get_soup(self.novel_url) 14 | 15 | self.novel_title = " ".join( 16 | [str(x) for x in soup.select_one(".title h1").contents if not x.name] 17 | ).strip() 18 | logger.info("Novel title: %s", self.novel_title) 19 | 20 | probable_img = soup.select_one(".intro-left img.book-image") 21 | if probable_img: 22 | self.novel_cover = self.absolute_url(probable_img["src"]) 23 | logger.info("Novel cover: %s", self.novel_cover) 24 | 25 | self.novel_author = " ".join( 26 | [a.text.strip() for a in soup.select(".author-container")] 27 | ) 28 | logger.info("%s", self.novel_author) 29 | 30 | volumes = set() 31 | chapters = soup.select("ul.chapters-all li.chapters-item a") 32 | for a in chapters: 33 | chap_id = len(self.chapters) + 1 34 | vol_id = (chap_id - 1) // 100 + 1 35 | volumes.add(vol_id) 36 | self.chapters.append( 37 | { 38 | "id": chap_id, 39 | "volume": vol_id, 40 | "url": self.absolute_url(a["href"]), 41 | "title": a.text.strip() or ("Chapter %d" % chap_id), 42 | } 43 | ) 44 | 45 | self.volumes = [{"id": x} for x in volumes] 46 | 47 | def download_chapter_body(self, chapter): 48 | soup = self.get_soup(chapter["url"]) 49 | 50 | contents = soup.select_one("div.chapter-detail") 51 | for bad in contents.select( 52 | "h2, ins, .chapter-header .code-block, script, .adsbygoogle" 53 | ): 54 | bad.extract() 55 | 56 | return str(contents) 57 | -------------------------------------------------------------------------------- /sources/en/p/pandamanga.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.mangastream import MangaStreamTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class PandaMangaxyzCrawler(MangaStreamTemplate): 10 | base_url = ["https://www.pandamanga.xyz/"] 11 | -------------------------------------------------------------------------------- /sources/en/p/pandanovelco.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from typing import Generator 4 | from bs4 import BeautifulSoup, Tag 5 | from lncrawl.templates.novelpub import NovelPubTemplate 6 | 7 | 8 | class PandaNovelCo(NovelPubTemplate): 9 | base_url = [ 10 | "https://pandanovel.co/", 11 | ] 12 | 13 | # We override because we do not have a request token like other novel pub 14 | # (without that wrong error is raised and browser search isn't triggered) 15 | def select_search_items(self, query: str) -> Generator[Tag, None, None]: 16 | self.submit_form( 17 | f"{self.home_url}lnsearchlive", 18 | data={"inputContent": query}, 19 | headers={ 20 | "referer": f"{self.home_url}search", 21 | }, 22 | ) 23 | 24 | # override this because somehow novel_url is always missing trailing / 25 | def select_chapter_tags_in_browser(self): 26 | next_link = f"{self.novel_url}/chapters" 27 | while next_link: 28 | self.browser.visit(next_link) 29 | self.browser.wait("ul.chapter-list li") 30 | chapter_list = self.browser.find("ul.chapter-list") 31 | yield 
from chapter_list.as_tag().select("li a") 32 | try: 33 | next_link = self.browser.find('.PagedList-skipToNext a[rel="next"]') 34 | next_link = next_link.get_attribute("href") 35 | except Exception: 36 | next_link = False 37 | 38 | # .chapter-content -> #content 39 | def select_chapter_body(self, soup: BeautifulSoup) -> Tag: 40 | self.browser.wait("#content") 41 | return soup.select_one("#content") 42 | -------------------------------------------------------------------------------- /sources/en/p/pandanovelorg.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelfull import NovelFullTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class Pandanovelorg(NovelFullTemplate): 8 | has_mtl = False 9 | has_manga = False 10 | base_url = ["https://pandanovel.org/"] 11 | -------------------------------------------------------------------------------- /sources/en/r/readmtl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from lncrawl.templates.madara import MadaraTemplate 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class Readmtl(MadaraTemplate): 9 | has_mtl = True 10 | has_manga = False 11 | base_url = ["https://readmtl.com/"] 12 | -------------------------------------------------------------------------------- /sources/en/r/readnovelfull.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.novelfull import NovelFullTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class ReadNovelFullCrawler(NovelFullTemplate): 10 | base_url = "https://readnovelfull.com/" 11 | -------------------------------------------------------------------------------- /sources/en/r/readwn.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class ReadWNCrawler(NovelMTLTemplate): 8 | has_mtl = True 9 | base_url = [ 10 | "https://www.readwn.com/", 11 | "https://www.wuxiap.com/" 12 | ] 13 | -------------------------------------------------------------------------------- /sources/en/s/sleepytrans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from lncrawl.core.crawler import Crawler 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class SleepyTranslations(Crawler): 9 | base_url = "https://sleepytranslations.com/" 10 | 11 | def read_novel_info(self): 12 | logger.debug("Visiting %s", self.novel_url) 13 | soup = self.get_soup(self.novel_url) 14 | 15 | possible_title = soup.select_one(".post-title h1") 16 | for span in possible_title.select("span"): 17 | span.extract() 18 | self.novel_title = possible_title.text.strip() 19 | logger.info("Novel title: %s", self.novel_title) 20 | 21 | possible_image = soup.select_one(".summary_image a img") 22 | if possible_image: 23 | self.novel_cover = self.absolute_url(possible_image["src"]) 24 | logger.info("Novel cover: %s", self.novel_cover) 25 | 26 | self.novel_author = " ".join( 27 | [a.text.strip() for a in soup.select('.author-content a[href*="author"]')] 28 | ) 29 | logger.info("%s", self.novel_author) 30 | 31 | self.novel_id = soup.select_one("#manga-chapters-holder")["data-id"] 32 | logger.info("Novel id: %s", self.novel_id) 33 | 34 | 
response = self.submit_form(self.novel_url.strip("/") + "/ajax/chapters") 35 | soup = self.make_soup(response) 36 | for a in reversed(soup.select(".wp-manga-chapter a")): 37 | chap_id = len(self.chapters) + 1 38 | vol_id = 1 + len(self.chapters) // 100 39 | if chap_id % 100 == 1: 40 | self.volumes.append({"id": vol_id}) 41 | self.chapters.append( 42 | { 43 | "id": chap_id, 44 | "volume": vol_id, 45 | "title": a.text.strip(), 46 | "url": self.absolute_url(a["href"]), 47 | } 48 | ) 49 | 50 | def download_chapter_body(self, chapter): 51 | soup = self.get_soup(chapter["url"]) 52 | contents = soup.select(".reading-content p") 53 | return "".join([str(p) for p in contents]) 54 | -------------------------------------------------------------------------------- /sources/en/s/smnovels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from lncrawl.core.crawler import Crawler 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class SMNovelsCrawler(Crawler): 9 | base_url = "https://smnovels.com/" 10 | 11 | def read_novel_info(self): 12 | logger.debug("Visiting %s", self.novel_url) 13 | soup = self.get_soup(self.novel_url) 14 | 15 | # Site has no author name or novel covers. 16 | possible_title = soup.select_one("h1.entry-title") 17 | assert possible_title, "No novel title" 18 | self.novel_title = possible_title.text.strip() 19 | logger.info("Novel title: %s", self.novel_title) 20 | 21 | for a in soup.select(".all-chapters-list a"): 22 | chap_id = len(self.chapters) + 1 23 | vol_id = len(self.chapters) // 100 + 1 24 | if len(self.chapters) % 100 == 0: 25 | self.volumes.append({"id": vol_id}) 26 | self.chapters.append( 27 | { 28 | "id": chap_id, 29 | "volume": vol_id, 30 | "title": a.text.strip(), 31 | "url": self.absolute_url(a["href"]), 32 | } 33 | ) 34 | 35 | def download_chapter_body(self, chapter): 36 | soup = self.get_soup(chapter["url"]) 37 | 38 | contents = soup.select_one(".entry-content") 39 | for bad in contents.select("br"): 40 | bad.extract() 41 | return self.cleaner.extract_contents(contents) 42 | -------------------------------------------------------------------------------- /sources/en/s/sonicmtl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from bs4 import BeautifulSoup, Tag 3 | from lncrawl.templates.madara import MadaraTemplate 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class SonicMTLCrawler(MadaraTemplate): 9 | has_mtl = True 10 | base_url = [ 11 | "https://sonicmtl.com", 12 | "https://www.sonicmtl.com/", 13 | ] 14 | 15 | def initialize(self): 16 | super().initialize() 17 | self.cleaner.bad_css.update( 18 | { 19 | ".ad", 20 | ".c-ads", 21 | ".custom-code", 22 | ".body-top-ads", 23 | ".before-content-ad", 24 | ".autors-widget", 25 | } 26 | ) 27 | 28 | def select_chapter_body(self, soup: BeautifulSoup) -> Tag: 29 | return soup.select_one(".reading-content .text-left") 30 | -------------------------------------------------------------------------------- /sources/en/s/steambun.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | 5 | from lncrawl.core.crawler import Crawler 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class SteambunCrawler(Crawler): 11 | base_url = "https://steambunlightnovel.com/" 12 | 13 | def read_novel_info(self): 14 | logger.debug("Visiting %s", self.novel_url) 15 | soup = self.get_soup(self.novel_url) 
16 | 17 | possible_title = soup.select_one("h1.entry-title") 18 | assert possible_title, "No novel title" 19 | self.novel_title = possible_title.text 20 | logger.info("Novel title: %s", self.novel_title) 21 | 22 | self.novel_author = "by SteamBun Translations" 23 | logger.info("Novel author: %s", self.novel_author) 24 | 25 | # Site does not list covers. 26 | 27 | volumes = set([]) 28 | for a in reversed( 29 | soup.select('div.w4pl-inner li a[href*="steambunlightnovel.com"]') 30 | ): 31 | title = a.text.strip() 32 | chapter_id = len(self.chapters) + 1 33 | volume_id = 1 + (chapter_id - 1) // 100 34 | volumes.add(volume_id) 35 | self.chapters.append( 36 | { 37 | "id": chapter_id, 38 | "volume": volume_id, 39 | "title": title, 40 | "url": a["href"], 41 | } 42 | ) 43 | 44 | self.chapters.sort(key=lambda x: x["id"]) 45 | self.volumes = [{"id": x, "title": ""} for x in volumes] 46 | 47 | def download_chapter_body(self, chapter): 48 | soup = self.get_soup(chapter["url"]) 49 | content = soup.select_one("div.entry-content") 50 | assert content, "No chapter content" 51 | self.cleaner.clean_contents(content) 52 | body = content.select("p") 53 | body = [str(p) for p in body if self.should_take(p)] 54 | return "
" + "
".join(body) + "
" 55 | 56 | def should_take(self, p): 57 | txt = p.text.strip().lower() 58 | return txt and txt != "advertisement" 59 | -------------------------------------------------------------------------------- /sources/en/s/systemtranslation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.mangastream import MangaStreamTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class SystemTranslationCrawler(MangaStreamTemplate): 10 | base_url = ["https://systemtranslation.com/"] 11 | -------------------------------------------------------------------------------- /sources/en/t/tamagotl.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.mangastream import MangaStreamTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class TamagoTlCrawler(MangaStreamTemplate): 10 | base_url = ["https://tamagotl.com/"] 11 | has_mtl = True 12 | -------------------------------------------------------------------------------- /sources/en/t/teanovel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | import logging 4 | 5 | from bs4 import Tag 6 | 7 | from lncrawl.core.crawler import Crawler 8 | from lncrawl.core.exeptions import LNException 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class TeaNovelCrawler(Crawler): 14 | base_url = "https://www.teanovel.com" 15 | 16 | def initialize(self): 17 | self.init_executor( 18 | workers=4 19 | ) 20 | 21 | def read_novel_info(self): 22 | soup = self.get_soup(self.novel_url) 23 | 24 | script_tag = soup.select_one("script#__NEXT_DATA__") 25 | if not isinstance(script_tag, Tag): 26 | raise LNException("No script data found") 27 | 28 | next_data = json.loads(script_tag.get_text()) 29 | 30 | novel_data = next_data["props"]["pageProps"]["novel"] 31 | 32 | self.novel_title = novel_data["name"] 33 | self.novel_author = novel_data["author"] 34 | 35 | img_tag = soup.select_one("main img[src*='_next/']") 36 | if isinstance(img_tag, Tag): 37 | self.novel_cover = self.absolute_url(img_tag["src"]) 38 | 39 | chapters = self.get_soup(self.novel_url + "/chapter-list").select("a.border-b") 40 | for chapter in chapters: 41 | chapter_id = len(self.chapters) + 1 42 | self.chapters.append( 43 | { 44 | "id": chapter_id, 45 | "title": chapter.select_one("p").get_text(strip=True), 46 | "url": self.absolute_url(chapter["href"]), 47 | } 48 | ) 49 | 50 | def download_chapter_body(self, chapter): 51 | chapter = self.get_soup(chapter["url"]) 52 | return self.cleaner.extract_contents(chapter.select_one("div.prose")) 53 | -------------------------------------------------------------------------------- /sources/en/t/totallytranslations.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from requests.sessions import Session 5 | 6 | from lncrawl.core.crawler import Crawler 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TotallyTranslations(Crawler): 12 | base_url = "https://totallytranslations.com/" 13 | 14 | def initialize(self): 15 | self.scraper = Session() 16 | 17 | def read_novel_info(self): 18 | logger.debug("Visiting %s", self.novel_url) 19 | soup = self.get_soup(self.novel_url) 20 | 21 | possible_title = soup.select_one(".entry-title") 22 | assert possible_title, "No 
novel title" 23 | self.novel_title = possible_title.text 24 | logger.info("Novel title: %s", self.novel_title) 25 | 26 | possible_image = soup.select_one(".novel-image img") 27 | if possible_image: 28 | self.novel_cover = self.absolute_url(possible_image["src"]) 29 | logger.info("Novel cover: %s", self.novel_cover) 30 | 31 | for p in soup.select(".chapters-list .chapters-title"): 32 | vol_title = p.text.strip() 33 | vol_id = len(self.volumes) + 1 34 | self.volumes.append( 35 | { 36 | "id": vol_id, 37 | "title": vol_title, 38 | } 39 | ) 40 | 41 | ul = p.find_next("ul") 42 | for a in ul.select("a"): 43 | chap_id = len(self.chapters) + 1 44 | self.chapters.append( 45 | { 46 | "id": chap_id, 47 | "volume": vol_id, 48 | "title": a.text.strip(), 49 | "url": self.absolute_url(a["href"]), 50 | } 51 | ) 52 | 53 | def download_chapter_body(self, chapter): 54 | soup = self.get_soup(chapter["url"]) 55 | paras = soup.select(".post-content p") 56 | return "\n".join([str(p) for p in paras if p.text.strip()]) 57 | -------------------------------------------------------------------------------- /sources/en/v/veratales.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class VeraTales(Crawler): 10 | base_url = "https://veratales.com/" 11 | 12 | def read_novel_info(self): 13 | logger.debug("Visiting %s", self.novel_url) 14 | soup = self.get_soup(self.novel_url) 15 | 16 | self.novel_title = soup.find("h1").text.strip() 17 | logger.info("Novel title: %s", self.novel_title) 18 | 19 | # self.novel_author= soup.find("div",{"class":"novel-author-info"}).find("h4").text.strip() 20 | self.novel_author = "" 21 | logger.info("%s", self.novel_author) 22 | 23 | possible_image = soup.select_one("div.card-header a img") 24 | if possible_image: 25 | self.novel_cover = self.absolute_url(possible_image["src"]) 26 | logger.info("Novel cover: %s", self.novel_cover) 27 | 28 | chapters = soup.select("table td a") 29 | for a in reversed(chapters): 30 | chap_id = len(self.chapters) + 1 31 | vol_id = 1 + len(self.chapters) // 100 32 | if len(self.volumes) < vol_id: 33 | self.volumes.append({"id": vol_id}) 34 | self.chapters.append( 35 | { 36 | "id": chap_id, 37 | "volume": vol_id, 38 | "url": self.absolute_url(a["href"]), 39 | "title": a.text.strip() or ("Chapter %d" % chap_id), 40 | } 41 | ) 42 | 43 | def download_chapter_body(self, chapter): 44 | soup = self.get_soup(chapter["url"]) 45 | contents = soup.select_one("div.reader-content") 46 | return self.cleaner.extract_contents(contents) 47 | -------------------------------------------------------------------------------- /sources/en/w/webnovelonlinecom.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import json 4 | import logging 5 | from lncrawl.core.crawler import Crawler 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class WebnovelOnlineDotComCrawler(Crawler): 11 | base_url = "https://webnovelonline.com/" 12 | 13 | def read_novel_info(self): 14 | url = self.novel_url 15 | soup = self.get_soup(url) 16 | 17 | possible_title = soup.select_one(".novel-info .novel-desc h1") 18 | assert possible_title, "No novel title" 19 | self.novel_title = possible_title.text 20 | logger.info("Novel title: %s", self.novel_title) 21 | 22 | possible_novel_cover = soup.select_one('meta[property="og:image"]') 23 | if 
possible_novel_cover: 24 | self.novel_cover = self.absolute_url(possible_novel_cover["content"]) 25 | logger.info("Novel cover: %s", self.novel_cover) 26 | 27 | volumes = set([]) 28 | for a in reversed(soup.select(".chapter-list .item a")): 29 | chap_id = len(self.chapters) + 1 30 | vol_id = 1 + len(self.chapters) // 100 31 | volumes.add(vol_id) 32 | self.chapters.append( 33 | { 34 | "id": chap_id, 35 | "volume": vol_id, 36 | "title": a.text.strip(), 37 | "url": self.absolute_url(a["href"]), 38 | } 39 | ) 40 | 41 | self.volumes = [{"id": x, "title": ""} for x in volumes] 42 | 43 | def download_chapter_body(self, chapter): 44 | soup = self.get_soup(chapter["url"]) 45 | 46 | for script in soup.select("script"): 47 | text = script.string 48 | if not text or not text.startswith("window._INITIAL_DATA_"): 49 | continue 50 | content = re.findall(r',"chapter":(".+")},', text)[0] 51 | content = json.loads(content).strip() 52 | return "<p>" + "</p><p>".join(content.split("\n\n")) + "</p>
" 53 | 54 | return "" 55 | -------------------------------------------------------------------------------- /sources/en/w/webnovelpub.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from lncrawl.templates.novelpub import NovelPubTemplate 4 | 5 | 6 | class WebnovelpubCrawler(NovelPubTemplate): 7 | base_url = [ 8 | "https://www.webnovelpub.com/", 9 | "https://www.webnovelpub.pro/", 10 | ] 11 | -------------------------------------------------------------------------------- /sources/en/w/whatsawhizzerwebnovels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from bs4 import Tag 5 | 6 | from lncrawl.core.crawler import Crawler 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class WhatsAWhizzerCrawler(Crawler): 12 | base_url = ["https://whatsawhizzerwebnovels.com/"] 13 | 14 | def read_novel_info(self): 15 | logger.debug("Visiting %s", self.novel_url) 16 | soup = self.get_soup(self.novel_url) 17 | 18 | self.novel_title = soup.select_one(".page-header-title").text.strip() 19 | logger.info("Novel title: %s", self.novel_title) 20 | 21 | cover_tag = soup.select_one('meta[property="og:image"]') 22 | 23 | if isinstance(cover_tag, Tag): 24 | self.novel_cover = cover_tag["content"] 25 | 26 | logger.info("Novel cover: %s", self.novel_cover) 27 | 28 | for a in soup.select(".entry > p > a"): 29 | self.chapters.append( 30 | { 31 | "id": len(self.chapters) + 1, 32 | "url": self.absolute_url(a["href"]), 33 | "title": a.text.strip(), 34 | } 35 | ) 36 | 37 | def download_chapter_body(self, chapter): 38 | soup = self.get_soup(chapter["url"]) 39 | contents = soup.select_one("article > div") 40 | 41 | nav_tags = contents.find_all("a", string="Table of Contents") 42 | for nav in nav_tags: 43 | nav.parent.extract() 44 | 45 | self.cleaner.clean_contents(contents) 46 | 47 | return str(contents) 48 | -------------------------------------------------------------------------------- /sources/en/w/wuxiabox.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from lncrawl.templates.novelmtl import NovelMTLTemplate 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class Wuxiabox(NovelMTLTemplate): 9 | has_mtl = True 10 | has_manga = False 11 | base_url = ["https://www.wuxiabox.com/"] 12 | -------------------------------------------------------------------------------- /sources/en/w/wuxiahub.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaHubCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiahub.com" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiamtl.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaMTLCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiamtl.com" 9 | has_mtl = True 10 | -------------------------------------------------------------------------------- /sources/en/w/wuxianovelhub.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from lncrawl.templates.novelmtl import NovelMTLTemplate 
4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class WuxiaNHCrawler(NovelMTLTemplate): 9 | base_url = "https://www.wuxianovelhub.com/" 10 | -------------------------------------------------------------------------------- /sources/en/w/wuxiapub.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaPubCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiapub.com/" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiar.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaRCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiar.com/" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiaspot.py: -------------------------------------------------------------------------------- 1 | from lncrawl.templates.novelmtl import NovelMTLTemplate 2 | 3 | 4 | class WuxiaSpotCrawler(NovelMTLTemplate): 5 | has_mtl = False 6 | has_manga = False 7 | base_url = "https://www.wuxiaspot.com/" 8 | -------------------------------------------------------------------------------- /sources/en/w/wuxiau.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaUCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiau.com/" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiav.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaVCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiav.com/" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiax.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaXCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiax.com/" 9 | -------------------------------------------------------------------------------- /sources/en/w/wuxiaz.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lncrawl.templates.novelmtl import NovelMTLTemplate 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | class WuxiaZCrawler(NovelMTLTemplate): 8 | base_url = "https://www.wuxiaz.com/" 9 | -------------------------------------------------------------------------------- /sources/en/x/xiainovel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from bs4 import Comment 5 | 6 | from lncrawl.core.crawler import Crawler 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class YukiNovelCrawler(Crawler): 12 | base_url = "https://www.xiainovel.com/" 13 | 14 | def read_novel_info(self): 15 | logger.debug("Visiting %s", self.novel_url) 16 | soup = 
self.get_soup(self.novel_url) 17 | 18 | possible_title = soup.select_one("div.page-header h1") 19 | assert possible_title, "No novel title" 20 | self.novel_title = possible_title.text 21 | logger.info("Novel title: %s", self.novel_title) 22 | 23 | self.novel_author = "Translated by XiaiNovel" 24 | logger.info("Novel author: %s", self.novel_author) 25 | 26 | # NOTE: Can't fetch cover url, as it's listed a base64 code. 27 | # self.novel_cover = self.absolute_url( 28 | # soup.select_one('div.col-md-6 img') 29 | # logger.info('Novel cover: %s', self.novel_cover) 30 | 31 | # Extract volume-wise chapter entries 32 | chapters = soup.select("ul.list-group li a") 33 | 34 | chapters.reverse() 35 | 36 | for a in chapters: 37 | chap_id = len(self.chapters) + 1 38 | vol_id = 1 + len(self.chapters) // 100 39 | if len(self.volumes) < vol_id: 40 | self.volumes.append({"id": vol_id}) 41 | self.chapters.append( 42 | { 43 | "id": chap_id, 44 | "volume": vol_id, 45 | "url": self.absolute_url(a["href"]), 46 | "title": a.text.strip() or ("Chapter %d" % chap_id), 47 | } 48 | ) 49 | 50 | def download_chapter_body(self, chapter): 51 | soup = self.get_soup(chapter["url"]) 52 | 53 | contents = soup.select_one("section#StoryContent") 54 | 55 | for d in contents.findAll("div"): 56 | d.extract() 57 | 58 | for comment in contents.find_all(string=lambda text: isinstance(text, Comment)): 59 | comment.extract() 60 | 61 | return str(contents) 62 | -------------------------------------------------------------------------------- /sources/fr/lightnovelfr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.mangastream import MangaStreamTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class LightnovelFrCrawler(MangaStreamTemplate): 10 | base_url = ["https://lightnovelfr.com/"] 11 | -------------------------------------------------------------------------------- /sources/fr/xiaowaz.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from bs4 import Tag 5 | 6 | from lncrawl.core.crawler import Crawler 7 | from lncrawl.core.exeptions import LNException 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class XiaowazCrawler(Crawler): 13 | base_url = ["https://xiaowaz.fr/"] 14 | 15 | def initialize(self) -> None: 16 | self.cleaner.bad_css.update( 17 | [".abh_box_business", ".footnote_container_prepare"] 18 | ) 19 | 20 | def read_novel_info(self): 21 | soup = self.get_soup(self.novel_url) 22 | 23 | title_tag = soup.select_one("h1.card_title") 24 | if not isinstance(title_tag, Tag): 25 | raise LNException("No title found") 26 | 27 | self.novel_title = title_tag.text.strip() 28 | 29 | image_tag = soup.select_one(".entry-content img") 30 | if isinstance(image_tag, Tag): 31 | self.novel_cover = self.absolute_url(image_tag["src"]) 32 | 33 | logger.info("Novel cover: %s", self.novel_cover) 34 | 35 | for a in soup.select(".entry-content a[href*='/articles/']"): 36 | self.chapters.append( 37 | { 38 | "id": len(self.chapters) + 1, 39 | "title": a.text.strip(), 40 | "url": self.absolute_url(a["href"]), 41 | } 42 | ) 43 | 44 | def download_chapter_body(self, chapter): 45 | soup = self.get_soup(chapter["url"]) 46 | contents = soup.select_one(".entry-content") 47 | self.cleaner.clean_contents(contents) 48 | 49 | return str(contents) 50 | -------------------------------------------------------------------------------- 
/sources/id/darktrans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class DarkTranslation(Crawler): 10 | base_url = "https://darktranslation.com/" 11 | 12 | def read_novel_info(self): 13 | logger.debug("Visiting %s", self.novel_url) 14 | soup = self.get_soup(self.novel_url) 15 | 16 | self.novel_title = soup.find("h1", {"class": "entry-title"}).text.strip() 17 | logger.info("Novel title: %s", self.novel_title) 18 | 19 | # FIXME: Problem getting cover image, tried multiple ways and keep getting error. 20 | # self.novel_cover = self.absolute_url( 21 | # soup.select_one('div.entry-content p img') 22 | # logger.info('Novel cover: %s', self.novel_cover) 23 | 24 | self.novel_author = "by Dark Translation" 25 | logger.info("Novel author: %s", self.novel_author) 26 | 27 | # Extract volume-wise chapter entries 28 | # Stops external links being selected as chapters 29 | chapters = soup.select("div.entry-content p a") 30 | 31 | for a in chapters: 32 | chap_id = len(self.chapters) + 1 33 | vol_id = 1 + len(self.chapters) // 100 34 | if len(self.volumes) < vol_id: 35 | self.volumes.append({"id": vol_id}) 36 | self.chapters.append( 37 | { 38 | "id": chap_id, 39 | "volume": vol_id, 40 | "url": self.absolute_url(a["href"]), 41 | "title": a.text.strip() or ("Chapter %d" % chap_id), 42 | } 43 | ) 44 | 45 | def download_chapter_body(self, chapter): 46 | soup = self.get_soup(chapter["url"]) 47 | contents = soup.select("div.entry-content") 48 | return self.cleaner.extract_contents(contents) 49 | -------------------------------------------------------------------------------- /sources/id/novelringan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from lncrawl.core.crawler import Crawler 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | class NovelRinganCrawler(Crawler): 9 | base_url = "https://novelringan.com/" 10 | 11 | def read_novel_info(self): 12 | logger.debug("Visiting %s", self.novel_url) 13 | soup = self.get_soup(self.novel_url) 14 | 15 | possible_title = soup.select_one("h1.entry-title") 16 | assert possible_title, "No novel title" 17 | self.novel_title = possible_title.text 18 | logger.info("Novel title: %s", self.novel_title) 19 | 20 | possible_image = soup.select_one("div.imgprop img") 21 | if possible_image: 22 | self.novel_cover = self.absolute_url(possible_image["src"]) 23 | logger.info("Novel cover: %s", self.novel_cover) 24 | 25 | self.novel_author = " ".join( 26 | [a.text.strip() for a in soup.select('.entry-author a[href*="/author/"]')] 27 | ) 28 | logger.info("%s", self.novel_author) 29 | 30 | for a in reversed(soup.select(".bxcl ul li a")): 31 | chap_id = len(self.chapters) + 1 32 | vol_id = 1 + len(self.chapters) // 100 33 | if len(self.volumes) < vol_id: 34 | self.volumes.append({"id": vol_id}) 35 | self.chapters.append( 36 | { 37 | "id": chap_id, 38 | "volume": vol_id, 39 | "url": self.absolute_url(a["href"]), 40 | "title": a.text.strip() or ("Chapter %d" % chap_id), 41 | } 42 | ) 43 | 44 | def download_chapter_body(self, chapter): 45 | soup = self.get_soup(chapter["url"]) 46 | contents = soup.select(".entry-content p") 47 | 48 | body = [str(p) for p in contents if p.text.strip()] 49 | 50 | return "" + "
".join(body) + "
" 51 | -------------------------------------------------------------------------------- /sources/id/zhiend.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class ZhiEnd(Crawler): 10 | base_url = ["http://zhi-end.blogspot.com/", "http://zhi-end.blogspot.co.id/"] 11 | 12 | def initialize(self): 13 | self.home_url = "http://zhi-end.blogspot.com/" 14 | 15 | def read_novel_info(self): 16 | logger.debug("Visiting %s", self.novel_url) 17 | soup = self.get_soup(self.novel_url) 18 | 19 | possible_title = soup.select_one("h1.entry-title") 20 | assert possible_title, "No novel title" 21 | self.novel_title = possible_title.text.strip() 22 | logger.info("Novel title: %s", self.novel_title) 23 | 24 | possible_image = soup.select_one("div.entry-content div a img") 25 | if possible_image: 26 | self.novel_cover = self.absolute_url(possible_image["src"]) 27 | logger.info("Novel cover: %s", self.novel_cover) 28 | 29 | self.novel_author = "Translated by Zhi End" 30 | logger.info("Novel author: %s", self.novel_author) 31 | 32 | # Extract volume-wise chapter entries 33 | chapters = soup.select('div.entry-content div [href*="zhi-end.blogspot"]') 34 | 35 | for a in chapters: 36 | chap_id = len(self.chapters) + 1 37 | vol_id = 1 + len(self.chapters) // 100 38 | if len(self.volumes) < vol_id: 39 | self.volumes.append({"id": vol_id}) 40 | self.chapters.append( 41 | { 42 | "id": chap_id, 43 | "volume": vol_id, 44 | "url": self.absolute_url(a["href"]), 45 | "title": a.text.strip() or ("Chapter %d" % chap_id), 46 | } 47 | ) 48 | 49 | def download_chapter_body(self, chapter): 50 | soup = self.get_soup(chapter["url"]) 51 | 52 | body_parts = soup.select_one("div.post-body") 53 | 54 | return self.cleaner.extract_contents(body_parts) 55 | -------------------------------------------------------------------------------- /sources/multi/quotev.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from bs4 import Tag 5 | 6 | from lncrawl.core.crawler import Crawler 7 | from lncrawl.core.exeptions import LNException 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | class QuotevCrawler(Crawler): 13 | base_url = ["https://www.quotev.com/"] 14 | 15 | def initialize(self) -> None: 16 | self.cleaner.bad_css.update([".nosel"]) 17 | 18 | def read_novel_info(self): 19 | soup = self.get_soup(self.novel_url) 20 | 21 | title_tag = soup.select_one("#quizHeaderTitle h1") 22 | if not isinstance(title_tag, Tag): 23 | raise LNException("No title found") 24 | 25 | self.novel_title = title_tag.text.strip() 26 | 27 | image_tag = soup.select_one("meta[property='og:image']") 28 | if isinstance(image_tag, Tag): 29 | self.novel_cover = self.absolute_url(image_tag["content"]) 30 | 31 | logger.info("Novel cover: %s", self.novel_cover) 32 | 33 | for a in soup.select("#rselectList a"): 34 | self.chapters.append( 35 | { 36 | "id": len(self.chapters) + 1, 37 | "title": a.text.strip(), 38 | "url": self.absolute_url(a["href"]), 39 | } 40 | ) 41 | 42 | def download_chapter_body(self, chapter): 43 | soup = self.get_soup(chapter["url"]) 44 | contents = soup.select_one("#rescontent") 45 | self.cleaner.clean_contents(contents) 46 | 47 | return str(contents) 48 | -------------------------------------------------------------------------------- /sources/pt/centralnovel.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.templates.mangastream import MangaStreamTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class CentralNovelCrawler(MangaStreamTemplate): 10 | base_url = ["https://centralnovel.com/"] 11 | 12 | def initialize(self) -> None: 13 | self.init_executor(ratelimit=2.99) 14 | -------------------------------------------------------------------------------- /sources/ru/bestmanga.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | from lncrawl.templates.madara import MadaraTemplate 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class BestMangaCrawler(MadaraTemplate): 10 | has_manga = True 11 | base_url = ["https://bestmanga.club/"] 12 | -------------------------------------------------------------------------------- /sources/ru/ifreedom.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class IfreedomCrawler(Crawler): 10 | base_url = [ 11 | "https://ifreedom.su/", 12 | "https://bookhamster.ru/" 13 | ] 14 | 15 | def read_novel_info(self): 16 | soup = self.get_soup(self.novel_url) 17 | 18 | possible_title = soup.select_one("h1.entry-title") 19 | if possible_title: 20 | self.novel_title = possible_title.get_text() 21 | 22 | logger.info("Novel title: %s", self.novel_title) 23 | 24 | possible_author = soup.select_one("span.dashicons-admin-users").next\ 25 | .next\ 26 | .next 27 | if "Не указан" not in str(possible_author): 28 | self.novel_author = possible_author.get_text() 29 | logger.info("Novel author: %s", self.novel_author) 30 | 31 | possible_full_synopsis = soup.select_one("span.open-desc") 32 | if possible_full_synopsis: 33 | possible_full_synopsis = possible_full_synopsis["onclick"] 34 | self.novel_synopsis = possible_full_synopsis.split("= '")[1].strip("';") 35 | else: 36 | self.novel_synopsis = soup.select_one("div.descr-ranobe").get_text() 37 | 38 | img_src = soup.select_one("div.img-ranobe img") 39 | if img_src: 40 | self.novel_cover = self.absolute_url(img_src["src"]) 41 | 42 | for a in reversed(soup.select(".menu-ranobe a")): 43 | chap_id = 1 + (len(self.chapters)) 44 | 45 | self.chapters.append( 46 | { 47 | "id": chap_id, 48 | "title": a.text.strip(), 49 | "url": self.absolute_url(a['href']) 50 | } 51 | ) 52 | 53 | def download_chapter_body(self, chapter): 54 | soup = self.get_soup(chapter["url"]) 55 | content = soup.select_one("div.entry-content") 56 | return self.cleaner.extract_contents(content) 57 | -------------------------------------------------------------------------------- /sources/zh/daocaorenshuwu.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Daocaorenshuwu(Crawler): 10 | base_url = [ 11 | "https://www.daocaorenshuwu.com/", 12 | ] 13 | 14 | def read_novel_info(self): 15 | logger.debug("Visiting %s", self.novel_url) 16 | soup = self.get_soup(self.novel_url) 17 | 18 | possible_title = soup.select_one(".container .book-info h1.book-name") 19 | assert possible_title, "No novel title" 20 | self.novel_title = possible_title.text 21 | 
logger.info("Novel title: %s", self.novel_title) 22 | 23 | self.novel_author = soup.select(".container .media-body .row div")[ 24 | 0 25 | ].text.strip() 26 | logger.info("Novel author: %s", self.novel_author) 27 | 28 | possible_image = soup.select_one(".container .media-left a img") 29 | if possible_image: 30 | self.novel_cover = self.absolute_url(possible_image["src"]) 31 | logger.info("Novel cover: %s", self.novel_cover) 32 | 33 | # Extract volume-wise ch 34 | # apter entries 35 | chapters = soup.select("#all-chapter a") 36 | 37 | for a in chapters: 38 | chap_id = len(self.chapters) + 1 39 | vol_id = 1 + len(self.chapters) // 100 40 | if len(self.volumes) < vol_id: 41 | self.volumes.append({"id": vol_id}) 42 | self.chapters.append( 43 | { 44 | "id": chap_id, 45 | "volume": vol_id, 46 | "url": self.absolute_url(a["href"]), 47 | "title": a.text.strip() or ("Chapter %d" % chap_id), 48 | } 49 | ) 50 | 51 | def download_chapter_body(self, chapter): 52 | soup = self.get_soup(chapter["url"]) 53 | contents = soup.select(".cont-text > p") 54 | contents = [str(p) for p in contents if p.text.strip()] 55 | return "".join(contents) 56 | -------------------------------------------------------------------------------- /sources/zh/powanjuan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class PowanjuanCrawler(Crawler): 10 | base_url = "https://www.powanjuan.cc/" 11 | 12 | def read_novel_info(self): 13 | logger.debug("Visiting %s", self.novel_url) 14 | soup = self.get_soup(self.novel_url, encoding='gb2312') 15 | 16 | possible_title = soup.select_one(".desc h1") 17 | assert possible_title, "No novel title" 18 | self.novel_title = possible_title.text.split('(')[0].strip() 19 | logger.info("Novel title: %s", self.novel_title) 20 | 21 | possible_novel_author = soup.select_one('.descTip span') 22 | if possible_novel_author: 23 | self.novel_author = possible_novel_author.text.replace('作者:', '').strip() 24 | logger.info("Novel author: %s", self.novel_author) 25 | 26 | possible_synopsis = soup.select_one('.descInfo p') 27 | if possible_synopsis: 28 | self.novel_synopsis = possible_synopsis.text 29 | logger.info("Novel synopsis: %s", self.novel_synopsis) 30 | 31 | volumes = set([]) 32 | for a in soup.select(".catalog ul.clearfix li a"): 33 | ch_id = len(self.chapters) + 1 34 | vol_id = 1 + len(self.chapters) // 100 35 | volumes.add(vol_id) 36 | self.chapters.append( 37 | { 38 | "id": ch_id, 39 | "volume": vol_id, 40 | "title": a.text.strip(), 41 | "url": self.absolute_url(a["href"]), 42 | } 43 | ) 44 | 45 | self.volumes = [{"id": x, "title": ""} for x in volumes] 46 | 47 | def download_chapter_body(self, chapter): 48 | soup = self.get_soup(chapter["url"], encoding='gb2312') 49 | contents = soup.select_one("#mycontent") 50 | return self.cleaner.extract_contents(contents) 51 | -------------------------------------------------------------------------------- /sources/zh/soxs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | from lncrawl.core.crawler import Crawler 4 | 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Soxc(Crawler): 10 | base_url = ["https://www.soxs.cc/"] 11 | 12 | def read_novel_info(self): 13 | self.novel_url = self.novel_url.replace("/book/", "/") 14 | self.novel_url = self.novel_url.replace(".html", "/") 15 | 
soup = self.get_soup(self.novel_url) 16 | 17 | possible_title = soup.select_one(".xiaoshuo h1") 18 | assert possible_title, "No novel title" 19 | self.novel_title = possible_title.get_text() 20 | logger.info(f"Novel title: {self.novel_title}") 21 | 22 | self.novel_author = soup.select_one(".xiaoshuo h6").get_text() 23 | logger.info(f"Novel Author: {self.novel_author}") 24 | 25 | possible_novel_cover = soup.select_one(".book_cover img") 26 | if possible_novel_cover: 27 | self.novel_cover = self.absolute_url(possible_novel_cover["src"]) 28 | logger.info(f"Novel Cover: {self.novel_cover}") 29 | 30 | logger.info("Getting chapters...") 31 | for chapter in soup.select(".novel_list dd a"): 32 | url = self.absolute_url(chapter["href"]) 33 | chap_id = len(self.chapters) + 1 34 | if len(self.chapters) % 100 == 0: 35 | vol_id = len(self.chapters) // 100 + 1 36 | self.volumes.append({"id": vol_id}) 37 | 38 | self.chapters.append( 39 | { 40 | "id": chap_id, 41 | "url": url, 42 | "volume": vol_id, 43 | } 44 | ) 45 | 46 | def download_chapter_body(self, chapter): 47 | soup = self.get_soup(chapter["url"]) 48 | title = soup.select_one(".read_title h1").text.strip() 49 | chapter["title"] = title 50 | 51 | content = soup.select(".content") 52 | content = "\n".join(str(p) for p in content) 53 | content = content.replace(self.novel_url, "") 54 | content = content.replace("soxscc", "mtlrealm.com ") 55 | return content 56 | -------------------------------------------------------------------------------- /sources/zh/trxs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | from lncrawl.core.crawler import Crawler 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class TrxsCrawler(Crawler): 10 | base_url = "https://trxs.cc/" 11 | 12 | def read_novel_info(self): 13 | logger.debug("Visiting %s", self.novel_url) 14 | soup = self.get_soup(self.novel_url, encoding='gb2312') 15 | 16 | possible_title = soup.select_one(".book_info h1") 17 | assert possible_title, "No novel title" 18 | self.novel_title = possible_title.text 19 | logger.info("Novel title: %s", self.novel_title) 20 | 21 | possible_novel_cover = soup.select_one('.book_info img') 22 | if possible_novel_cover: 23 | self.novel_cover = self.absolute_url(possible_novel_cover["src"]) 24 | logger.info("Novel cover: %s", self.novel_cover) 25 | 26 | possible_synopsis = soup.select_one('.book_info p') 27 | if possible_synopsis: 28 | self.novel_synopsis = possible_synopsis.text 29 | logger.info("Novel synopsis %s", self.novel_synopsis) 30 | 31 | possible_novel_author = soup.select_one('.book_info a') 32 | if possible_novel_author: 33 | self.novel_author = possible_novel_author.text 34 | logger.info("Novel author: %s", self.novel_author) 35 | 36 | volumes = set([]) 37 | for a in soup.select(".book_list a"): 38 | ch_id = len(self.chapters) + 1 39 | vol_id = 1 + len(self.chapters) // 100 40 | volumes.add(vol_id) 41 | self.chapters.append( 42 | { 43 | "id": ch_id, 44 | "volume": vol_id, 45 | "title": a.text, 46 | "url": self.absolute_url(a["href"]), 47 | } 48 | ) 49 | 50 | self.volumes = [{"id": x, "title": ""} for x in volumes] 51 | 52 | def download_chapter_body(self, chapter): 53 | soup = self.get_soup(chapter["url"], encoding='gb2312') 54 | contents = soup.select_one(".read_chapterDetail") 55 | return self.cleaner.extract_contents(contents) 56 | --------------------------------------------------------------------------------