├── .idea ├── .gitignore ├── codeStyles │ ├── codeStyleConfig.xml │ └── Project.xml ├── misc.xml ├── vcs.xml ├── modules.xml ├── prettier.xml ├── dictionaries │ └── project.xml └── Ebook-API.iml ├── .env.example ├── .prettierrc ├── src ├── interfaces │ ├── firestoreBookstore.ts │ ├── bookstore.ts │ ├── general.ts │ ├── result.ts │ └── book.ts ├── stores │ ├── index.ts │ ├── likerLand.ts │ ├── kindle.ts │ ├── hyread.ts │ ├── pubu.ts │ ├── booksCompany.ts │ ├── kobo.ts │ ├── taaze.ts │ ├── playStore.ts │ ├── readmoo.ts │ └── bookWalker.ts ├── bot.ts ├── routers │ ├── bookstores.ts │ └── searches.ts ├── index.ts └── firestore.ts ├── .editorconfig ├── .vscode └── settings.json ├── tsconfig.json ├── .github └── workflows │ └── code-check.yml ├── README.md ├── package.json └── .gitignore /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | PORT=65432 2 | TOKEN= 3 | GROUPID= 4 | DBURL= 5 | FIREBASE_SERVICE_ACCOUNT_BASE64= 6 | READMOO_AP_ID= 7 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "all", 4 | "printWidth": 100, 5 | "endOfLine": "lf" 6 | } 7 | -------------------------------------------------------------------------------- /src/interfaces/firestoreBookstore.ts: -------------------------------------------------------------------------------- 1 | import { Bookstore } from './bookstore.js'; 2 | export interface FirestoreBookstore extends Bookstore { 3 | proxyUrl: string; 4 | } 5 | -------------------------------------------------------------------------------- /src/interfaces/bookstore.ts: 
/**
 * Generic string-keyed dictionary whose values all share the type `T`.
 *
 * The type parameter is declared explicitly (the index signature refers to it)
 * and defaults to `unknown`, so a bare `AnyObject` reference still type-checks.
 */
export interface AnyObject<T = unknown> {
  [key: string]: T;
}

/** Number of nanoseconds in one second. */
const NS_PER_SEC = 1e9;
/** Number of milliseconds in one nanosecond. */
const MS_PER_NS = 1e-6;

/**
 * Converts a `[seconds, nanoseconds]` tuple into milliseconds.
 *
 * The stores in this project pass the value returned by
 * `process.hrtime(start)` to obtain the elapsed crawl time.
 *
 * @param param0 - Tuple of whole seconds and remaining nanoseconds.
 * @returns The same duration expressed in milliseconds.
 */
export const getProcessTime = ([seconds, nanoseconds]: [number, number]): number => {
  return (seconds * NS_PER_SEC + nanoseconds) * MS_PER_NS;
};
| } 13 | -------------------------------------------------------------------------------- /.idea/prettier.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | charset = utf-8 3 | end_of_line = lf 4 | insert_final_newline = true 5 | trim_trailing_whitespace = true 6 | max_line_length = 100 7 | 8 | [*.md] 9 | indent_style = space 10 | indent_size = 2 11 | trim_trailing_whitespace = false 12 | 13 | [*.{js,jsx,ts,tsx}] 14 | indent_style = space 15 | indent_size = 2 16 | 17 | [*.json] 18 | indent_style = space 19 | indent_size = 4 20 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "DBURL", 4 | "Firestore", 5 | "GROUPID", 6 | "bookwalker", 7 | "hyread", 8 | "pubu", 9 | "readmoo", 10 | "taaze" 11 | ], 12 | "editor.formatOnSave": true, 13 | "editor.defaultFormatter": "esbenp.prettier-vscode", 14 | "typescript.tsdk": "node_modules/typescript/lib" 15 | } 16 | -------------------------------------------------------------------------------- /src/interfaces/book.ts: -------------------------------------------------------------------------------- 1 | export interface Book { 2 | id?: string; 3 | thumbnail?: string; 4 | title: string; 5 | link: string; 6 | priceCurrency?: string; 7 | price?: number; 8 | about?: string; 9 | publisher?: string; 10 | publishDate?: string; 11 | authors?: string[]; 12 | nonDrmPrice?: number; 13 | translator?: string; 14 | translators?: string[]; 15 | painters?: string[]; 16 | } 17 | -------------------------------------------------------------------------------- /.idea/dictionaries/project.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | bookwalker 5 | dburl 6 | groupid 7 | hyread 8 | inited 9 | kobo 10 | likerLand 11 | marky 12 | pubu 13 | readmoo 14 | searchlist 15 | taaze 16 | 17 | 18 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "module": "ES2015", 5 | "outDir": "dist", 6 | "rootDir": "src", 7 | "strict": true, 8 | "noImplicitAny": true, 9 | "moduleResolution": "node", 10 | "resolveJsonModule": true, 11 | "esModuleInterop": true, 12 | "sourceMap": true, 13 | "baseUrl": ".", 14 | "paths": { 15 | "*": ["node_modules/*", "src/types/*"] 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/stores/index.ts: -------------------------------------------------------------------------------- 1 | export { default as booksCompany } from './booksCompany.js'; 2 | export { default as kobo } from './kobo.js'; 3 | export { default as taaze } from './taaze.js'; 4 | export { default as bookWalker } from './bookWalker.js'; 5 | export { default as readmoo } from './readmoo.js'; 6 | export { default as playStore } from './playStore.js'; 7 | export { default as pubu } from './pubu.js'; 8 | export { default as hyread } from './hyread.js'; 9 | export { default as kindle } from './kindle.js'; 10 | export { default as likerLand } from './likerLand.js'; 11 | -------------------------------------------------------------------------------- /.idea/Ebook-API.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.github/workflows/code-check.yml: 
/** Shared Telegram client; stays `undefined` until `botInit` has run. */
let bot: TelegramBot | undefined;
/** Chat that `sendMessage` posts to; set together with `bot`. */
let groupId: string;

/**
 * Creates the module-wide Telegram bot client (polling disabled) and remembers
 * the target group.
 *
 * Calling it a second time does not replace the existing client; the problem is
 * logged instead. The returned promise never rejects because failures are
 * caught and logged here.
 *
 * @param token - Telegram bot token.
 * @param group - Telegram group (chat) id that messages are sent to.
 */
export const botInit = (token: string, group: string) => {
  return new Promise<void>((resolve, reject) => {
    if (bot) {
      // Reject with a real Error so the log carries a stack trace.
      return reject(new Error('bot is already inited.'));
    }

    bot = new TelegramBot(token, {
      polling: false,
      request: {
        url: 'https://api.telegram.org',
        agentOptions: {
          keepAlive: true,
          // NOTE(review): presumably pins connections to IPv4 — confirm against the agent options.
          family: 4,
        },
      },
    });
    groupId = group;

    resolve();
  }).catch((error) => {
    if (error) {
      console.error(error);
    }
  });
};

/**
 * Sends a Markdown-formatted message to the configured group.
 *
 * Errors from the Telegram API are logged and swallowed. If the bot has not
 * been initialised yet, the problem is logged and a resolved promise is
 * returned; previously this case threw a synchronous TypeError that bypassed
 * the `.catch` below.
 *
 * @param message - Message text, interpreted with `parse_mode: 'Markdown'`.
 */
export const sendMessage = (message: string) => {
  if (!bot) {
    console.error(new Error('bot is not inited.'));
    return Promise.resolve();
  }

  return bot
    .sendMessage(groupId, message, { parse_mode: 'Markdown' })
    .catch((error) => console.error(error));
};
![](https://media.giphy.com/media/ggj6JI9uKxTaBB81Yo/giphy.gif) 4 | 5 | ## Prerequisite 6 | 7 | 1. Install [Git latest version](https://git-scm.com) and [Node.js latest LTS version](https://nodejs.org) 8 | 2. Get codebase 9 | ```bash 10 | $ git clone git@github.com:Taiwan-Ebook-Lover/TaiwanEbookSearch-API.git 11 | ``` 12 | 3. Install dependencies 13 | ```bash 14 | $ cd TaiwanEbookSearch-API 15 | $ npm install 16 | ``` 17 | 18 | ## Environment variables 19 | 20 | Copy `.env.example` to `.env` to customize those environment variables. 21 | 22 | ### The `.env` structure 23 | 24 | - `PORT` => local serve port number. 25 | - `TOKEN` => Telegram bot token. 26 | - `GROUPID` => Telegram group id. 27 | - `DBURL` => Firebase project URL. 28 | - `FIREBASE_SERVICE_ACCOUNT_BASE64` => BASE64 string of firebase service account file. (Tool script: `$ npm run convert-firebase-config -- -in serviceAccount.json`) 29 | - `READMOO_AP_ID` => Readmoo affiliate program id. 30 | 31 | ## Build & Serve 32 | 33 | proxy to local server: 34 | 35 | ```bash 36 | npm run build 37 | npm start 38 | ``` 39 | -------------------------------------------------------------------------------- /src/routers/bookstores.ts: -------------------------------------------------------------------------------- 1 | import { Router } from 'express'; 2 | import { getBookstores } from '../firestore.js'; 3 | 4 | const bookstoresRouter = Router(); 5 | 6 | bookstoresRouter.get('/', (req, res, next) => { 7 | getBookstores() 8 | .then((bookstores) => { 9 | return res.status(200).send(bookstores.map(({ proxyUrl, ...bookstore }) => bookstore)); 10 | }) 11 | .catch((error) => { 12 | console.time('Error time: '); 13 | console.error(error); 14 | 15 | return res.status(503).send({ 16 | message: 'Something is wrong...', 17 | }); 18 | }); 19 | }); 20 | 21 | bookstoresRouter.get('/:id', (req, res, next) => { 22 | const bookstoreId: string = req.params.id; 23 | getBookstores(bookstoreId) 24 | .then((bookstores) => { 25 | if 
const app: express.Application = express();

/**
 * Boots the service: initialises the Telegram bot, connects to Firestore with
 * the service account decoded from the environment, then starts listening.
 *
 * Reads `TOKEN`, `GROUPID`, `DBURL`, `FIREBASE_SERVICE_ACCOUNT_BASE64` and
 * `PORT` from `process.env`.
 *
 * @throws Error when `FIREBASE_SERVICE_ACCOUNT_BASE64` is missing, or a
 *   SyntaxError when its decoded content is not valid JSON.
 */
const init = () => {
  // Telegram bot is coming
  void botInit(process.env.TOKEN as string, process.env.GROUPID as string);

  // Database is coming too
  const firebaseUrl: string = process.env.DBURL ?? '';
  const encodedServiceAccount = process.env.FIREBASE_SERVICE_ACCOUNT_BASE64;
  if (!encodedServiceAccount) {
    // Fail with a readable message instead of the TypeError Buffer.from(undefined) raises.
    throw new Error('FIREBASE_SERVICE_ACCOUNT_BASE64 is not set.');
  }
  const serviceAccount: ServiceAccount = JSON.parse(
    Buffer.from(encodedServiceAccount, 'base64').toString(),
  );

  void connect(firebaseUrl, serviceAccount);

  /**
   * Build db, Server
   */

  app.listen(process.env.PORT, () => {
    console.log(`listening on http://localhost:${process.env.PORT}`);
  });
};

// compress all responses
app.use(compression());

// for parsing application/json
app.use(express.json() as RequestHandler);

// for parsing application/x-www-form-urlencoded
app.use(express.urlencoded({ extended: true }) as RequestHandler);

// for cors
app.use(
  cors({
    // 'OPTIONS' is the actual HTTP method name; the list previously contained the typo 'OPTION'.
    methods: ['GET', 'POST', 'PATCH', 'OPTIONS', 'DELETE'],
    credentials: true,
    origin: true,
  }),
);

/**
 * Route
 */

app.use('/searches', searchesRouter);

app.use('/bookstores', bookstoresRouter);

/**
 * Error Handler
 */

// Fallback for GET requests that matched no route above.
// NOTE(review): only GET is covered and the status is 405 rather than 404 — kept as-is for API compatibility.
app.get('*', (req, res) => {
  return res.status(405).send({
    message: 'Method Not Allowed!',
  });
});

init();
/** Shared Firestore handle; stays unset until `connect` has succeeded. */
export let firestore: FirebaseFirestore.Firestore;

/**
 * Logs an error preceded by the time it happened.
 *
 * Replaces the former `console.time('Error time: ')` calls, which start a timer
 * (and warn about a duplicate label on later calls) instead of printing a time.
 */
const logError = (error: unknown): void => {
  console.error(`Error time: ${new Date().toISOString()}`);
  console.error(error);
};

/**
 * Initialises the firebase-admin app and stores its Firestore instance in `firestore`.
 *
 * Never rejects: an already-established connection or an initialisation failure
 * is logged and the promise resolves normally.
 *
 * @param url - Value passed as `databaseURL` to `admin.initializeApp`.
 * @param serviceAccount - Service account used to build the credential.
 */
export const connect = async (url: string, serviceAccount: ServiceAccount): Promise<void> => {
  try {
    // check firestore connected status
    if (firestore) {
      throw new Error('DB is already connected.');
    }

    admin.initializeApp({
      credential: admin.credential.cert(serviceAccount),
      databaseURL: url,
    });

    // update firestore
    firestore = admin.firestore();
  } catch (error) {
    logError(error);
  }
};

/**
 * Reads bookstores from the `bookstores` collection.
 *
 * @param bookstoreId - When given, only documents whose `id` field equals it are returned.
 * @returns The matching bookstores, or an empty array when nothing matches or
 *   the query fails (the failure is logged). Being `async`, a call made before
 *   `connect` now also ends up here instead of throwing synchronously.
 */
export const getBookstores = async (bookstoreId?: string): Promise<FirestoreBookstore[]> => {
  try {
    const collection = firestore.collection('bookstores');
    const query: FirebaseFirestore.Query = bookstoreId
      ? collection.where('id', '==', bookstoreId)
      : collection;

    const snapshot = await query.get();
    if (snapshot.empty) {
      throw new Error('No matching bookstore.');
    }

    return snapshot.docs.map((doc) => doc.data() as FirestoreBookstore);
  } catch (error) {
    logError(error);
    return [];
  }
};

/**
 * Stores a search record in the `searches` collection.
 *
 * `undefined` values are converted to `null` through a JSON round-trip before
 * the record is written.
 *
 * @param data - Search record to persist.
 * @returns The stored record plus its generated `id`, or `''` when the write
 *   fails (the failure is logged).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- result shape is defined by callers outside this file
export const insertSearch = async (data: AnyObject<unknown>): Promise<any> => {
  try {
    const formattedData = JSON.parse(
      JSON.stringify(data, (key, value) => (value === undefined ? null : value)),
    );
    const { id } = await firestore.collection('searches').add(formattedData);
    return { ...formattedData, id };
  } catch (error) {
    logError(error);
    return '';
  }
};

/**
 * Loads a stored search record by document id.
 *
 * @param id - Document id inside the `searches` collection.
 * @returns The document data plus `id`, or `{}` when the document does not
 *   exist or the read fails (the failure is logged).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- document shape is not declared in this file
export const getSearch = async (id: string): Promise<AnyObject<any>> => {
  try {
    const doc = await firestore.collection('searches').doc(id).get();
    if (!doc.exists) {
      // The message previously said "bookstore", copied from getBookstores.
      throw new Error('No matching search.');
    }
    return { ...doc.data(), id };
  } catch (error) {
    logError(error);
    return {};
  }
};
method: 'GET', 41 | compress: true, 42 | signal: timeoutSignal(10000), 43 | agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined, 44 | headers: { 45 | 'User-Agent': `${userAgent}`, 46 | }, 47 | }; 48 | 49 | return fetch(base, options) 50 | .then((response) => { 51 | if (!response.ok) { 52 | throw response.statusText; 53 | } 54 | 55 | return response.json(); 56 | }) 57 | .then((data) => { 58 | // calc process time 59 | const hrEnd = process.hrtime(hrStart); 60 | const processTime = getProcessTime(hrEnd); 61 | const books: Book[] = (data as any).list.map((item: any) => { 62 | const { iscnId, imageUrl, name, url, minPrice, description, ownerName } = item; 63 | return { 64 | id: iscnId, 65 | thumbnail: imageUrl, 66 | title: name, 67 | link: url, 68 | priceCurrency: 'USD', 69 | price: minPrice, 70 | about: description, 71 | authors: [ownerName], 72 | }; 73 | }); 74 | const result: Result = { 75 | bookstore, 76 | isOkay: true, 77 | status: 'Crawler success.', 78 | processTime, 79 | books, 80 | quantity: books.length, 81 | }; 82 | 83 | return result; 84 | }) 85 | .catch((error) => { 86 | // calc process time 87 | const hrEnd = process.hrtime(hrStart); 88 | const processTime = getProcessTime(hrEnd); 89 | 90 | console.log(error.message); 91 | 92 | const result: Result = { 93 | bookstore, 94 | isOkay: false, 95 | status: 'Crawler failed.', 96 | processTime, 97 | books: [], 98 | quantity: 0, 99 | error: error.message, 100 | }; 101 | 102 | return result; 103 | }); 104 | }; 105 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # firebase service account json 2 | serviceAccount.json 3 | 4 | # Paw API file 5 | *.paw 6 | 7 | # Logs 8 | logs 9 | *.log 10 | npm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | 14 | # Runtime data 15 | pids 16 | *.pid 17 | *.seed 18 | *.pid.lock 19 | 20 | # Directory for instrumented libs generated by 
jscoverage/JSCover 21 | lib-cov 22 | 23 | # Coverage directory used by tools like istanbul 24 | coverage 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | dist/ 41 | 42 | # Dependency directories 43 | node_modules/ 44 | jspm_packages/ 45 | 46 | # Typescript v1 declaration files 47 | typings/ 48 | 49 | # Optional npm cache directory 50 | .npm 51 | 52 | # Optional eslint cache 53 | .eslintcache 54 | 55 | # Optional REPL history 56 | .node_repl_history 57 | 58 | # Output of 'npm pack' 59 | *.tgz 60 | 61 | # Yarn Integrity file 62 | .yarn-integrity 63 | 64 | # dotenv environment variables file 65 | .env 66 | 67 | # General 68 | .DS_Store 69 | .AppleDouble 70 | .LSOverride 71 | 72 | # Icon must end with two \r 73 | Icon 74 | 75 | # Thumbnails 76 | ._* 77 | 78 | # Files that might appear in the root of a volume 79 | .DocumentRevisions-V100 80 | .fseventsd 81 | .Spotlight-V100 82 | .TemporaryItems 83 | .Trashes 84 | .VolumeIcon.icns 85 | .com.apple.timemachine.donotpresent 86 | 87 | # Directories potentially created on remote AFP share 88 | .AppleDB 89 | .AppleDesktop 90 | Network Trash Folder 91 | Temporary Items 92 | .apdisk 93 | 94 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 95 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 96 | 97 | # User-specific stuff 98 | .idea/**/workspace.xml 99 | .idea/**/tasks.xml 100 | .idea/**/usage.statistics.xml 101 | .idea/**/shelf 102 | .idea/dictionaries/** 103 | !.idea/dictionaries/project.xml 104 | 105 | # Generated files 106 | .idea/**/contentModel.xml 107 | 108 | # Sensitive or high-churn files 
109 | .idea/**/dataSources/ 110 | .idea/**/dataSources.ids 111 | .idea/**/dataSources.local.xml 112 | .idea/**/sqlDataSources.xml 113 | .idea/**/dynamic.xml 114 | .idea/**/uiDesigner.xml 115 | .idea/**/dbnavigator.xml 116 | 117 | # Gradle 118 | .idea/**/gradle.xml 119 | .idea/**/libraries 120 | 121 | # Gradle and Maven with auto-import 122 | # When using Gradle or Maven with auto-import, you should exclude module files, 123 | # since they will be recreated, and may cause churn. Uncomment if using 124 | # auto-import. 125 | # .idea/artifacts 126 | # .idea/compiler.xml 127 | # .idea/jarRepositories.xml 128 | # .idea/modules.xml 129 | # .idea/*.iml 130 | # .idea/modules 131 | # *.iml 132 | # *.ipr 133 | 134 | # CMake 135 | cmake-build-*/ 136 | 137 | # Mongo Explorer plugin 138 | .idea/**/mongoSettings.xml 139 | 140 | # File-based project format 141 | *.iws 142 | 143 | # IntelliJ 144 | out/ 145 | 146 | # mpeltonen/sbt-idea plugin 147 | .idea_modules/ 148 | 149 | # JIRA plugin 150 | atlassian-ide-plugin.xml 151 | 152 | # Cursive Clojure plugin 153 | .idea/replstate.xml 154 | 155 | # Sonarlint plugin 156 | .idea/sonarlint/ 157 | 158 | # Crashlytics plugin (for Android Studio and IntelliJ) 159 | com_crashlytics_export_strings.xml 160 | crashlytics.properties 161 | crashlytics-build.properties 162 | fabric.properties 163 | 164 | # Editor-based Rest Client 165 | .idea/httpRequests 166 | 167 | # Android studio 3.1+ serialized cache file 168 | .idea/caches/build_file_checksums.ser 169 | 170 | -------------------------------------------------------------------------------- /src/stores/kindle.ts: -------------------------------------------------------------------------------- 1 | import * as cheerio from 'cheerio'; 2 | import fetch from 'node-fetch'; 3 | import timeoutSignal from 'timeout-signal'; 4 | 5 | import pkg from 'https-proxy-agent'; 6 | const { HttpsProxyAgent } = pkg; 7 | 8 | import { Book } from '../interfaces/book.js'; 9 | import { Result } from 
/**
 * Searches the Amazon Kindle store for `keywords` and reports the outcome as a `Result`.
 *
 * When the bookstore is flagged offline, a failed `Result` is returned
 * synchronously and no request is made. Otherwise the search page is fetched
 * (10 s timeout, through `proxyUrl` when one is set) and parsed, and a promise
 * of the `Result` is returned. That promise does not reject: any failure is
 * turned into a `Result` with `isOkay: false` and the reason in `error`.
 *
 * @param param0 - Bookstore record; `proxyUrl` is split off so it is not echoed in the result.
 * @param keywords - Raw search keywords; URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL encode
  keywords = encodeURIComponent(keywords);
  const base = `https://www.amazon.com/s?k=${keywords}&i=digital-text`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw an Error (not a bare string) so the catch below can read `.message`;
        // the status code is included because statusText may be empty.
        throw new Error(`${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => {
      return _getBooks(cheerio.load(body));
    })
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Anything can be thrown; normalise it to a message string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};

/**
 * Extracts the books from a loaded search-result page.
 *
 * @param $ - Cheerio handle of the search-result HTML.
 * @returns One `Book` per result item that carries a `data-asin` attribute;
 *   an empty array when the page shows no refinement filter or no result items.
 *   `price` is `NaN` when no price text is found for an item.
 */
function _getBooks($: cheerio.CheerioAPI): Book[] {
  const noEbookFilter = $('#s-refinements').children('div').first().children().length === 0;

  if (noEbookFilter) {
    // Avoid returning results from all categories if no ebook matches.
    return [];
  }

  const $list = $('.s-main-slot').children('.s-result-item');
  const books: Book[] = [];

  $list.each((i, elem) => {
    const $item = $(elem);
    const id = $item.attr('data-asin');

    // Result items without an ASIN are skipped.
    if (!id) {
      return;
    }

    const $h2 = $item.find('h2');
    const priceText = $item.find('.a-price .a-offscreen').eq(0).text();

    books.push({
      id,
      title: $h2.text().trim(),
      // Strip the currency sign and thousands separators ("$1,299.00" -> "1299.00").
      price: parseFloat(priceText.replace(/[$,]/g, '')),
      priceCurrency: 'USD',
      // Fall back to the site root instead of producing "...amazon.comundefined".
      link: `https://www.amazon.com${$h2.find('a').attr('href') ?? ''}`,
      thumbnail: $item.find('img').attr('src'),
    });
  });

  return books;
}
/**
 * Extracts book entries from a HyRead search-result page.
 *
 * @param $ - Parsed search-result document.
 * @param base - URL the page was requested from; book links are resolved against it.
 * @returns One entry per `.book-wrap` element; empty when the page has none.
 *   `price` is `-1` when no price could be parsed.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  const books: Book[] = [];

  $('.book-wrap').each((_index, elem) => {
    const $book = $(elem);
    const $titleLink = $book.children('.book-title-01').children('a');
    const href: string = $titleLink.prop('href') ?? '';

    // Strip currency marks and thousands separators before parsing;
    // parseFloat('1,280') would otherwise stop at the comma and yield 1.
    const priceText = $book.children('.book-money').children('.book-price').text();
    const price = parseFloat(priceText.replace(/NT\$|,|\s/g, ''));

    books.push({
      // The detail-page href has the form `bookDetail.jsp?id=<id>`.
      id: href.replace(/bookDetail\.jsp\?id=/, ''),
      thumbnail: $book
        .children('.book-cover')
        .children('.book-overlay')
        .children('.book-link')
        .children('.coverBox')
        .children('.bookPic')
        .prop('src'),
      title: $titleLink.text(),
      link: new URL(href, base).toString(),
      priceCurrency: 'TWD',
      price: price || -1,
    });
  });

  return books;
}
/**
 * Extracts book entries from a Pubu search-result page.
 *
 * @param $ - Parsed search-result document.
 * @param base - URL the page was requested from; book links are resolved against it.
 * @returns One entry per `article` under `#search-list-content`; empty when
 *   there is none. `price` is `-1` when no price could be parsed, and
 *   `nonDrmPrice` is set only when a second price is listed and parses.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  // Separators between author names: ", ", an ASCII or full-width comma,
  // the ideographic comma, or a slash (ASCII or full-width).
  const authorSeparator = /, |[,\uFF0C\u3001\/\uFF0F]/;
  // "<role>:<name>" credits, with an ASCII or full-width colon.
  const roleSeparator = /[:\uFF1A]/;
  const parsePrice = (text: string) => parseFloat(text.replace(/NT\$|,|\s/g, ''));

  const books: Book[] = [];

  $('#search-list-content')
    .children('div')
    .children('article')
    .each((_index, elem) => {
      const $article = $(elem);
      const $coverLink = $article.find('.cover').children('a');
      const $cover = $coverLink.children('img');
      const $others = $article.find('.info-others');
      // Price list (some books have both a regular and a download edition price).
      const $priceList = $article.find('.info-price').children('div');

      const id = $coverLink.prop('data-ecga');

      const authors = $others
        .children('a.author')
        .text()
        .split(authorSeparator)
        .map((author) => author.trim())
        .filter((author) => author !== '')
        .map((author) => {
          // Credits with an explicit role are rewritten as "name(role)".
          const parts = author.split(roleSeparator);
          return parts.length > 1 ? `${parts[1]}(${parts[0]})` : author;
        });

      const book: Book = {
        id,
        thumbnail: $cover.prop('data-src'),
        title: $cover.prop('title'),
        link: id ? new URL(`ebook/${id}`, base).toString() : '',
        priceCurrency: 'TWD',
        price: parsePrice($priceList.eq(0).children('span').text()) || -1,
        authors,
        publisher: $others.children('a:not(.author)').text().trim(),
      };

      // A second price, when present, is the download edition.
      if ($priceList.length > 1) {
        const nonDrmPrice = parsePrice($priceList.eq(1).children('span').text());
        if (!Number.isNaN(nonDrmPrice)) {
          book.nonDrmPrice = nonDrmPrice;
        }
      }

      books.push(book);
    });

  return books;
}
/**
 * Extracts book entries from a books.com.tw search-result page.
 *
 * @param $ - Parsed search-result document.
 * @returns One entry per `#itemlist_table > tbody` element; empty when there
 *   is none. `price` is `-1` when no non-negative price could be parsed, and
 *   `authors` is set only when at least one author was found.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const books: Book[] = [];

  $('#itemlist_table > tbody').each((_index, elem) => {
    const authors: string[] = [];
    $('a[rel=go_author]', elem).each((_i, anchor) => {
      // An anchor without a title contributes no names instead of throwing.
      const names: unknown = $(anchor).prop('title');
      if (typeof names === 'string') {
        authors.push(...names.split('、'));
      }
    });

    // The element id has the form `itemlist_<product id>`.
    const id = ($(elem).attr('id') ?? '').match(/(?<=itemlist_)\S*/)?.[0] ?? '';

    const price = parseFloat(
      $('.list-nav', elem)
        .children('li')
        .children('strong')
        .last()
        .text()
        .replace(/NT\$|,/g, ''),
    );

    const book: Book = {
      id,
      thumbnail: _getBooksCompanyThumbnail(
        $('.box_1', elem).children('a').children('img').prop('data-src'),
      ),
      title: $('a[rel=mid_name]', elem).prop('title'),
      link: `https://www.books.com.tw/products/${id}`,
      priceCurrency: 'TWD',
      price: price >= 0 ? price : -1,
      // Collapse the trailing "...... more" marker (six dots or two ellipsis
      // characters) into " ...". The dots are matched literally so that six
      // arbitrary characters of the description are never swallowed.
      about: $('.txt_cont', elem)
        .children('p')
        .text()
        .replace(/(?:\.{6}|\u2026{2}) more\n\t{8}/g, ' ...'),
      publisher: $('a[rel=mid_publish]', elem).prop('title'),
    };

    if (authors.length > 0) {
      book.authors = authors;
    }

    books.push(book);
  });

  return books;
}

/**
 * Pulls the direct `.jpg` address out of a `getImage?i=<url>…` proxy URL.
 *
 * @param url - Value of the cover image's `data-src` attribute.
 * @returns The embedded `https://….jpg` URL, or the string `'null'` when the
 *   input is not a string or does not match the proxy pattern.
 */
function _getBooksCompanyThumbnail(url: string) {
  // `prop('data-src')` yields undefined when the attribute is absent; treat
  // that like a non-matching URL rather than throwing on `.match`.
  if (typeof url !== 'string') {
    return 'null';
  }

  const match = url.match(/.+\/getImage\?i=(https:\/\/.+\.jpg).+/);
  return match ? match[1] : 'null';
}
/**
 * Extracts book entries from a Kobo search-result page.
 *
 * Each result item embeds its metadata as JSON inside a `<script>` element;
 * items whose metadata is missing or malformed are skipped instead of
 * aborting the whole page.
 *
 * @param $ - Parsed search-result document.
 * @param base - URL the page was requested from; thumbnail URLs are resolved against it.
 * @returns One entry per result item with usable metadata. `price` is `0` for
 *   items flagged `free` and `-1` when no price could be parsed; `authors` is
 *   set only when at least one name was found.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  // JSON.parse that reports missing or malformed input as `undefined`.
  const parseJson = (text: string | null | undefined): any => {
    if (!text) {
      return undefined;
    }
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };

  const books: Book[] = [];

  $('ul[class=result-items] li').each((_index, elem) => {
    const $detail = $(elem).children('.item-detail');
    const $itemInfo = $detail.children('.item-info');
    const $price = $itemInfo.children('.price');

    // Pull the JSON data out of the script element.
    const info = parseJson($detail.children('script').html())?.data;
    if (!info) {
      return;
    }

    // Merge the subtitle, if any, into the main title.
    const title = info.alternativeHeadline
      ? `${info.name} - ${info.alternativeHeadline}`
      : info.name;

    // The tracking attribute comes from the remote page, so it is treated as
    // data and never evaluated as code. `.data()` may already hand back a
    // parsed object; a raw string is parsed as JSON.
    const trackInfo: unknown = $itemInfo
      .children('.contributors')
      .children('.synopsis-contributors')
      .children('.synopsis-text')
      .children('.contributor-name')
      .data('track-info');
    const author = (typeof trackInfo === 'string' ? parseJson(trackInfo) : (trackInfo as any))
      ?.author;
    const authors =
      typeof author === 'string'
        ? author
            .split('、')
            .map((name) => name.trim())
            .filter((name) => name !== '')
        : [];

    // Check for the free flag before parsing a price.
    let price = 0;
    if (!$price.hasClass('free')) {
      price =
        parseFloat(
          $price
            .children('span')
            .children('span')
            .first()
            .text()
            .replace(/NT\$|,|\s/g, ''),
        ) || -1;
    }

    const book: Book = {
      id: info.isbn,
      thumbnail: new URL(info.thumbnailUrl, base).toString(),
      title,
      link: info.url,
      priceCurrency: $price.children('span').children('.currency').text(),
      price,
      about: info.description ? `${info.description} ...` : undefined,
    };

    if (authors.length > 0) {
      book.authors = authors;
    }

    books.push(book);
  });

  return books;
}
/**
 * Fills in title, description, publisher, publish date, price and (when
 * present) authors / translator for each listed book, using one detail
 * request per book.
 *
 * @param books - Stubs produced by `_getBooks`; the objects are updated in place.
 * @returns A promise of the books for which a detail record was returned.
 *   Books without a record are left out. The promise rejects when any detail
 *   request fails.
 */
function _getBooksInfo(books: Book[] = []) {
  return Promise.all(books.map((book) => _getBookInfo(book.id))).then((infos) => {
    const detailed: Book[] = [];

    books.forEach((book, index) => {
      const info = infos[index];
      if (!info) {
        // The detail endpoint returned no record for this id.
        return;
      }

      book.title = info.booktitle ?? book.title;
      book.about = (info.bookprofile ?? '').replace(/\r/g, '');
      book.publisher = info.publisher;
      book.publishDate = info.publishdate;
      book.price = parseFloat(info.saleprice) || -1;

      // NOTE(review): the listing parser's old comment mentions `info.author`;
      // confirm which key the endpoint actually returns.
      if (info.authors) {
        book.authors = info.authors;
      }

      if (info.translator) {
        book.translator = info.translator;
        book.translators = [info.translator];
      }

      detailed.push(book);
    });

    return detailed;
  });
}

/**
 * Extracts product ids from a TAAZE search-result page and builds a stub
 * entry for each one.
 *
 * @param $ - Parsed search-result document.
 * @returns One stub per `.media` element under `#listView` that has a `rel`
 *   value. `title` holds the id until `_getBooksInfo` replaces it.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const books: Book[] = [];

  $('#listView')
    .children('.media')
    .each((_index, elem) => {
      const id = $(elem).prop('rel');
      if (!id) {
        // Without an id there is nothing to link to or look up.
        return;
      }

      books.push({
        id,
        thumbnail: `https://media.taaze.tw/showLargeImage.html?sc=${id}`,
        title: id,
        link: `https://www.taaze.tw/goods/${id}.html`,
        priceCurrency: 'TWD',
      });
    });

  return books;
}

/**
 * Requests the detail record of a single book.
 *
 * @param id - TAAZE product id.
 * @returns A promise of the first element of the JSON array in the response,
 *   or `undefined` when the response is not a non-empty array.
 * @throws Rejects with an `Error` when the request fails or the HTTP status
 *   is not OK.
 */
function _getBookInfo(id = '') {
  const base = `https://www.taaze.tw/new_ec/rwd/lib/searchbookAgent.jsp?prodId=${encodeURIComponent(
    id,
  )}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    headers: {
      'User-Agent': 'Taiwan-Ebook-Search/0.1',
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.json();
    })
    .then((info) => (Array.isArray(info) ? (info as any[])[0] : undefined));
}
/**
 * Formats a stored search record as the Markdown message sent to Telegram.
 *
 * @param data - Search record; reads `keywords`, `searchDateTime`,
 *   `processTime` (milliseconds), `totalQuantity`, `userAgent`, `id` and
 *   `results` (each with `bookstore.displayName`, `isOkay`, `quantity` and
 *   `processTime`).
 * @returns The message text, with one line per bookstore result.
 */
const _telegramPrettier = (data: AnyObject): string => {
  // Milliseconds to seconds, rounded to two decimals.
  const toSeconds = (ms: number): number => Math.round((ms / 1000) * 100) / 100;

  // The record may carry the raw user-agent string or a parsed object with `ua`.
  const userAgent = typeof data.userAgent === 'string' ? data.userAgent : data.userAgent?.ua;

  // Join explicitly: interpolating the array itself would insert a comma
  // between the bookstore lines.
  const bookstoreLines: string = (data.results ?? [])
    .map(
      ({ bookstore, isOkay, quantity, processTime }: AnyObject): string => `
    ${isOkay ? '✅' : '❌'} ${bookstore.displayName} (${quantity} | ${toSeconds(processTime)}s)`,
    )
    .join('');

  return `
  Keywords: *${data.keywords}*
  Search Time: ${data.searchDateTime}
  Process Time: ${toSeconds(data.processTime)}s
  Total: ${data.totalQuantity}
  User Agent: ${userAgent}
  Search ID: \`${data.id}\`
  Link: [🔗](https://taiwan-ebook-lover.github.io/searches/${data.id})
  Bookstore Result: ${bookstoreLines}
  `;
};
/**
 * GET /:id — returns a previously stored search.
 *
 * Responds 200 with the search when found, 404 when `getSearch` yields
 * nothing, and 503 when the lookup throws or rejects.
 */
searchesRouter.get('/:id', async (req, res) => {
  try {
    const search = await getSearch(req.params.id);

    if (!search) {
      return res.status(404).send({
        message: 'Search not found.',
      });
    }

    return res.status(200).send(search);
  } catch (error) {
    // Log the moment of failure alongside the error. `console.time` only
    // starts a named timer and prints nothing until `console.timeEnd`.
    console.error(`Error time: ${new Date().toISOString()}`, error);

    return res.status(503).send({
      message: 'Something is wrong...',
    });
  }
});
/**
 * Searches Google Play Books for `keywords`.
 *
 * @param bookstore - Bookstore record; `proxyUrl`, when set, routes the
 *   request through an HTTPS proxy and is left out of the returned result.
 * @param keywords - Raw search terms; URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` header.
 * @returns A `Result` directly when the bookstore is offline, otherwise a
 *   promise of one. Request and parse failures are reported inside the
 *   result (`isOkay: false`, `error`), so the promise does not reject.
 */
const searchPlayStore = (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  // Builds the shared result envelope; process time is measured up to this call.
  const toResult = (isOkay: boolean, status: string, books: Book[], error?: string): Result => ({
    bookstore,
    isOkay,
    status,
    processTime: getProcessTime(process.hrtime(hrStart)),
    books,
    quantity: books.length,
    ...(error === undefined ? {} : { error }),
  });

  if (!bookstore.isOnline) {
    return toResult(false, 'Bookstore is offline', []);
  }

  const rootURL = `https://play.google.com`;
  const base = `${rootURL}/store/search?q=${encodeURIComponent(
    keywords,
  )}&c=books&authuser=0&gl=tw&hl=zh-tw`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a thrown string has no `.message`, which left
        // the reported error undefined.
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.text();
    })
    .then((body) => toResult(true, 'Crawler success.', _getBooks(cheerio.load(body), rootURL, base)))
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      return toResult(false, 'Crawler failed.', [], message);
    });
};

export default searchPlayStore;

/**
 * Extracts book entries from a Google Play search-result page.
 *
 * @param $ - Parsed search-result document.
 * @param rootURL - Origin used to build cover-image URLs.
 * @param base - URL the page was requested from; book links are resolved against it.
 * @returns One entry per list item whose link carries an `id` query
 *   parameter. `price` is `0` when the price text is "免費", otherwise the
 *   lowest listed price, or `-1` when none could be parsed.
 */
function _getBooks($: cheerio.CheerioAPI, rootURL: string, base: string) {
  const toNumber = (text: string) => Number(text.replace(/\$|,/g, ''));

  const books: Book[] = [];

  $('body > c-wiz')
    .eq(1)
    .children('div')
    .children('div')
    .children('c-wiz')
    .children('c-wiz')
    .find('div[role=listitem]')
    .each((_index, elem) => {
      const $anchor = $(elem).children('div').eq(0).children('div').eq(0).children('a');
      // Second child block of the anchor: first div holds the title, second the price.
      const $info = $anchor.children('div').eq(1).children('div');

      const linkUrl = new URL($anchor.prop('href') ?? '', base);
      const id = linkUrl.searchParams.get('id');
      if (!id) {
        // Without an id neither the cover URL nor a stable identifier can be built.
        return;
      }

      let price = 0;
      const $priceRoot = $info.eq(1).children('div').last().children().children('span');
      if ($priceRoot.text() !== '免費') {
        const $prices = $priceRoot.find('span[aria-hidden="true"] > span');
        const candidates: number[] = $prices.length
          ? $prices.map((_i, priceElem) => toNumber($(priceElem).text())).get()
          : [toNumber($priceRoot.text())];
        const valid = candidates.filter((value) => !Number.isNaN(value));

        // Several prices may be listed; keep the lowest.
        price = valid.length > 0 ? Math.min(...valid) : -1;
      }

      // Pin the language and country of the book link.
      linkUrl.searchParams.set('gl', 'tw');
      linkUrl.searchParams.set('hl', 'zh-tw');

      books.push({
        id,
        thumbnail: `${rootURL}/books/publisher/content/images/frontcover/${id}?fife=w256-h256`,
        title: $info.eq(0).prop('title'),
        link: linkUrl.href,
        priceCurrency: 'TWD',
        price,
      });
    });

  return books;
}
/**
 * Searches Readmoo for `keywords`.
 *
 * @param bookstore - Bookstore record; `proxyUrl`, when set, routes the
 *   request through an HTTPS proxy and is left out of the returned result.
 * @param keywords - Raw search terms; URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` header.
 * @returns A `Result` directly when the bookstore is offline, otherwise a
 *   promise of one. Request and parse failures are reported inside the
 *   result (`isOkay: false`, `error`), so the promise does not reject.
 */
const searchReadmoo = (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  // Builds the shared result envelope; process time is measured up to this call.
  const toResult = (isOkay: boolean, status: string, books: Book[], error?: string): Result => ({
    bookstore,
    isOkay,
    status,
    processTime: getProcessTime(process.hrtime(hrStart)),
    books,
    quantity: books.length,
    ...(error === undefined ? {} : { error }),
  });

  if (!bookstore.isOnline) {
    return toResult(false, 'Bookstore is offline', []);
  }

  const encodedKeywords = encodeURIComponent(keywords);
  const base = `https://readmoo.com/search/keyword?pi=0&st=true&q=${encodedKeywords}&kw=${encodedKeywords}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a thrown string has no `.message`, which left
        // the reported error undefined.
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.text();
    })
    .then((body) => toResult(true, 'Crawler success.', _getBooks(cheerio.load(body))))
    .catch((error: unknown) => {
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      return toResult(false, 'Crawler failed.', [], message);
    });
};

export default searchReadmoo;

/**
 * Extracts book entries from a Readmoo search-result page.
 *
 * When the `READMOO_AP_ID` environment variable is set, each link points at
 * the `/ap/target/<id>` endpoint with the book page as an unpadded base64
 * `url` parameter; otherwise the title anchor's href is used.
 *
 * @param $ - Parsed search-result document.
 * @returns One entry per `#main_items li` element; empty when there is none.
 *   `price` is `-1` when no price could be parsed; `authors`, `publisher`
 *   and `publishDate` are set only when the page provides them.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const stripWhitespace = (text: string) => text.replace(/\s+/g, '');
  const apId = process.env.READMOO_AP_ID;

  const books: Book[] = [];

  $('#main_items li').each((_index, elem) => {
    const $caption = $(elem).children('.caption');
    const $priceInfo = $caption.children('.price-info');
    const $titleLink = $caption.children('h4').children('a');

    const id = $priceInfo.children('meta[itemprop=identifier]').prop('content');

    const apCode = Buffer.from(`https://readmoo.com/book/${id}`)
      .toString('base64')
      .replace(/=+$/, '');
    const link = apId
      ? `https://readmoo.com/ap/target/${apId}?url=${apCode}`
      : ($titleLink.prop('href') ?? 'https://readmoo.com');

    const author = stripWhitespace($caption.children('.contributor-info').children('a').text());
    const publisher = stripWhitespace($caption.children('.publisher-info').children('a').text());
    // Drop the "出版日期:" label (ASCII or full-width colon) and all whitespace.
    const publishDate = $caption
      .children('.publish-date')
      .children('span')
      .text()
      .replace(/出版日期[:\uFF1A]|\s/g, '');

    const book: Book = {
      id,
      thumbnail:
        ($(elem)
          .children('.thumbnail')
          .children('a')
          .children('img')
          .data('lazy-original') as string) || '',
      title: $titleLink.text(),
      link,
      priceCurrency: $priceInfo.children('meta[itemprop=priceCurrency]').prop('content'),
      price:
        parseFloat(
          $priceInfo
            .children('.our-price')
            .children('strong')
            .text()
            .replace(/NT\$|,/g, ''),
        ) || -1,
      about: $caption.children('.description').text(),
    };

    // Only attach authors when a name was found; a one-element array holding
    // an empty string is never empty, so checking its length is not enough.
    if (author !== '') {
      book.authors = [author];
    }

    if (publisher !== '') {
      book.publisher = publisher;
    }

    if (publishDate !== '') {
      book.publishDate = publishDate;
    }

    books.push(book);
  });

  return books;
}
new HttpsProxyAgent(proxyUrl) : undefined, 45 | headers: { 46 | 'User-Agent': `${userAgent}`, 47 | }, 48 | }; 49 | 50 | return fetch(base, options) 51 | .then((response) => { 52 | if (!response.ok) { 53 | throw response.statusText; 54 | } 55 | 56 | return response.text(); 57 | }) 58 | .then((body) => { 59 | return _getBooks(cheerio.load(body), base); 60 | }) 61 | .then((books) => { 62 | // calc process time 63 | const hrEnd = process.hrtime(hrStart); 64 | const processTime = getProcessTime(hrEnd); 65 | const result: Result = { 66 | bookstore, 67 | isOkay: true, 68 | status: 'Crawler success.', 69 | processTime, 70 | books, 71 | quantity: books.length, 72 | }; 73 | 74 | return result; 75 | }) 76 | .catch((error) => { 77 | // calc process time 78 | const hrEnd = process.hrtime(hrStart); 79 | const processTime = getProcessTime(hrEnd); 80 | 81 | console.log(error.message); 82 | 83 | const result: Result = { 84 | bookstore, 85 | isOkay: false, 86 | status: 'Crawler failed.', 87 | processTime, 88 | books: [], 89 | quantity: 0, 90 | error: error.message, 91 | }; 92 | 93 | return result; 94 | }); 95 | }; 96 | 97 | // parse 找書 98 | function _getBooks($: cheerio.CheerioAPI, base: string) { 99 | let books: Book[] = []; 100 | 101 | const $categories = $('.listbox'); 102 | 103 | $categories.each((i, elem) => { 104 | if (!$(elem).children('.listbox_title').length) { 105 | return; 106 | } 107 | 108 | $(elem) 109 | .children('.bookdesc') 110 | .each((i, elem) => { 111 | let title = $(elem).children('.bookdata').children('h2').children('a').text(); 112 | let subTitle = $(elem).children('.bookdata').children('h3').children('a').text(); 113 | if (subTitle) { 114 | title += ` / ${subTitle}`; 115 | } 116 | 117 | const authors: string[] = []; 118 | const translators: string[] = []; 119 | const painters: string[] = []; 120 | 121 | $(elem) 122 | .children('.bookdata') 123 | .children('.bw_item') 124 | .children('.writerinfo') 125 | .children('.writer_data') 126 | .children('li') 127 | 
.map((i, el) => $(el).text()) 128 | .toArray() 129 | .map((str) => str.split(' : ')) 130 | .forEach(([authorTitle, authorName]) => { 131 | switch (authorTitle) { 132 | case '作者': 133 | authors.push(authorName); 134 | break; 135 | case '譯者': 136 | translators.push(authorName); 137 | break; 138 | case '插畫': 139 | painters.push(authorName); 140 | break; 141 | default: 142 | authors.push(`${authorName} (${authorTitle})`); 143 | break; 144 | } 145 | }); 146 | 147 | books[i] = { 148 | id: ( 149 | $(elem).children('.bookdata').children('h2').children('a').prop('href') ?? '' 150 | ).replace('/product/', ''), 151 | thumbnail: $(elem) 152 | .children('.bookcover') 153 | .children('.bookitem') 154 | .children('a') 155 | .children('img') 156 | .data('src') as string, 157 | title: title, 158 | link: new URL( 159 | $(elem).children('.bookdata').children('h2').children('a').prop('href') ?? '', 160 | base, 161 | ).toString(), 162 | priceCurrency: 'TWD', 163 | price: 164 | parseFloat( 165 | $(elem) 166 | .children('.bookdata') 167 | .children('.bw_item') 168 | .children('.writerinfo') 169 | .children('h4') 170 | .children('span') 171 | .text() 172 | .replace(/\D/g, ''), 173 | ) || -1, 174 | about: $(elem) 175 | .children('.bookdata') 176 | .children('.topic_content') 177 | .children('.bookinfo') 178 | .children('h4') 179 | .text() 180 | .concat( 181 | $(elem) 182 | .children('.bookdata') 183 | .children('.topic_content') 184 | .children('.bookinfo') 185 | .children('h5') 186 | .children('span') 187 | .text(), 188 | ), 189 | // publisher:, 190 | }; 191 | 192 | if (authors.length > 0) { 193 | books[i].authors = authors; 194 | } 195 | 196 | if (translators.length > 0) { 197 | books[i].translators = translators; 198 | } 199 | 200 | if (painters.length > 0) { 201 | books[i].painters = painters; 202 | } 203 | }); 204 | }); 205 | 206 | return books; 207 | } 208 | --------------------------------------------------------------------------------