├── .idea
├── .gitignore
├── codeStyles
│ ├── codeStyleConfig.xml
│ └── Project.xml
├── misc.xml
├── vcs.xml
├── modules.xml
├── prettier.xml
├── dictionaries
│ └── project.xml
└── Ebook-API.iml
├── .env.example
├── .prettierrc
├── src
├── interfaces
│ ├── firestoreBookstore.ts
│ ├── bookstore.ts
│ ├── general.ts
│ ├── result.ts
│ └── book.ts
├── stores
│ ├── index.ts
│ ├── likerLand.ts
│ ├── kindle.ts
│ ├── hyread.ts
│ ├── pubu.ts
│ ├── booksCompany.ts
│ ├── kobo.ts
│ ├── taaze.ts
│ ├── playStore.ts
│ ├── readmoo.ts
│ └── bookWalker.ts
├── bot.ts
├── routers
│ ├── bookstores.ts
│ └── searches.ts
├── index.ts
└── firestore.ts
├── .editorconfig
├── .vscode
└── settings.json
├── tsconfig.json
├── .github
└── workflows
│ └── code-check.yml
├── README.md
├── package.json
└── .gitignore
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | PORT=65432
2 | TOKEN=
3 | GROUPID=
4 | DBURL=
5 | FIREBASE_SERVICE_ACCOUNT_BASE64=
6 | READMOO_AP_ID=
7 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "singleQuote": true,
3 | "trailingComma": "all",
4 | "printWidth": 100,
5 | "endOfLine": "lf"
6 | }
7 |
--------------------------------------------------------------------------------
/src/interfaces/firestoreBookstore.ts:
--------------------------------------------------------------------------------
1 | import { Bookstore } from './bookstore.js';
/**
 * Bookstore record as stored in the Firestore `bookstores` collection.
 *
 * Extends the public `Bookstore` shape with the proxy setting. The routers
 * strip `proxyUrl` before sending a bookstore to clients.
 */
export interface FirestoreBookstore extends Bookstore {
  /** Proxy used for outgoing store requests; a falsy value means no proxy agent is created. */
  proxyUrl: string;
}
5 |
--------------------------------------------------------------------------------
/src/interfaces/bookstore.ts:
--------------------------------------------------------------------------------
/**
 * Bookstore description as returned to API clients.
 */
export interface Bookstore {
  /** Identifier matched by the `/bookstores/:id` route via the Firestore `id` field. */
  id: string;
  displayName: string;
  website: string;
  /** When false, the store modules skip the request and report 'Bookstore is offline'. */
  isOnline: boolean;
  // NOTE(review): the meaning of `status` is not visible in this file — confirm against the Firestore data.
  status: string;
}
8 |
--------------------------------------------------------------------------------
/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/src/interfaces/general.ts:
--------------------------------------------------------------------------------
/**
 * Generic string-keyed dictionary whose values all share the type `T`.
 *
 * The type parameter defaults to `unknown`, so both `AnyObject` and
 * `AnyObject<SomeType>` are valid references.
 */
export interface AnyObject<T = unknown> {
  [key: string]: T;
}
// Conversion factors: nanoseconds in one second, and milliseconds in one nanosecond.
const NS_PER_SEC = 1e9;
const MS_PER_NS = 1e-6;

/**
 * Converts a `[seconds, nanoseconds]` tuple (the shape returned by
 * `process.hrtime`) into a duration in milliseconds.
 *
 * @returns The duration in milliseconds, including the fractional part.
 */
export const getProcessTime = (hrTime: [number, number]) => {
  const totalNanoseconds = hrTime[0] * NS_PER_SEC + hrTime[1];
  return totalNanoseconds * MS_PER_NS;
};
10 |
--------------------------------------------------------------------------------
/src/interfaces/result.ts:
--------------------------------------------------------------------------------
1 | import { Book } from './book.js';
2 | import { Bookstore } from './bookstore.js';
3 |
/**
 * Outcome of searching one bookstore, as produced by the modules in `stores/`.
 */
export interface Result {
  /** Bookstore the search ran against (without its proxy setting). */
  bookstore: Bookstore;
  /** True only when the store request and parsing succeeded. */
  isOkay: boolean;
  /** Human-readable outcome, e.g. 'Crawler success.' or 'Bookstore is offline'. */
  status: string;
  /** Elapsed time in milliseconds, computed with `getProcessTime`. */
  processTime: number;
  /** Number of entries in `books`. */
  quantity: number;
  books: Book[] | [];
  /** Failure message; only set by the stores' error path. */
  error?: string;
}
13 |
--------------------------------------------------------------------------------
/.idea/prettier.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | [*]
2 | charset = utf-8
3 | end_of_line = lf
4 | insert_final_newline = true
5 | trim_trailing_whitespace = true
6 | max_line_length = 100
7 |
8 | [*.md]
9 | indent_style = space
10 | indent_size = 2
11 | trim_trailing_whitespace = false
12 |
13 | [*.{js,jsx,ts,tsx}]
14 | indent_style = space
15 | indent_size = 2
16 |
17 | [*.json]
18 | indent_style = space
19 | indent_size = 4
20 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cSpell.words": [
3 | "DBURL",
4 | "Firestore",
5 | "GROUPID",
6 | "bookwalker",
7 | "hyread",
8 | "pubu",
9 | "readmoo",
10 | "taaze"
11 | ],
12 | "editor.formatOnSave": true,
13 | "editor.defaultFormatter": "esbenp.prettier-vscode",
14 | "typescript.tsdk": "node_modules/typescript/lib"
15 | }
16 |
--------------------------------------------------------------------------------
/src/interfaces/book.ts:
--------------------------------------------------------------------------------
/**
 * A single search hit, normalized across bookstores.
 * Only `title` and `link` are required; each store fills in what it can parse.
 */
export interface Book {
  id?: string;
  /** Cover image URL. */
  thumbnail?: string;
  title: string;
  /** URL of the book's page at the store. */
  link: string;
  /** Currency code of `price`, e.g. 'USD' or 'TWD'. */
  priceCurrency?: string;
  price?: number;
  about?: string;
  publisher?: string;
  publishDate?: string;
  authors?: string[];
  // NOTE(review): presumably the price of a DRM-free edition — confirm with the stores that set it.
  nonDrmPrice?: number;
  translator?: string;
  translators?: string[];
  painters?: string[];
}
17 |
--------------------------------------------------------------------------------
/.idea/dictionaries/project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | bookwalker
5 | dburl
6 | groupid
7 | hyread
8 | inited
9 | kobo
10 | likerLand
11 | marky
12 | pubu
13 | readmoo
14 | searchlist
15 | taaze
16 |
17 |
18 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ESNext",
4 | "module": "ES2015",
5 | "outDir": "dist",
6 | "rootDir": "src",
7 | "strict": true,
8 | "noImplicitAny": true,
9 | "moduleResolution": "node",
10 | "resolveJsonModule": true,
11 | "esModuleInterop": true,
12 | "sourceMap": true,
13 | "baseUrl": ".",
14 | "paths": {
15 | "*": ["node_modules/*", "src/types/*"]
16 | }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/stores/index.ts:
--------------------------------------------------------------------------------
1 | export { default as booksCompany } from './booksCompany.js';
2 | export { default as kobo } from './kobo.js';
3 | export { default as taaze } from './taaze.js';
4 | export { default as bookWalker } from './bookWalker.js';
5 | export { default as readmoo } from './readmoo.js';
6 | export { default as playStore } from './playStore.js';
7 | export { default as pubu } from './pubu.js';
8 | export { default as hyread } from './hyread.js';
9 | export { default as kindle } from './kindle.js';
10 | export { default as likerLand } from './likerLand.js';
11 |
--------------------------------------------------------------------------------
/.idea/Ebook-API.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.github/workflows/code-check.yml:
--------------------------------------------------------------------------------
1 | name: Code Check
2 |
3 | on:
4 | pull_request:
5 | paths-ignore:
6 | - '.github/**'
7 | - '.vscode/**'
8 | - '.idea/**'
9 | - '.husky/**'
10 |
11 | jobs:
12 | code-check:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Checkout 🛎️
16 | uses: actions/checkout@v2
17 | with:
18 | persist-credentials: false
19 |
20 | - name: Setup Node
21 | uses: actions/setup-node@v2
22 | with:
23 | node-version: '16'
24 |
25 | - name: Install Dependencies
26 | run: npm ci --prefer-offline --ignore-scripts --progress=false --no-audit --no-fund
27 |
28 | - name: Format Check
29 | run: npm run format-check
30 |
31 | - name: Compile Test
32 | run: npm run build
33 |
--------------------------------------------------------------------------------
/src/bot.ts:
--------------------------------------------------------------------------------
1 | import TelegramBot from 'node-telegram-bot-api';
2 |
3 | let bot: TelegramBot;
4 | let groupId: string;
5 |
/**
 * Creates the module-level Telegram bot client and stores the target group id.
 *
 * @param token - Telegram bot token.
 * @param group - Chat/group id that `sendMessage` will post to.
 * @returns A promise that always resolves. A repeated call, or a failure
 *   while constructing the client, is logged with `console.error` rather
 *   than propagated.
 */
export const botInit = (token: string, group: string): Promise<void> => {
  try {
    if (bot) {
      throw new Error('bot is already inited.');
    }

    bot = new TelegramBot(token, {
      polling: false,
      request: {
        url: 'https://api.telegram.org',
        agentOptions: {
          keepAlive: true,
          // Force IPv4 for requests to the Telegram API.
          family: 4,
        },
      },
    });
    groupId = group;
  } catch (error: unknown) {
    console.error(error);
  }

  return Promise.resolve();
};
31 |
/**
 * Sends a Markdown-formatted message to the group configured by `botInit`.
 *
 * @param message - Message text, parsed by Telegram as Markdown.
 * @returns A promise that never rejects: it resolves to the sent message, or
 *   to `undefined` when sending failed or the bot has not been initialized
 *   (both cases are logged with `console.error`).
 */
export const sendMessage = (message: string): Promise<TelegramBot.Message | void> => {
  // Guard against use before botInit(); calling a method on the unset client would throw.
  if (!bot) {
    console.error(new Error('bot is not inited.'));
    return Promise.resolve();
  }

  return bot
    .sendMessage(groupId, message, { parse_mode: 'Markdown' })
    .catch((error: unknown) => console.error(error));
};
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TaiwanEbookSearch API
2 |
3 | 
4 |
5 | ## Prerequisite
6 |
7 | 1. Install [Git latest version](https://git-scm.com) and [Node.js latest LTS version](https://nodejs.org)
8 | 2. Get codebase
9 | ```bash
10 | $ git clone git@github.com:Taiwan-Ebook-Lover/TaiwanEbookSearch-API.git
11 | ```
12 | 3. Install dependencies
13 | ```bash
14 | $ cd TaiwanEbookSearch-API
15 | $ npm install
16 | ```
17 |
18 | ## Environment variables
19 |
20 | Copy `.env.example` to `.env` to customize those environment variables.
21 |
22 | ### The `.env` structure
23 |
24 | - `PORT` => local serve port number.
25 | - `TOKEN` => Telegram bot token.
26 | - `GROUPID` => Telegram group id.
27 | - `DBURL` => Firebase project URL.
28 | - `FIREBASE_SERVICE_ACCOUNT_BASE64` => BASE64 string of firebase service account file. (Tool script: `$ npm run convert-firebase-config -- -in serviceAccount.json`)
29 | - `READMOO_AP_ID` => Readmoo affiliate program id.
30 |
31 | ## Build & Serve
32 |
33 | Build the project, then start the local server:
34 |
35 | ```bash
36 | npm run build
37 | npm start
38 | ```
39 |
--------------------------------------------------------------------------------
/src/routers/bookstores.ts:
--------------------------------------------------------------------------------
1 | import { Router } from 'express';
2 | import { getBookstores } from '../firestore.js';
3 |
4 | const bookstoresRouter = Router();
5 |
/**
 * GET / — responds 200 with every bookstore, each without its `proxyUrl`.
 * Responds 503 with a generic message if loading the bookstores rejects.
 */
bookstoresRouter.get('/', (req, res) => {
  getBookstores()
    .then((bookstores) => {
      // Drop the proxy setting so it is never sent to clients.
      return res.status(200).send(bookstores.map(({ proxyUrl, ...bookstore }) => bookstore));
    })
    .catch((error: unknown) => {
      // Log when the failure happened, followed by the failure itself.
      console.error('Error time:', new Date().toISOString());
      console.error(error);

      return res.status(503).send({
        message: 'Something is wrong...',
      });
    });
});
20 |
/**
 * GET /:id — responds 200 with the matching bookstore (without `proxyUrl`),
 * 400 when no bookstore has that id, or 503 if the lookup rejects.
 */
bookstoresRouter.get('/:id', (req, res) => {
  const bookstoreId: string = req.params.id;

  getBookstores(bookstoreId)
    .then((bookstores) => {
      if (bookstores.length === 0) {
        return res.status(400).send({
          message: `Bookstore ${bookstoreId} is invalid.`,
        });
      }

      // Drop the proxy setting so it is never sent to clients.
      const { proxyUrl, ...bookstore } = bookstores[0];
      return res.status(200).send(bookstore);
    })
    .catch((error: unknown) => {
      // Log when the failure happened, followed by the failure itself.
      console.error('Error time:', new Date().toISOString());
      console.error(error);

      return res.status(503).send({
        message: 'Something is wrong...',
      });
    });
});
42 |
43 | export { bookstoresRouter };
44 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import express, { RequestHandler } from 'express';
2 | import cors from 'cors';
3 | import compression from 'compression';
4 |
5 | import 'dotenv/config';
6 |
7 | import { botInit } from './bot.js';
8 | import { connect } from './firestore.js';
9 | import { searchesRouter } from './routers/searches.js';
10 | import { bookstoresRouter } from './routers/bookstores.js';
11 | import { ServiceAccount } from 'firebase-admin';
12 |
13 | const app: express.Application = express();
14 |
/**
 * Boots the service: starts the Telegram bot, connects to Firestore using
 * the base64-encoded service account from the environment, then starts
 * listening on `PORT`.
 */
const init = () => {
  const env = process.env;

  // Telegram bot first.
  botInit(env.TOKEN as string, env.GROUPID as string);

  // Then the database: decode the service account JSON from its base64 form.
  const firebaseUrl: string = env.DBURL ?? '';
  const encodedAccount = env.FIREBASE_SERVICE_ACCOUNT_BASE64 as string;
  const decodedAccount = Buffer.from(encodedAccount, 'base64').toString();
  const serviceAccount: ServiceAccount = JSON.parse(decodedAccount);

  connect(firebaseUrl, serviceAccount);

  // Finally the HTTP server.
  const onListening = () => {
    console.log(`listening on http://localhost:${process.env.PORT}`);
  };
  app.listen(env.PORT, onListening);
};
35 |
// compress all responses
app.use(compression());

// for parsing application/json
app.use(express.json() as RequestHandler);

// for parsing application/x-www-form-urlencoded
app.use(express.urlencoded({ extended: true }) as RequestHandler);

// for cors
app.use(
  cors({
    // 'OPTIONS' (plural) is the HTTP method used for CORS preflight requests.
    methods: ['GET', 'POST', 'PATCH', 'OPTIONS', 'DELETE'],
    credentials: true,
    origin: true,
  }),
);

/**
 * Route
 */

app.use('/searches', searchesRouter);

app.use('/bookstores', bookstoresRouter);

/**
 * Error Handler
 */

// Any GET request not handled by the routers above gets a 405 response.
app.get('*', (req, res) => {
  return res.status(405).send({
    message: 'Method Not Allowed!',
  });
});
71 |
72 | init();
73 |
--------------------------------------------------------------------------------
/.idea/codeStyles/Project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "taiwan-ebook-search-api",
3 | "version": "1.2.0",
4 | "description": "API",
5 | "main": "./dist/index.js",
6 | "type": "module",
7 | "scripts": {
8 | "convert-firebase-config": "openssl base64 -A",
9 | "start": "node ./dist/index",
10 | "build": "tsc",
11 | "tsc": "tsc",
12 | "dev": "tsc -w & nodemon ./dist/index",
13 | "format": "prettier --write \"src/**/*.{js,ts,md,json,yml,yaml}\"",
14 | "format-check": "prettier --check \"src/**/*.{js,ts,md,json,yml,yaml}\""
15 | },
16 | "repository": {
17 | "type": "git",
18 | "url": "git+https://github.com/Taiwan-Ebook-Lover/TaiwanEbookSearch-API.git"
19 | },
20 | "author": "Yuer Lee ",
21 | "contributors": [
22 | "Safefly Tsai (https://safefly.tw)"
23 | ],
24 | "license": "MIT",
25 | "dependencies": {
26 | "cheerio": "1.1.2",
27 | "compression": "^1.8.1",
28 | "cors": "^2.8.5",
29 | "date-fns": "^4.1.0",
30 | "dotenv": "^17.2.3",
31 | "express": "^4.21.2",
32 | "firebase-admin": "^13.5.0",
33 | "https-proxy-agent": "^7.0.6",
34 | "node-fetch": "^3.3.2",
35 | "node-telegram-bot-api": "~0.66.0",
36 | "timeout-signal": "^2.0.0",
37 | "ua-parser-js": "^1.0.41"
38 | },
39 | "bugs": {
40 | "url": "https://github.com/Taiwan-Ebook-Lover/TaiwanEbookSearch-API/issues"
41 | },
42 | "homepage": "https://github.com/Taiwan-Ebook-Lover/TaiwanEbookSearch-API",
43 | "devDependencies": {
44 | "@types/compression": "^1.8.1",
45 | "@types/cors": "^2.8.19",
46 | "@types/express": "^4.17.25",
47 | "@types/morgan": "^1.9.10",
48 | "@types/node": "^24.9.2",
49 | "@types/node-fetch": "^3.0.3",
50 | "@types/node-telegram-bot-api": "^0.64.12",
51 | "@types/ua-parser-js": "~0.7.39",
52 | "nodemon": "^3.1.10",
53 | "prettier": "3.3.3",
54 | "typescript": "~5.9.3"
55 | },
56 | "lint-staged": {
57 | "*.{js,ts,md,json,yml,yaml}": "prettier --write"
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/firestore.ts:
--------------------------------------------------------------------------------
1 | import admin, { ServiceAccount } from 'firebase-admin';
2 | import { FirestoreBookstore } from './interfaces/firestoreBookstore.js';
3 | import { AnyObject } from './interfaces/general.js';
4 |
5 | export let firestore: FirebaseFirestore.Firestore;
6 |
/**
 * Initializes the Firebase Admin app and stores its Firestore handle in the
 * exported `firestore` binding.
 *
 * @param url - Firebase database URL.
 * @param serviceAccount - Service account credentials.
 * @returns A promise that always resolves. A repeated call, or a failure
 *   during initialization, is logged rather than propagated.
 */
export const connect = (url: string, serviceAccount: ServiceAccount): Promise<void> => {
  try {
    // check firestore connected status
    if (firestore) {
      throw new Error('DB is already connected.');
    }

    admin.initializeApp({
      credential: admin.credential.cert(serviceAccount),
      databaseURL: url,
    });

    // update firestore
    firestore = admin.firestore();
  } catch (error: unknown) {
    // Log when the failure happened, followed by the failure itself.
    console.error('Error time:', new Date().toISOString());
    console.error(error);
  }

  return Promise.resolve();
};
29 |
/**
 * Loads bookstores from the Firestore `bookstores` collection.
 *
 * @param bookstoreId - When given, only documents whose `id` field equals
 *   this value are returned; otherwise the whole collection is read.
 * @returns The matching bookstores. An empty result set or a query failure
 *   is logged and yields an empty array instead of a rejection.
 */
export const getBookstores = (bookstoreId?: string): Promise<FirestoreBookstore[]> => {
  const collection = firestore.collection('bookstores');
  const bookstoreRef: FirebaseFirestore.Query = bookstoreId
    ? collection.where('id', '==', bookstoreId)
    : collection;

  return bookstoreRef
    .get()
    .then((snapshot: FirebaseFirestore.QuerySnapshot) => {
      if (snapshot.empty) {
        throw new Error('No matching bookstore.');
      }
      return snapshot.docs.map((bookstore) => bookstore.data() as FirestoreBookstore);
    })
    .catch((error: unknown): FirestoreBookstore[] => {
      // Log when the failure happened, followed by the failure itself.
      console.error('Error time:', new Date().toISOString());
      console.error(error);
      return [];
    });
};
58 |
/**
 * Stores a search record in the Firestore `searches` collection.
 *
 * `undefined` values are converted to `null` through a JSON round trip
 * before the record is written.
 *
 * @param data - Search record to persist.
 * @returns The stored record extended with its generated document `id`, or
 *   an empty string when the write failed (the failure is logged).
 */
// NOTE(review): the original generic arguments were lost in this copy of the file;
// `any` keeps every existing caller compiling — tighten once the callers are confirmed.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const insertSearch = (data: AnyObject<any>): Promise<any> => {
  const formattedData = JSON.parse(
    JSON.stringify(data, (key, value) => (value === undefined ? null : value)),
  );

  return firestore
    .collection('searches')
    .add(formattedData)
    .then(({ id }) => ({ ...formattedData, id }))
    .catch((error: unknown) => {
      // Log when the failure happened, followed by the failure itself.
      console.error('Error time:', new Date().toISOString());
      console.error(error);
      return '';
    });
};
73 |
/**
 * Reads one search record from the Firestore `searches` collection.
 *
 * @param id - Document id of the search.
 * @returns The stored record with `id` added, or an empty object when the
 *   document does not exist or the read failed (both cases are logged).
 */
// NOTE(review): the original generic arguments were lost in this copy of the file;
// `AnyObject<any>` is the most permissive reconstruction — confirm against callers.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const getSearch = (id: string): Promise<AnyObject<any>> => {
  return firestore
    .collection('searches')
    .doc(id)
    .get()
    .then((doc) => {
      if (!doc.exists) {
        throw new Error('No matching search.');
      }
      return { ...doc.data(), id };
    })
    .catch((error: unknown) => {
      // Log when the failure happened, followed by the failure itself.
      console.error('Error time:', new Date().toISOString());
      console.error(error);
      return {};
    });
};
91 |
--------------------------------------------------------------------------------
/src/stores/likerLand.ts:
--------------------------------------------------------------------------------
1 | import fetch from 'node-fetch';
2 | import timeoutSignal from 'timeout-signal';
3 |
4 | import pkg from 'https-proxy-agent';
5 | const { HttpsProxyAgent } = pkg;
6 |
7 | import { Book } from '../interfaces/book.js';
8 | import { Result } from '../interfaces/result.js';
9 | import { getProcessTime } from '../interfaces/general.js';
10 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
11 |
/**
 * Searches the Liker Land store through the `api.like.co` search endpoint.
 *
 * @param bookstore - Firestore bookstore record; `proxyUrl` is split off and
 *   the remaining fields are echoed back in the result.
 * @param keywords - Raw (not yet URL-encoded) search keywords.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is offline, otherwise a
 *   promise of a `Result`. Request and parsing failures resolve to a failed
 *   `Result` (with `error` set) instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL encode
  const base = `https://api.like.co/likernft/book/store/search?q=${encodeURIComponent(keywords)}`;

  const options = {
    method: 'GET',
    compress: true,
    // Abort the request after 10 seconds.
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error so the catch handler can report a message.
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.json();
    })
    .then((data) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const books: Book[] = (data as any).list.map((item: any) => {
        const { iscnId, imageUrl, name, url, minPrice, description, ownerName } = item;
        return {
          id: iscnId,
          thumbnail: imageUrl,
          title: name,
          link: url,
          priceCurrency: 'USD',
          price: minPrice,
          about: description,
          authors: [ownerName],
        };
      });
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime,
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime,
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
105 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # firebase service account json
2 | serviceAccount.json
3 |
4 | # Paw API file
5 | *.paw
6 |
7 | # Logs
8 | logs
9 | *.log
10 | npm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 |
14 | # Runtime data
15 | pids
16 | *.pid
17 | *.seed
18 | *.pid.lock
19 |
20 | # Directory for instrumented libs generated by jscoverage/JSCover
21 | lib-cov
22 |
23 | # Coverage directory used by tools like istanbul
24 | coverage
25 |
26 | # nyc test coverage
27 | .nyc_output
28 |
29 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
30 | .grunt
31 |
32 | # Bower dependency directory (https://bower.io/)
33 | bower_components
34 |
35 | # node-waf configuration
36 | .lock-wscript
37 |
38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
39 | build/Release
40 | dist/
41 |
42 | # Dependency directories
43 | node_modules/
44 | jspm_packages/
45 |
46 | # Typescript v1 declaration files
47 | typings/
48 |
49 | # Optional npm cache directory
50 | .npm
51 |
52 | # Optional eslint cache
53 | .eslintcache
54 |
55 | # Optional REPL history
56 | .node_repl_history
57 |
58 | # Output of 'npm pack'
59 | *.tgz
60 |
61 | # Yarn Integrity file
62 | .yarn-integrity
63 |
64 | # dotenv environment variables file
65 | .env
66 |
67 | # General
68 | .DS_Store
69 | .AppleDouble
70 | .LSOverride
71 |
72 | # Icon must end with two \r
73 | Icon
74 |
75 | # Thumbnails
76 | ._*
77 |
78 | # Files that might appear in the root of a volume
79 | .DocumentRevisions-V100
80 | .fseventsd
81 | .Spotlight-V100
82 | .TemporaryItems
83 | .Trashes
84 | .VolumeIcon.icns
85 | .com.apple.timemachine.donotpresent
86 |
87 | # Directories potentially created on remote AFP share
88 | .AppleDB
89 | .AppleDesktop
90 | Network Trash Folder
91 | Temporary Items
92 | .apdisk
93 |
94 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
95 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
96 |
97 | # User-specific stuff
98 | .idea/**/workspace.xml
99 | .idea/**/tasks.xml
100 | .idea/**/usage.statistics.xml
101 | .idea/**/shelf
102 | .idea/dictionaries/**
103 | !.idea/dictionaries/project.xml
104 |
105 | # Generated files
106 | .idea/**/contentModel.xml
107 |
108 | # Sensitive or high-churn files
109 | .idea/**/dataSources/
110 | .idea/**/dataSources.ids
111 | .idea/**/dataSources.local.xml
112 | .idea/**/sqlDataSources.xml
113 | .idea/**/dynamic.xml
114 | .idea/**/uiDesigner.xml
115 | .idea/**/dbnavigator.xml
116 |
117 | # Gradle
118 | .idea/**/gradle.xml
119 | .idea/**/libraries
120 |
121 | # Gradle and Maven with auto-import
122 | # When using Gradle or Maven with auto-import, you should exclude module files,
123 | # since they will be recreated, and may cause churn. Uncomment if using
124 | # auto-import.
125 | # .idea/artifacts
126 | # .idea/compiler.xml
127 | # .idea/jarRepositories.xml
128 | # .idea/modules.xml
129 | # .idea/*.iml
130 | # .idea/modules
131 | # *.iml
132 | # *.ipr
133 |
134 | # CMake
135 | cmake-build-*/
136 |
137 | # Mongo Explorer plugin
138 | .idea/**/mongoSettings.xml
139 |
140 | # File-based project format
141 | *.iws
142 |
143 | # IntelliJ
144 | out/
145 |
146 | # mpeltonen/sbt-idea plugin
147 | .idea_modules/
148 |
149 | # JIRA plugin
150 | atlassian-ide-plugin.xml
151 |
152 | # Cursive Clojure plugin
153 | .idea/replstate.xml
154 |
155 | # Sonarlint plugin
156 | .idea/sonarlint/
157 |
158 | # Crashlytics plugin (for Android Studio and IntelliJ)
159 | com_crashlytics_export_strings.xml
160 | crashlytics.properties
161 | crashlytics-build.properties
162 | fabric.properties
163 |
164 | # Editor-based Rest Client
165 | .idea/httpRequests
166 |
167 | # Android studio 3.1+ serialized cache file
168 | .idea/caches/build_file_checksums.ser
169 |
170 |
--------------------------------------------------------------------------------
/src/stores/kindle.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the Kindle store by fetching the amazon.com search page
 * (restricted to `digital-text`) and parsing the HTML.
 *
 * @param bookstore - Firestore bookstore record; `proxyUrl` is split off and
 *   the remaining fields are echoed back in the result.
 * @param keywords - Raw (not yet URL-encoded) search keywords.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is offline, otherwise a
 *   promise of a `Result`. Request and parsing failures resolve to a failed
 *   `Result` (with `error` set) instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL encode
  const base = `https://www.amazon.com/s?k=${encodeURIComponent(keywords)}&i=digital-text`;

  const options = {
    method: 'GET',
    compress: true,
    // Abort the request after 10 seconds.
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error so the catch handler can report a message.
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body)))
    .then((books) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime,
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime,
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Parses an amazon.com search result page into books.
 *
 * @param $ - Cheerio handle of the loaded search page.
 * @returns One `Book` per result item that carries a `data-asin` attribute;
 *   an empty array when the page has no refinement filter or no results.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const noEbookFilter = $('#s-refinements').children('div').first().children().length === 0;

  if (noEbookFilter) {
    // Avoid returning results from all categories if no ebook matches.
    return [];
  }

  const books: Book[] = [];

  $('.s-main-slot')
    .children('.s-result-item')
    .each((i, elem) => {
      const $item = $(elem);
      const id = $item.attr('data-asin');

      // Items without an ASIN are skipped.
      if (!id) {
        return;
      }

      const $h2 = $item.find('h2');
      // Strip the currency sign AND thousands separators before parsing,
      // otherwise "$1,234.56" would be read as 1.
      const priceText = $item.find('.a-price .a-offscreen').eq(0).text().replace(/[$,]/g, '');

      books.push({
        id,
        title: $h2.text().trim(),
        price: parseFloat(priceText),
        priceCurrency: 'USD',
        // Fall back to the site root rather than appending the text "undefined".
        link: `https://www.amazon.com${$h2.find('a').attr('href') ?? ''}`,
        thumbnail: $item.find('img').attr('src'),
      });
    });

  return books;
}
136 |
--------------------------------------------------------------------------------
/src/stores/hyread.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the HyRead ebook store by fetching its search page and parsing
 * the HTML.
 *
 * @param bookstore - Firestore bookstore record; `proxyUrl` is split off and
 *   the remaining fields are echoed back in the result.
 * @param keywords - Raw (not yet URL-encoded) search keywords.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is offline, otherwise a
 *   promise of a `Result`. Request and parsing failures resolve to a failed
 *   `Result` (with `error` set) instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL encode
  const base = `https://ebook.hyread.com.tw/searchList.jsp?search_field=FullText&MZAD=0&search_input=${encodeURIComponent(keywords)}`;

  const options = {
    method: 'GET',
    compress: true,
    // Abort the request after 10 seconds.
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
      cookie: 'notBot=1',
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error so the catch handler can report a message.
        throw new Error(response.statusText || `HTTP ${response.status}`);
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body), base))
    .then((books) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime,
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // calc process time
      const processTime = getProcessTime(process.hrtime(hrStart));
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime,
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
97 |
/**
 * Extracts the book entries from a HyRead search-result page.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @param base - URL the page was fetched from; book links are resolved against it.
 * @returns One entry per `.book-wrap` element; an empty array when the page has none.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  const books: Book[] = [];

  // No `.book-wrap` elements means no hits, so the loop simply never runs.
  $('.book-wrap').each((_index, wrap) => {
    const $wrap = $(wrap);
    const $titleLink = $wrap.children('.book-title-01').children('a');
    const href = $titleLink.prop('href') ?? '';
    const $coverImage = $wrap
      .children('.book-cover')
      .children('.book-overlay')
      .children('.book-link')
      .children('.coverBox')
      .children('.bookPic');
    const priceText = $wrap.children('.book-money').children('.book-price').text();

    const book = {
      // The detail link carries the book id as its query parameter.
      id: href.replace(/bookDetail.jsp\?id=/, ''),
      thumbnail: $coverImage.prop('src'),
      title: $titleLink.text(),
      link: new URL(href, base).toString(),
      priceCurrency: 'TWD',
      // -1 marks a price that could not be parsed.
      price: parseFloat(priceText) || -1,
    };

    books.push(book);
  });

  return books;
}
139 |
--------------------------------------------------------------------------------
/src/stores/pubu.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the Pubu store for `keywords` and wraps the outcome in a `Result`.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the outgoing request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const base = `https://www.pubu.com.tw/search?sort=0&orderBy=&haveBOOK=true&haveMAGAZINE=false&haveMEDIA=false&q=${query}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body), base))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Extracts the book entries from a Pubu search-result page.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @param base - URL the page was fetched from; book links are resolved against it.
 * @returns One `Book` per result `article`; an empty array when the list is absent.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  const $list = $('#search-list-content').children('div').children('article');

  const books: Book[] = [];

  // Strips the currency prefix and thousands separators; yields NaN when no number remains.
  const parsePrice = (text: string) => parseFloat(text.replace(/NT\$|,/g, ''));

  // An empty list means the store has no match; the loop simply never runs.
  $list.each((_index, elem) => {
    const $item = $(elem);
    const $coverLink = $item.find('.cover').children('a');
    const $coverImage = $coverLink.children('img');

    // Price list wrapper: some books carry both a regular and a download (DRM-free) price.
    const $priceList = $item.find('.info-price').children('div');

    const id = $coverLink.prop('data-ecga');

    const authors = $item
      .find('.info-others')
      .children('a.author')
      .text()
      .trim()
      // Separators seen between names: ASCII/full-width comma, ideographic comma, slash.
      .split(/, |,|，|、|\//)
      .map((author) => author.trim())
      // An empty author field would otherwise produce a single '' entry.
      .filter((author) => author !== '')
      .map((author) => {
        // Credits written as "role:name" are reformatted to "name(role)".
        const authorSplit = author.split(/[:：]/);

        return authorSplit.length > 1 ? `${authorSplit[1]}(${authorSplit[0]})` : author;
      });

    const book: Book = {
      id,
      thumbnail: $coverImage.prop('data-src'),
      title: $coverImage.prop('title'),
      link: id ? new URL(`ebook/${id}`, base).toString() : '',
      priceCurrency: 'TWD',
      // -1 marks a price that could not be parsed.
      price: parsePrice($priceList.eq(0).children('span').text()) || -1,
      authors,
      publisher: $item.find('.info-others').children('a:not(.author)').text().trim(),
    };

    // A second price entry is the download (DRM-free) edition; parse it the same way as the
    // regular price so the "NT$" prefix does not turn it into NaN.
    if ($priceList.length > 1) {
      const nonDrmPrice = parsePrice($priceList.eq(1).children('span').text());

      if (!Number.isNaN(nonDrmPrice)) {
        book.nonDrmPrice = nonDrmPrice;
      }
    }

    books.push(book);
  });

  return books;
}
152 |
--------------------------------------------------------------------------------
/src/stores/booksCompany.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches books.com.tw (ebook category) for `keywords` and wraps the outcome in a `Result`.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the outgoing request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const base = `https://search.books.com.tw/search/query/cat/6/sort/1/v/0/page/1/spell/1/key/${query}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body)))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Extracts the book entries from a books.com.tw search-result page.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @returns One `Book` per `#itemlist_table > tbody` element; empty when none is present.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const $list = $('#itemlist_table > tbody');

  const books: Book[] = [];

  // An empty list means the store has no match; the loop simply never runs.
  $list.each((_index, elem) => {
    // Each author link may hold several names joined by the ideographic comma.
    const authors: string[] = [];

    $('a[rel=go_author]', elem).each((_authorIndex, authorLink) => {
      // A link without a `title` would make `.split` throw and fail the whole crawl.
      const names: string = $(authorLink).prop('title') ?? '';

      authors.push(...names.split('、').filter((name) => name !== ''));
    });

    // The element id looks like "itemlist_<id>"; keep what follows the prefix.
    const id = ($(elem).attr('id') ?? '').match(/(?<=itemlist_)\S*/)?.[0] ?? '';

    const price = parseFloat(
      $('.list-nav', elem)
        .children('li')
        .children('strong')
        .last()
        .text()
        .replace(/NT\$|,/g, ''),
    );

    const rawThumbnailUrl: string | undefined = $('.box_1', elem)
      .children('a')
      .children('img')
      .prop('data-src');
    // Without a `data-src` the helper would throw on `undefined`; fall back to the same
    // 'null' placeholder it returns for an unrecognised URL.
    const thumbnailUrl = rawThumbnailUrl ? _getBooksCompanyThumbnail(rawThumbnailUrl) : 'null';

    const book: Book = {
      id,
      thumbnail: thumbnailUrl,
      title: $('a[rel=mid_name]', elem).prop('title'),
      link: `https://www.books.com.tw/products/${id}`,
      priceCurrency: 'TWD',
      // The comparison is false for NaN, so unparsable prices become -1.
      price: price >= 0 ? price : -1,
      about: $('.txt_cont', elem)
        .children('p')
        .text()
        // Collapse the literal "...... more" teaser suffix; the dots are escaped so that
        // arbitrary characters before " more" are not swallowed.
        .replace(/\.{6} more\n\t{8}/g, ' ...'),
      publisher: $('a[rel=mid_publish]', elem).prop('title'),
    };

    if (authors.length > 0) {
      book.authors = authors;
    }

    books.push(book);
  });

  return books;
}
154 |
/**
 * Pulls the original cover URL out of a books.com.tw image-proxy URL.
 *
 * @param url - Proxy URL of the form `.../getImage?i=https://....jpg&...`.
 * @returns The embedded `https://....jpg` URL, or the string `'null'` when `url` does not
 *   have that shape.
 */
function _getBooksCompanyThumbnail(url: string) {
  const found = /.+\/getImage\?i=(https:\/\/.+\.jpg).+/.exec(url);

  if (found === null) {
    return 'null';
  }

  return found[1];
}
161 |
--------------------------------------------------------------------------------
/src/stores/kobo.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the Kobo (Taiwan) store for `keywords` and wraps the outcome in a `Result`.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the outgoing request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const base = `https://www.kobo.com/tw/zh/search?fcmedia=Book&Query=${query}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body), base))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Extracts the book entries from a Kobo search-result page.
 *
 * Each result `li` embeds a JSON `<script>` whose `data` member describes the book; items
 * without usable JSON are skipped instead of failing the whole crawl.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @param base - URL the page was fetched from; thumbnail URLs are resolved against it.
 * @returns One `Book` per result item that carries book data; possibly empty.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  const $list = $('ul[class=result-items] li');

  const books: Book[] = [];

  $list.each((_index, elem) => {
    const $detail = $(elem).children('.item-detail');
    const $info = $detail.children('.item-info');

    // Pull the JSON data from the script element. The previous fallback text
    // '{ data: null }' was not valid JSON, so a missing script threw a SyntaxError.
    let info: Record<string, any> | null | undefined;
    try {
      info = JSON.parse($detail.children('script').html() || 'null')?.data;
    } catch {
      info = undefined;
    }

    if (!info) {
      return;
    }

    // Append the subtitle to the main title when there is one.
    let title = info.name;
    if (info.alternativeHeadline) {
      title += ` - ${info.alternativeHeadline}`;
    }

    // SECURITY: this attribute comes from a remote page and used to be passed to `eval`.
    // cheerio's `.data()` is expected to JSON-decode object-like values already (confirm
    // against the cheerio version in use), so only a plain JSON.parse fallback is kept.
    const rawTrackInfo: unknown = $info
      .children('.contributors')
      .children('.synopsis-contributors')
      .children('.synopsis-text')
      .children('.contributor-name')
      .data('track-info');

    let trackInfo: unknown = rawTrackInfo;
    if (typeof rawTrackInfo === 'string') {
      try {
        trackInfo = JSON.parse(rawTrackInfo);
      } catch {
        trackInfo = undefined;
      }
    }

    const authorText =
      typeof trackInfo === 'object' && trackInfo !== null
        ? (trackInfo as { author?: unknown }).author
        : undefined;
    // Filtering drops the '' entry that splitting an empty author string would produce.
    const authors =
      typeof authorText === 'string'
        ? authorText.split('、').filter((author) => author !== '')
        : [];

    // Free titles are flagged with a class and have no price text to parse.
    const $priceField = $info.children('.price');

    let price = 0;
    if (!$priceField.hasClass('free')) {
      price =
        parseFloat(
          $priceField
            .children('span')
            .children('span')
            .first()
            .text()
            .replace(/NT\$|,|\s/g, ''),
        ) || -1;
    }

    const book: Book = {
      id: info.isbn,
      thumbnail: new URL(info.thumbnailUrl, base).toString(),
      title,
      link: info.url,
      priceCurrency: $priceField.children('span').children('.currency').text(),
      price,
      about: info.description ? `${info.description} ...` : undefined,
    };

    if (authors.length > 0) {
      book.authors = authors;
    }

    books.push(book);
  });

  return books;
}
175 |
--------------------------------------------------------------------------------
/src/stores/taaze.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the TAAZE store for `keywords` and wraps the outcome in a `Result`.
 *
 * The result page only yields product ids, so every hit is enriched through
 * `_getBooksInfo` before the result is built.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the search request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` header of the search request.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const base = `https://www.taaze.tw/rwd_searchResult.html?keyType%5B%5D=1&prodKind=4&catFocus=14&keyword%5B%5D=${query}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => {
      const books: Book[] = _getBooks(cheerio.load(body));

      // Nothing to enrich when the listing is empty.
      return books.length ? _getBooksInfo(books) : books;
    })
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
102 |
/**
 * Fills in title, description, publisher, publish date, price and contributors for each
 * book by fetching its detail record with `_getBookInfo`.
 *
 * @param books - Books from the listing page; they are mutated in place.
 * @returns A promise of the same array once every detail request has settled successfully.
 *   It rejects when any detail request fails.
 */
function _getBooksInfo(books: Book[] = []) {
  return Promise.all(books.map((book) => _getBookInfo(book.id))).then((infos) => {
    books.forEach((book, index) => {
      const info = infos[index];

      // The detail endpoint may answer with an empty list; keep the listing placeholders
      // and mark the price as unknown instead of throwing on `undefined`.
      if (!info) {
        book.price = -1;
        return;
      }

      book.title = info.booktitle ?? book.title;
      book.about = typeof info.bookprofile === 'string' ? info.bookprofile.replace(/\r/g, '') : '';
      book.publisher = info.publisher;
      book.publishDate = info.publishdate;
      // -1 marks a price that could not be parsed.
      book.price = parseFloat(info.saleprice) || -1;

      if (info.authors) {
        book.authors = info.authors;
      }

      if (info.translator) {
        book.translator = info.translator;
        book.translators = [info.translator];
      }
    });

    return books;
  });
}
125 |
/**
 * Collects the product ids listed on a TAAZE search-result page.
 *
 * Only id-derived fields are filled here (the title is temporarily the id); the remaining
 * details are added later from the per-book detail request.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @returns One partially filled `Book` per `#listView > .media` element; possibly empty.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const books: Book[] = [];

  // No `.media` children means no hits, so the loop simply never runs.
  $('#listView')
    .children('.media')
    .each((position, media) => {
      const productId = $(media).prop('rel');

      books[position] = {
        id: productId,
        thumbnail: `https://media.taaze.tw/showLargeImage.html?sc=${productId}`,
        title: productId,
        link: `https://www.taaze.tw/goods/${productId}.html`,
        priceCurrency: 'TWD',
      };
    });

  return books;
}
157 |
/**
 * Fetches the detail record of a single TAAZE product.
 *
 * @param id - TAAZE product id.
 * @returns A promise of the first element of the JSON array the endpoint answers with, or
 *   `undefined` when the payload is not a non-empty array.
 * @throws Rejects with an `Error` on a non-2xx response, a timeout, or an unparsable body.
 */
function _getBookInfo(id = '') {
  const base = `https://www.taaze.tw/new_ec/rwd/lib/searchbookAgent.jsp?prodId=${id}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    headers: {
      'User-Agent': 'Taiwan-Ebook-Search/0.1',
    },
  };

  return fetch(base, options)
    .then((response) => {
      // Report HTTP failures explicitly rather than trying to JSON-parse an error page.
      if (!response.ok) {
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.json();
    })
    .then((info) => (Array.isArray(info) ? (info as any[])[0] : undefined));
}
175 |
--------------------------------------------------------------------------------
/src/routers/searches.ts:
--------------------------------------------------------------------------------
1 | import { Router } from 'express';
2 | import { UAParser } from 'ua-parser-js';
3 | import { format } from 'date-fns';
4 |
5 | import { sendMessage } from '../bot.js';
6 | import { firestore, insertSearch, getSearch, getBookstores } from '../firestore.js';
7 |
8 | import { AnyObject, getProcessTime } from '../interfaces/general.js';
9 | import { Bookstore } from '../interfaces/bookstore.js';
10 | import {
11 | readmoo,
12 | booksCompany,
13 | kobo,
14 | taaze,
15 | bookWalker,
16 | playStore,
17 | pubu,
18 | hyread,
19 | kindle,
20 | likerLand,
21 | } from '../stores/index.js';
22 |
// Lookup table from a bookstore id to the crawler that searches that store.
const bookstoreModel: AnyObject = {
  readmoo: readmoo,
  booksCompany: booksCompany,
  kobo: kobo,
  taaze: taaze,
  bookWalker: bookWalker,
  playStore: playStore,
  pubu: pubu,
  hyread: hyread,
  kindle: kindle,
  likerLand: likerLand,
};
35 |
36 | const searchesRouter = Router();
37 |
/**
 * Formats a stored search record as the Markdown summary sent to Telegram.
 *
 * @param data - Search record; reads `keywords`, `searchDateTime`, `processTime` (ms),
 *   `totalQuantity`, `userAgent`, `id` and `results`.
 * @returns A multi-line message with one line per bookstore result.
 */
const _telegramPrettier = (data: AnyObject): string => {
  // Milliseconds to seconds, rounded to two decimals.
  const toSeconds = (milliseconds: number): number =>
    Math.round((milliseconds / 1000) * 100) / 100;

  // The user agent is stored as a plain string by the POST handler; an object carrying
  // a `ua` member is accepted as well.
  const userAgent = typeof data.userAgent === 'string' ? data.userAgent : data.userAgent?.ua;

  // Join explicitly: interpolating the array itself would put a comma between the lines.
  const bookstoreLines = (data.results as AnyObject[])
    .map(
      ({ bookstore, isOkay, quantity, processTime }: AnyObject): string =>
        `\n${isOkay ? '✅' : '❌'} ${bookstore.displayName} (${quantity} | ${toSeconds(processTime)}s)`,
    )
    .join('');

  return [
    '',
    `Keywords: *${data.keywords}*`,
    `Search Time: ${data.searchDateTime}`,
    `Process Time: ${toSeconds(data.processTime)}s`,
    `Total: ${data.totalQuantity}`,
    `User Agent: ${userAgent}`,
    `Search ID: \`${data.id}\``,
    `Link: [🔗](https://taiwan-ebook-lover.github.io/searches/${data.id})`,
    `Bookstore Result: ${bookstoreLines}`,
    '',
  ].join('\n');
};
56 |
/**
 * POST / — runs a search across the selected (or all online) bookstores, stores the
 * outcome, notifies Telegram and answers with the stored search.
 *
 * Query parameters: `q` (required keywords), `bookstores` (optional id or ids to restrict
 * the search), `bomb` (when present, answers 503 with that message).
 */
searchesRouter.post('/', async (req, res, next) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();
  const searchDateTime = new Date();

  const keywords = req.query.q;
  const bombMessage = req.query.bomb;

  // A single `bookstores=x` arrives as a string and repeated keys as an array. Normalise to
  // an array of strings so `includes` compares whole ids rather than substrings.
  const rawBookstores: unknown = req.query.bookstores;
  const bookstoreCandidates: unknown[] = Array.isArray(rawBookstores)
    ? rawBookstores
    : [rawBookstores];
  const bookstoresRequest = bookstoreCandidates.filter(
    (id): id is string => typeof id === 'string',
  );

  // Parse the user agent, falling back to the API's own identifier.
  const ua = new UAParser(req.headers['user-agent']);
  const userAgent = ua.getResult()?.ua || `Taiwan-Ebook-Search/${process.env.npm_package_version}`;

  if (bombMessage) {
    return res.status(503).send({
      message: bombMessage,
    });
  }

  // Keywords are mandatory.
  if (!keywords) {
    return res.status(400).send({
      message: 'q is required.',
    });
  }

  try {
    // Inside the try block so that a bookstore lookup failure becomes a 503 response
    // instead of an unhandled rejection.
    const bookstores = await getBookstores();
    const validBookstores = bookstores.filter((store) => store.isOnline);

    let selectedBookstores = validBookstores.filter((store) =>
      bookstoresRequest.includes(store.id),
    );

    if (!selectedBookstores.length) {
      selectedBookstores = validBookstores;
    }

    // Wait for every bookstore query to finish.
    const searchResults = await Promise.all(
      selectedBookstores
        .filter((bookstore: Bookstore) => !!bookstoreModel[bookstore.id])
        .map((bookstore: Bookstore) => bookstoreModel[bookstore.id](bookstore, keywords, userAgent)),
    );

    // Collect the results and the overall hit count.
    const results: any[] = [];
    let totalQuantity = 0;

    for (const searchResult of searchResults) {
      totalQuantity += searchResult?.quantity ?? 0;
      results.push({ ...searchResult });
    }

    const processTime = getProcessTime(process.hrtime(hrStart));

    const insertData: AnyObject = {
      keywords,
      searchDateTime: format(searchDateTime, `yyyy/LL/dd HH:mm:ss`),
      processTime,
      userAgent,
      totalQuantity,
      results,
      apiVersion: process.env.npm_package_version,
    };

    if (!firestore) {
      throw Error('Firestore is invalid.');
    }

    const search = await insertSearch(insertData);
    const telegramMessage: string = _telegramPrettier(search);

    sendMessage(telegramMessage);

    return res.status(201).send(search);
  } catch (error) {
    // `console.time` only starts a timer; log the timestamp together with the error.
    console.error(`Error time: ${new Date().toISOString()}`);
    console.error(error);

    // JSON.stringify of an Error yields "{}", so send its stack or message instead.
    sendMessage(error instanceof Error ? (error.stack ?? error.message) : String(error));

    return res.status(503).send({
      message: 'Something is wrong...',
    });
  }
});
144 |
/**
 * GET /:id — returns a stored search: 200 with the record, 404 when `getSearch` yields
 * nothing, 503 when the lookup fails.
 */
searchesRouter.get('/:id', async (req, res, next) => {
  const searchId: string = req.params.id;

  try {
    const search = await getSearch(searchId);

    if (!search) {
      return res.status(404).send({
        message: 'Search not found.',
      });
    }

    return res.status(200).send(search);
  } catch (error) {
    // `console.time` only starts a timer; log the timestamp together with the error.
    console.error(`Error time: ${new Date().toISOString()}`);
    console.error(error);

    return res.status(503).send({
      message: 'Something is wrong...',
    });
  }
});
166 |
167 | export { searchesRouter };
168 |
--------------------------------------------------------------------------------
/src/stores/playStore.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches Google Play Books (Taiwan) for `keywords` and wraps the outcome in a `Result`.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the outgoing request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const rootURL = `https://play.google.com`;
  const base = `${rootURL}/store/search?q=${query}&c=books&authuser=0&gl=tw&hl=zh-tw`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body), rootURL, base))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
97 |
/**
 * Extracts the book entries from a Google Play Books search-result page.
 *
 * @param $ - Cheerio handle of the loaded result page.
 * @param rootURL - Origin of the store; used to build the cover image URL.
 * @param base - URL the page was fetched from; book links are resolved against it.
 * @returns One `Book` per `div[role=listitem]` found in the result container; possibly
 *   empty. `price` is 0 for free titles and -1 when no price could be parsed.
 */
function _getBooks($: cheerio.CheerioAPI, rootURL: string, base: string) {
  const $list = $('body > c-wiz')
    .eq(1)
    .children('div')
    .children('div')
    .children('c-wiz')
    .children('c-wiz')
    .find('div[role=listitem]');

  const books: Book[] = [];

  // An empty list means the store has no match.
  if (!$list.length) {
    console.log('Not found in Play Store!');

    return books;
  }

  // "$1,234" -> 1234. Empty text yields NaN rather than 0, so that a missing price is not
  // mistaken for a free title.
  const parsePrice = (text: string): number => {
    const cleaned = text.replace(/\$|,/g, '').trim();

    return cleaned === '' ? NaN : Number(cleaned);
  };

  $list.each((_index, elem) => {
    const $anchor = $(elem).children('div').eq(0).children('div').eq(0).children('a');
    // Second child of the link: holds the title block and the price block.
    const $summary = $anchor.children('div').eq(1);

    const linkUrl = new URL($anchor.prop('href') ?? '', base);

    const id = linkUrl.searchParams.get('id') as string;

    const $priceRootElem = $summary
      .children('div')
      .eq(1)
      .children('div')
      .last()
      .children()
      .children('span');
    const priceText = $priceRootElem.text();

    let price = 0;
    // '免費' ("free") is the label shown for free titles.
    if (priceText !== '免費') {
      // With several prices shown (presumably list and discounted), take the lowest one.
      const $priceElems = $priceRootElem.find('span[aria-hidden="true"] > span');
      const candidates = (
        $priceElems.length
          ? $priceElems.map((_priceIndex, priceElem) => parsePrice($(priceElem).text())).get()
          : [parsePrice(priceText)]
      ).filter((candidate: number) => !Number.isNaN(candidate));

      // -1 marks an unknown price, matching the convention of the other stores.
      price = candidates.length ? Math.min(...candidates) : -1;
    }

    // Pin the language and country of the book link.
    linkUrl.searchParams.set('gl', 'tw');
    linkUrl.searchParams.set('hl', 'zh-tw');

    const book: Book = {
      id,
      thumbnail: `${rootURL}/books/publisher/content/images/frontcover/${id}?fife=w256-h256`,
      title: $summary.children('div').eq(0).prop('title'),
      link: linkUrl.href,
      priceCurrency: 'TWD',
      price,
    };

    books.push(book);
  });

  return books;
}
182 |
--------------------------------------------------------------------------------
/src/stores/readmoo.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Searches the Readmoo store for `keywords` and wraps the outcome in a `Result`.
 *
 * @param bookstore - Firestore bookstore record. `proxyUrl` is split off and used only for
 *   the outgoing request; the remaining fields are echoed back as `Result.bookstore`.
 * @param keywords - Raw search keywords; they are URL-encoded here.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly when the bookstore is flagged offline, otherwise a promise of
 *   a `Result`. Request and parse failures resolve to `isOkay: false` instead of rejecting.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // Start measuring the processing time.
  const hrStart = process.hrtime();

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: getProcessTime(process.hrtime(hrStart)),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL-encode the keywords before embedding them in the search URL.
  const query = encodeURIComponent(keywords);
  const base = `https://readmoo.com/search/keyword?pi=0&st=true&q=${query}&kw=${query}`;

  const options = {
    method: 'GET',
    compress: true,
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, so the handler below would
        // otherwise report `undefined` and the HTTP failure reason would be lost.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body)))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Rejections are not guaranteed to be Error instances; normalise to a string.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: getProcessTime(process.hrtime(hrStart)),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Parse a Readmoo search result page into a list of books.
 *
 * Every `#main_items li` element is read as one book. When the `READMOO_AP_ID` environment
 * variable is set (and the item exposes an identifier), the book link is replaced by the
 * `https://readmoo.com/ap/target/<apId>` redirect carrying the base64-encoded book URL with
 * its `=` padding stripped.
 *
 * @param $ - Cheerio document loaded from the search result HTML.
 * @returns The books found on the page, in page order; empty when there are no result items.
 */
function _getBooks($: cheerio.CheerioAPI) {
  const books: Book[] = [];
  const apId = process.env.READMOO_AP_ID;

  // No result items means nothing matched the keywords; `.each` is then simply a no-op.
  $('#main_items li').each((_, elem) => {
    const $caption = $(elem).children('.caption');
    const $priceInfo = $caption.children('.price-info');
    const $titleLink = $caption.children('h4').children('a');

    const id = $priceInfo.children('meta[itemprop=identifier]').prop('content');

    // Default to the link printed on the page; only build the redirect link when there is an
    // identifier to encode, otherwise the URL would point at ".../book/undefined".
    let link = $titleLink.prop('href') ?? 'https://readmoo.com';
    if (apId && id) {
      const apCode = Buffer.from(`https://readmoo.com/book/${id}`)
        .toString('base64')
        .replace(/=+$/, '');
      link = `https://readmoo.com/ap/target/${apId}?url=${apCode}`;
    }

    // One entry per contributor anchor; anchors that are empty after whitespace removal are
    // dropped so that a book without contributors does not end up with `['']`.
    const authors = $caption
      .children('.contributor-info')
      .children('a')
      .map((_, anchor) => $(anchor).text().replace(/\s+/g, ''))
      .toArray()
      .filter((name) => name !== '');

    const publisher = $caption
      .children('.publisher-info')
      .children('a')
      .text()
      .replace(/\s+/g, '');

    const publishDate = $caption
      .children('.publish-date')
      .children('span')
      .text()
      .replace(/出版日期:|\s/g, '');

    const parsedPrice = parseFloat(
      $priceInfo.children('.our-price').children('strong').text().replace(/NT\$|,/g, ''),
    );

    const book: Book = {
      id,
      thumbnail:
        ($(elem).children('.thumbnail').children('a').children('img').data('lazy-original') as
          | string
          | undefined) || '',
      title: $titleLink.text(),
      link,
      priceCurrency: $priceInfo.children('meta[itemprop=priceCurrency]').prop('content'),
      // -1 marks an unparsable price only; `|| -1` would also turn a genuine 0 into -1.
      price: Number.isNaN(parsedPrice) ? -1 : parsedPrice,
      about: $caption.children('.description').text(),
    };

    if (authors.length > 0) {
      book.authors = authors;
    }

    if (publisher !== '') {
      book.publisher = publisher;
    }

    if (publishDate !== '') {
      book.publishDate = publishDate;
    }

    books.push(book);
  });

  return books;
}
192 |
--------------------------------------------------------------------------------
/src/stores/bookWalker.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import fetch from 'node-fetch';
3 | import timeoutSignal from 'timeout-signal';
4 |
5 | import pkg from 'https-proxy-agent';
6 | const { HttpsProxyAgent } = pkg;
7 |
8 | import { Book } from '../interfaces/book.js';
9 | import { Result } from '../interfaces/result.js';
10 | import { getProcessTime } from '../interfaces/general.js';
11 | import { FirestoreBookstore } from '../interfaces/firestoreBookstore.js';
12 |
/**
 * Search BookWalker Taiwan for `keywords` and report the outcome as a `Result`.
 *
 * @param bookstore - Bookstore record. `proxyUrl` is split off and, when set, used to build the
 *   HTTPS proxy agent for the request; the remaining fields are echoed back in the result.
 * @param keywords - Raw search keywords; they are URL-encoded before being placed in the query.
 * @param userAgent - Value sent as the `User-Agent` request header.
 * @returns A `Result` directly (not a promise) when `bookstore.isOnline` is falsy; otherwise a
 *   promise of a `Result`. Request, HTTP-status and parsing failures are converted into a
 *   result with `isOkay: false` and the failure message in `error`.
 */
export default (
  { proxyUrl, ...bookstore }: FirestoreBookstore,
  keywords = '',
  userAgent: string,
) => {
  // start calc process time
  const hrStart = process.hrtime();
  const elapsed = () => getProcessTime(process.hrtime(hrStart));

  if (!bookstore.isOnline) {
    const result: Result = {
      bookstore,
      isOkay: false,
      status: 'Bookstore is offline',
      processTime: elapsed(),
      books: [],
      quantity: 0,
    };

    return result;
  }

  // URL encode into a local instead of reassigning the parameter.
  const query = encodeURIComponent(keywords);
  const base = `https://www.bookwalker.com.tw/search?w=${query}&m=0&detail=1`;

  const options = {
    method: 'GET',
    compress: true,
    // Abort the request after 10 seconds.
    signal: timeoutSignal(10000),
    agent: proxyUrl ? new HttpsProxyAgent(proxyUrl) : undefined,
    headers: {
      'User-Agent': `${userAgent}`,
    },
  };

  return fetch(base, options)
    .then((response) => {
      if (!response.ok) {
        // Throw a real Error: a bare string has no `.message`, which previously made the
        // catch handler log and report `undefined` for every HTTP failure.
        throw new Error(`HTTP ${response.status} ${response.statusText}`.trim());
      }

      return response.text();
    })
    .then((body) => _getBooks(cheerio.load(body), base))
    .then((books) => {
      const result: Result = {
        bookstore,
        isOkay: true,
        status: 'Crawler success.',
        processTime: elapsed(),
        books,
        quantity: books.length,
      };

      return result;
    })
    .catch((error: unknown) => {
      // Anything can be thrown, so narrow before reading a message.
      const message = error instanceof Error ? error.message : String(error);

      console.log(message);

      const result: Result = {
        bookstore,
        isOkay: false,
        status: 'Crawler failed.',
        processTime: elapsed(),
        books: [],
        quantity: 0,
        error: message,
      };

      return result;
    });
};
96 |
/**
 * Parse a BookWalker search result page into a list of books.
 *
 * Only `.listbox` elements that contain a `.listbox_title` are read; every `.bookdesc` inside
 * them becomes one book. Books from all such list boxes are appended in page order.
 *
 * @param $ - Cheerio document loaded from the search result HTML.
 * @param base - URL the page was fetched from; relative product links are resolved against it.
 * @returns The books found on the page; empty when nothing matched.
 */
function _getBooks($: cheerio.CheerioAPI, base: string) {
  const books: Book[] = [];

  $('.listbox').each((_, category) => {
    // List boxes without a title are skipped.
    if (!$(category).children('.listbox_title').length) {
      return;
    }

    $(category)
      .children('.bookdesc')
      .each((_, elem) => {
        const $bookData = $(elem).children('.bookdata');
        const $titleLink = $bookData.children('h2').children('a');
        const $writerInfo = $bookData.children('.bw_item').children('.writerinfo');
        const $bookInfo = $bookData.children('.topic_content').children('.bookinfo');

        let title = $titleLink.text();
        const subTitle = $bookData.children('h3').children('a').text();
        if (subTitle) {
          title += ` / ${subTitle}`;
        }

        const authors: string[] = [];
        const translators: string[] = [];
        const painters: string[] = [];

        // Each contributor entry reads "<role> : <name>".
        $writerInfo
          .children('.writer_data')
          .children('li')
          .map((_, el) => $(el).text())
          .toArray()
          .forEach((entry) => {
            const [role = '', name] = entry.split(' : ').map((part) => part.trim());

            // Skip entries without a name instead of emitting `undefined` / "undefined (role)".
            if (!name) {
              return;
            }

            switch (role) {
              case '作者':
                authors.push(name);
                break;
              case '譯者':
                translators.push(name);
                break;
              case '插畫':
                painters.push(name);
                break;
              default:
                authors.push(`${name} (${role})`);
                break;
            }
          });

        const href = $titleLink.prop('href') ?? '';
        const parsedPrice = parseFloat($writerInfo.children('h4').children('span').text().replace(/\D/g, ''));

        const book: Book = {
          id: href.replace('/product/', ''),
          thumbnail:
            ($(elem)
              .children('.bookcover')
              .children('.bookitem')
              .children('a')
              .children('img')
              .data('src') as string | undefined) ?? '',
          title,
          link: new URL(href, base).toString(),
          priceCurrency: 'TWD',
          // -1 marks an unparsable price only; `|| -1` would also turn a genuine 0 into -1.
          price: Number.isNaN(parsedPrice) ? -1 : parsedPrice,
          about:
            $bookInfo.children('h4').text() + $bookInfo.children('h5').children('span').text(),
        };

        if (authors.length > 0) {
          book.authors = authors;
        }

        if (translators.length > 0) {
          book.translators = translators;
        }

        if (painters.length > 0) {
          book.painters = painters;
        }

        // Append rather than assign by index: the inner loop index restarts at 0 for every
        // list box, so `books[i] = ...` let later list boxes overwrite earlier books.
        books.push(book);
      });
  });

  return books;
}
208 |
--------------------------------------------------------------------------------