├── public
└── assets
│ ├── bub2.png
│ ├── web.png
│ ├── favicon.ico
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── mstile-150x150.png
│ ├── apple-touch-icon.png
│ ├── android-chrome-192x192.png
│ ├── android-chrome-384x384.png
│ └── browserconfig.xml
├── .gitignore
├── .gitpod.yml
├── next.config.js
├── docker-compose.yml
├── utils
├── bullconfig.js
├── constants.js
├── scraper.js
└── helper.js
├── bull
├── email-queue
│ ├── producer.js
│ └── consumer.js
├── commons-queue
│ ├── producer.js
│ └── consumer.js
├── google-books-queue
│ ├── producer.js
│ └── consumer.js
├── pdl-queue
│ ├── producer.js
│ └── consumer.js
└── trove-queue
│ ├── producer.js
│ └── consumer.js
├── pages
├── index.js
├── faqs.js
├── googleauth.js
├── stats.js
├── api
│ └── auth
│ │ └── [...nextauth].js
├── queue.js
└── _app.js
├── components
├── BooksWrapper.js
├── Answer.js
├── Question.js
├── FaqsSection.js
├── ChangeIdentifier.js
├── QueueSection.js
├── ShowQueue.js
├── Footer.js
├── Header.js
├── ShowJobInformation.js
├── QueueTable.js
└── Books.js
├── .gitpod.Dockerfile
├── PR_TEMPLATE.md
├── .github
└── ISSUE_TEMPLATE
│ ├── feature_request.md
│ ├── bug_report.md
│ └── issue_template.md
├── .env.example
├── LICENSE.md
├── FAQ.md
├── styles
└── global.less
├── GSSOC_INSTRUCTIONS.md
├── HELP_WANTED.md
├── package.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── hooks
└── useMetadataForUI.js
├── README.md
└── server.js
/public/assets/bub2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/bub2.png
--------------------------------------------------------------------------------
/public/assets/web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/web.png
--------------------------------------------------------------------------------
/public/assets/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon.ico
--------------------------------------------------------------------------------
/public/assets/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon-16x16.png
--------------------------------------------------------------------------------
/public/assets/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon-32x32.png
--------------------------------------------------------------------------------
/public/assets/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/mstile-150x150.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .next
2 | node_modules
3 | .vscode
4 | .env
5 | package-lock.json
6 | out/
7 | *.log
8 | .env.stage
9 | .env.local
--------------------------------------------------------------------------------
/public/assets/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/apple-touch-icon.png
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | tasks:
2 | - init: npm install && npm run build
3 | command: npm run start
4 | image:
5 | file: .gitpod.Dockerfile
6 |
--------------------------------------------------------------------------------
/next.config.js:
--------------------------------------------------------------------------------
1 | const withLess = require("next-with-less");
2 |
3 | module.exports = withLess({
4 | lessLoaderOptions: {},
5 | });
6 |
--------------------------------------------------------------------------------
/public/assets/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/android-chrome-192x192.png
--------------------------------------------------------------------------------
/public/assets/android-chrome-384x384.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/android-chrome-384x384.png
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3'
2 | services:
3 | redis:
4 | image: redis
5 | ports:
6 | - "6379:6379"
7 | restart: always
--------------------------------------------------------------------------------
/public/assets/browserconfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #da532c
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/utils/bullconfig.js:
--------------------------------------------------------------------------------
1 | const Queue = require('bull');
2 | require("dotenv").config();
3 | module.exports = {
4 | getNewQueue: (name) => {
5 | return new Queue(name,{
6 | redis: {
7 | port: process.env.redisport,
8 | host: process.env.redishost
9 | }
10 | })
11 | }
12 | }
--------------------------------------------------------------------------------
/bull/email-queue/producer.js:
--------------------------------------------------------------------------------
1 | const config = require("../../utils/bullconfig");
2 | require("./consumer");
3 |
4 | const EmailQueue = config.getNewQueue("email-queue");
5 |
6 | module.exports = async (userName, title, trueURI, status) => {
7 | EmailQueue.add({
8 | userName,
9 | title,
10 | trueURI,
11 | status,
12 | });
13 | };
14 |
--------------------------------------------------------------------------------
/pages/index.js:
--------------------------------------------------------------------------------
1 | import Header from "../components/Header";
2 | import Books from "../components/Books";
3 | const init = () => (
4 |
12 | );
13 |
14 | export default init;
15 |
--------------------------------------------------------------------------------
/components/BooksWrapper.js:
--------------------------------------------------------------------------------
1 | import { Box } from "@mui/material";
2 |
3 | export default function BooksWrapper({ isCommonsMetadataReady, children }) {
4 | return (
5 |
15 | {children}
16 |
17 | );
18 | }
19 |
--------------------------------------------------------------------------------
/.gitpod.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gitpod/workspace-full
2 | USER gitpod
3 | RUN sudo apt-get update -q && \
4 | sudo apt-get install -yq redis-server
5 |
6 | # Install custom tools, runtime, etc. using apt-get
7 | # For example, the command below would install "bastet" - a command line tetris clone:
8 | #
9 | # RUN sudo apt-get -q update && # sudo apt-get install -yq bastet && # sudo rm -rf /var/lib/apt/lists/*
10 | #
11 | # More information: https://www.gitpod.io/docs/config-docker/
12 |
--------------------------------------------------------------------------------
/PR_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Fixes: [Link to the Issue]
2 |
3 | ## Proposed Changes
4 | -
5 | -
6 | -
7 |
8 | ## Files Added
9 | -
10 |
2 |
3 |
4 |
5 | ## Expected Behavior
6 |
7 |
8 | ## Current Behavior
9 |
10 |
11 | ## Possible Solution
12 |
13 |
14 | ## Steps to Reproduce
15 |
16 |
17 | 1.
18 | 2.
19 | 3.
20 | 4.
21 |
22 | ## Context (Environment)
23 |
24 |
25 |
26 |
27 |
28 | ## Detailed Description
29 |
30 |
31 | ## Possible Implementation
32 |
33 |
--------------------------------------------------------------------------------
/bull/pdl-queue/producer.js:
--------------------------------------------------------------------------------
1 | const config = require("../../utils/bullconfig");
2 | const PDLQueue = config.getNewQueue("pdl-queue");
3 | const cheerio = require("cheerio"); // Basically jQuery for node.js
4 | require("./consumer");
5 | const { getPDLMetaData } = require("../../utils/helper.js");
6 |
7 | module.exports = async (
8 | bookid,
9 | IAIdentifier,
10 | categoryID,
11 | email,
12 | userName,
13 | isEmailNotification,
14 | isUploadCommons,
15 | oauthToken,
16 | commonsMetadata
17 | ) => {
18 | const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookid}&page=1&CategoryID=${categoryID}&Searched=W3GX`;
19 | var options = {
20 | uri,
21 | transform: function (body) {
22 | return cheerio.load(body);
23 | },
24 | };
25 |
26 | const metaData = await getPDLMetaData(options, bookid, categoryID);
27 |
28 | metaData["email"] = email;
29 | metaData["userName"] = userName;
30 | metaData["IAIdentifier"] = IAIdentifier;
31 | metaData["isEmailNotification"] = isEmailNotification;
32 | metaData["isUploadCommons"] = isUploadCommons;
33 | metaData["oauthToken"] = oauthToken;
34 | metaData["commonsMetadata"] = commonsMetadata;
35 | const details = {
36 | details: metaData,
37 | };
38 | PDLQueue.add(details);
39 | };
40 |
--------------------------------------------------------------------------------
/bull/trove-queue/producer.js:
--------------------------------------------------------------------------------
1 | const config = require("../../utils/bullconfig");
2 | const TroveQueue = config.getNewQueue("trove-queue");
3 | const { getTroveMetaData } = require("../../utils/helper.js");
4 | const cheerio = require("cheerio"); // Basically jQuery for node.js
5 |
6 | require("./consumer");
7 | module.exports = async (
8 | bookid,
9 |
10 | IAIdentifier,
11 |
12 | metaData,
13 |
14 | email,
15 |
16 | userName,
17 | isEmailNotification,
18 | isUploadCommons,
19 | oauthToken,
20 | commonsMetadata
21 | ) => {
22 | const uri = `https://trove.nla.gov.au/newspaper/article/${bookid}`;
23 | var options = {
24 | uri,
25 | transform: function (body) {
26 | return cheerio.load(body);
27 | },
28 | };
29 |
30 | const issueRenditionId = await getTroveMetaData(options);
31 |
32 | metaData["email"] = email;
33 | metaData["issueRenditionId"] = issueRenditionId;
34 | metaData["userName"] = userName;
35 | metaData["IAIdentifier"] = IAIdentifier;
36 | metaData["isEmailNotification"] = isEmailNotification;
37 | metaData["isUploadCommons"] = isUploadCommons;
38 | metaData["oauthToken"] = oauthToken;
39 | metaData["commonsMetadata"] = commonsMetadata;
40 |
41 | const details = {
42 | details: metaData,
43 | };
44 | TroveQueue.add(details);
45 | };
46 |
--------------------------------------------------------------------------------
/components/ShowQueue.js:
--------------------------------------------------------------------------------
1 | import React from "react";
2 | import Link from "next/link";
3 |
4 | const ShowQueue = ({ data, library }) => (
5 |
6 |
20 |
21 |
22 | {library}
23 |
24 | Waiting: {data?.waiting}
25 |
26 |
27 | Active: {data?.active}
28 |
29 |
30 | Completed: {data?.completed}
31 |
32 |
33 | Failed: {data?.failed}
34 |
35 |
36 | Delayed: {data?.delayed}
37 |
38 |
39 |
40 |
41 | );
42 |
43 | export default ShowQueue;
44 |
--------------------------------------------------------------------------------
/GSSOC_INSTRUCTIONS.md:
--------------------------------------------------------------------------------
1 | ## Instructions of GSSOC
2 | 1. This program is for beginners to get started with open source. The only criteria for contribution are your dedication and enthusiasm to contribute to open source.
3 | 2. You can check your contribution scores from the website:
4 | https://www.gssoc.tech/profile.html
5 | 3. Scores will be given as follows as per the issue label on the projects:
6 | * "beginner" : 2 points
7 | * "easy" : 4 points
8 | * "medium" : 7 points
9 | * "hard" : 10 points
10 |
11 | 4. The issues labelled 'taken' are already assigned to other contributors, so do not try to fix those issues until assigned to you.
12 | 5. Please refrain from commenting on the issues that are already assigned to any contributor!
13 | 6. Refer to the issue that a PR solves, so that a PR can be related to an issue.
14 | 7. More issues will be created by the Project admins in the coming weeks so keep on checking the Projects that you would like to contribute!
15 | 8. We request everyone in the Slack community to reply in thread only!
16 | 9. Do not share any personal information such as contact details, personal addresses, email IDs, etc. in public channels!
17 | 10. We request everyone in the community to be kind & respectful as much as you can! Any abuse, hurtful words or wrong language will not be tolerated and the member will be removed immediately without prior warning.
18 |
--------------------------------------------------------------------------------
/HELP_WANTED.md:
--------------------------------------------------------------------------------
1 | Following are the list of other sources that can be used to add books to Internet Archive, along with a sample book link and web APIs available on respective websites:
2 |
3 | ### 1. Project Gutenberg
4 | - Website: https://www.gutenberg.org/
5 | - Sample Link: http://www.gutenberg.org/files/155/155-h/155-h.htm
6 | - Web API for Project Gutenberg ebook metadata: https://gutendex.com (For more info: [click here](https://github.com/garethbjohnson/gutendex))
7 |
8 | ### 2. Open Book Publishers
9 | - Website: https://www.openbookpublishers.com/
10 | - Sample Link: https://www.openbookpublishers.com/product/106#0
11 | - API: https://github.com/OpenBookPublishers/obp_institution_api
12 |
13 | ### 3. Library of Congress
14 | - Website: http://www.read.gov/books/
15 | - Sample Link: http://www.read.gov/books/pageturner/aesops_fables/#page/8/mode/2up
16 | - API: The loc.gov JSON API provides structured data about Library of Congress collections. All URLs start with https://www.loc.gov/ and need to include fo=json as a parameter to get JSON. No API key or authentication is required. (For more info: [click here](https://libraryofcongress.github.io/data-exploration/))
17 |
18 | ### 4. Bloomsbury Collections
19 | - Website: https://www.bloomsburycollections.com/
20 | - Sample Link: https://www.bloomsburycollections.com/book/government-communication-cases-and-challenges/#0
21 |
22 | ### 5. Directory of Open Access Journals
23 | - Website: https://doaj.org/
24 | - Sample Link: https://doaj.org/article/5b3b17834afa41848e52db56c0c9ea78#0
25 | - API: https://doaj.org/api/v1/ (For more info: [click here](https://doaj.org/api/v1/docs))
--------------------------------------------------------------------------------
/components/Footer.js:
--------------------------------------------------------------------------------
1 | export default () => (
2 |
3 |
15 |
45 |
46 |
51 |
56 |
57 | );
58 |
--------------------------------------------------------------------------------
/utils/constants.js:
--------------------------------------------------------------------------------
// Shared constants for the BUB2 frontend: API endpoints, supported source
// libraries, the upload permission string, and FAQ copy.
export const host =
  process.env.NODE_ENV === "production"
    ? "https://bub2.wmcloud.org"
    : "http://localhost:5000"; //If you have port set in env file, replace 5000 with "process.env.PORT"
export const stats_data_endpoint = `${host}/getstats`;
export const queue_data_endpoint = `${host}/getqueue`;
export const queuelist_data_endpoint = `${host}/getqueuelist`;
// Human-readable names of the supported source libraries, keyed by slug.
export const library = {
  gb: "Google Books",
  pdl: "Panjab Digital Library",
  trove: "Trove Digital Library",
};
// Permission statement attached to uploads. Fixed the license typo:
// "CC0" (Creative Commons Zero, "No Rights Reserved"), not "CCO".
export const permission = `CC0 No Rights Reserved https://creativecommons.org/publicdomain/mark/1.0/`;
// Question/answer pairs rendered on the FAQ page.
export const faq_data = [
  {
    que: "What is Book Uploader Bot?",
    ans:
      "A Book Uploader Bot transfers documents from public libraries such as Google Books, and Punjab Digital Library etc to Internet Archive.",
  },
  {
    que: "What does this tool do?",
    ans:
      "The tool is built to help the community with free books that are available in the online public libraries. It makes the integration of books easier.",
  },
  {
    que: "Who can benefit from this tool?",
    ans:
      "The tool, under the hood, compiles all the book images/pdf and its meta data at one place where it can be accessed by anyone in the world.",
  },
  {
    que: "What are the future enhancements?",
    ans:
      "Bulk upload feature, direct upload to Wikimedia Commons and addition of newer libraries are some features being actively worked upon.",
  },
  {
    que: "Can I upload my own book?",
    ans:
      "If an appropriate license is attached to the book which allows it to be archived (which doesn't allow copyright infringement suits, etc.), then book can be uploaded.",
  },
  {
    que: "Can I delete or undo an upload?",
    ans:
      "Archives are supposed to be read-only and deleting is a 'write' operation. So it is not supported.",
  },
];
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "BUB2",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "dev": "node server.js",
8 | "build": "next build;",
9 | "start": "npm run build; NODE_ENV=production node server.js"
10 | },
11 | "husky": {
12 | "hooks": {
13 | "pre-commit": "lint-staged"
14 | }
15 | },
16 | "lint-staged": {
17 | "*.js": [
18 | "prettier --write",
19 | "git add"
20 | ]
21 | },
22 | "keywords": [],
23 | "author": "",
24 | "license": "ISC",
25 | "dependencies": {
26 | "@emotion/react": "^11.11.1",
27 | "@emotion/styled": "^11.11.0",
28 | "@mui/material": "^5.14.12",
29 | "@mui/styles": "^5.14.12",
30 | "@primer/octicons-react": "^9.5.0",
31 | "@wikimedia/codex-icons": "^0.15.0",
32 | "async": "^2.6.3",
33 | "blob-stream": "^0.1.3",
34 | "body-parser": "^1.18.3",
35 | "bull": "^4.11.3",
36 | "cheerio": "^1.0.0-rc.3",
37 | "cli-progress": "^2.1.1",
38 | "compression": "^1.7.4",
39 | "cors": "^2.8.5",
40 | "dotenv": "^6.2.0",
41 | "eslint": "^5.16.0",
42 | "express": "^4.16.4",
43 | "express-fileupload": "^1.1.4",
44 | "fs": "0.0.1-security",
45 | "http": "0.0.0",
46 | "https": "^1.0.0",
47 | "ioredis": "^5.4.1",
48 | "isomorphic-fetch": "^3.0.0",
49 | "isomorphic-unfetch": "^3.0.0",
50 | "jsdom": "^20.0.0",
51 | "jspdf": "^2.5.1",
52 | "jszip": "^3.2.2",
53 | "less": "^4.1.3",
54 | "less-loader": "^11.1.3",
55 | "loaders.css": "^0.1.2",
56 | "lodash": "^4.17.20",
57 | "mwn": "^2.0.1",
58 | "next": "^12.2.5",
59 | "next-auth": "^4.15.1",
60 | "next-with-less": "^3.0.1",
61 | "node-fetch": "^2.2.1",
62 | "nodemailer": "^6.7.8",
63 | "nprogress": "^0.2.0",
64 | "open": "^7.0.3",
65 | "pdf-lib": "^1.17.1",
66 | "pdfkit": "^0.9.1",
67 | "react": "^17.0.2",
68 | "react-dom": "^17.0.2",
69 | "react-loaders": "^3.0.1",
70 | "react-table": "^7.0.0-rc.16",
71 | "request": "^2.88.0",
72 | "request-promise": "^4.2.4",
73 | "scissors": "^0.2.5",
74 | "stream-length": "^1.0.2",
75 | "styled-jsx": "^3.2.1",
76 | "sweetalert2": "^7.33.1",
77 | "url": "^0.11.0",
78 | "winston": "^3.9.0"
79 | },
80 | "devDependencies": {
81 | "@wikimedia/codex": "^0.13.0",
82 | "@wikimedia/codex-design-tokens": "^0.13.0",
83 | "husky": "^4.2.3",
84 | "lint-staged": "^10.0.9",
85 | "prettier": "2.0.2"
86 | }
87 | }
88 |
--------------------------------------------------------------------------------
/pages/googleauth.js:
--------------------------------------------------------------------------------
1 | import Header from "../components/Header";
2 | import React, { useState } from "react";
3 | const GoogleAuth = () => {
4 | const CLIENT_ID =
5 | "267327767504-fbtmbl5kkf8m9bjahlv3umu4q74as560.apps.googleusercontent.com"; // Replace with your client ID
6 | const SCOPES = "https://www.googleapis.com/auth/adwords";
7 |
8 | const [tokens, setTokens] = useState({
9 | accessToken: null,
10 | refreshToken: null,
11 | });
12 |
13 | const handleGoogleLogin = () => {
14 | // Create the authorization URL
15 | const authUrl = `https://accounts.google.com/o/oauth2/v2/auth?client_id=${CLIENT_ID}&redirect_uri=${encodeURIComponent(
16 | window.location.origin
17 | )}&response_type=token&scope=${encodeURIComponent(
18 | SCOPES
19 | )}&include_granted_scopes=true`;
20 |
21 | // Open a popup for Google Login
22 | const authWindow = window.open(
23 | authUrl,
24 | "google-auth",
25 | "width=500,height=600"
26 | );
27 |
28 | // Listen for the authentication response
29 | const pollTimer = setInterval(() => {
30 | try {
31 | if (authWindow.closed) {
32 | clearInterval(pollTimer);
33 | }
34 |
35 | // Check for URL containing tokens
36 | const urlParams = new URLSearchParams(
37 | authWindow.location.hash.replace("#", "?")
38 | );
39 | console.log(urlParams, "::urlParams");
40 | if (urlParams.has("access_token")) {
41 | const accessToken = urlParams.get("access_token");
42 | // Refresh tokens aren't returned in implicit grant flow; use the code flow for a backend server
43 | setTokens({ accessToken, refreshToken: null });
44 | authWindow.close();
45 | }
46 | } catch (e) {
47 | // Security constraints: cannot access cross-origin data until redirected to same origin
48 | }
49 | }, 500);
50 | };
51 |
52 | return (
53 |
54 |
Google OAuth2 Authentication
55 |
59 | Connect Google Account
60 |
61 |
62 | {tokens.accessToken && (
63 |
64 |
Tokens Received
65 |
66 | Access Token: {tokens.accessToken}
67 |
68 |
69 | Refresh Token: (Not available in client-side flow)
70 |
71 |
72 | )}
73 |
74 | );
75 | };
76 |
77 | export default GoogleAuth;
78 |
--------------------------------------------------------------------------------
/utils/scraper.js:
--------------------------------------------------------------------------------
const request = require("request-promise");
const cheerio = require("cheerio");

/**
 * Scrape title, author, description and preview-image details for a
 * Panjab Digital Library book page.
 * @param {string|number} id - PDL book ID.
 * @param {string|number} categoryId - PDL category ID.
 * @returns {Promise<{title: string, author: string, description: string, preview: string}>}
 */
export const PdlDetails = async (id, categoryId) => {
  const pdlURI = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${id}&page=1&CategoryID=${categoryId}&Searched=W3GX`;
  const response = await request({
    uri: pdlURI,
    headers: {
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
      "Accept-Encoding": "gzip, deflate",
      "Accept-Language": "en-US,en;q=0.9,hi;q=0.8",
      Host: "www.panjabdigilib.org",
      "Upgrade-Insecure-Requests": 1,
    },
  });

  const $ = cheerio.load(response);
  // The PDL page has no semantic markup, so these deeply nested table
  // selectors are the only way to locate each field.
  const title = $(
    "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(1)>td>a"
  )
    .text()
    .trim();
  const authorLabel = $(
    "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(3)>td>table>tbody>tr>td:nth-child(1)"
  )
    .text()
    .trim();
  const author = $(
    "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(3)>td>table>tbody>tr>td:nth-child(2)>a"
  )
    .text()
    .trim();
  const description = $(
    "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(2)>td>table>tbody>tr:nth-child(1)>td:nth-child(2)"
  )
    .text()
    .trim();
  // Guard: .attr() returns undefined when the preview image is missing;
  // the old code threw a TypeError here. slice(8) strips the leading
  // relative-path prefix from the src — TODO confirm exact prefix length.
  const previewSrc = $(
    "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(1)>table>tbody>tr:nth-child(1)>td>table>tbody>tr>td>a>img"
  ).attr("src");
  const preview = previewSrc === undefined ? "" : previewSrc.slice(8);

  const details = {
    title: title,
    // The author cell only holds an author when its label reads "Author".
    author: authorLabel !== "Author" ? "" : author,
    description:
      description === "Click here to add description"
        ? "No description available"
        : description,
    preview: "http://www.panjabdigilib.org" + preview,
  };

  return details;
};
--------------------------------------------------------------------------------
/bull/email-queue/consumer.js:
--------------------------------------------------------------------------------
const config = require("../../utils/bullconfig");
const EmailQueue = config.getNewQueue("email-queue");
const winston = require("winston");
const logger = winston.loggers.get("defaultLogger");
require("dotenv").config();
const { Mwn } = require("mwn");

/**
 * Build the notification text for an upload attempt.
 * @param {{archive: boolean, commons: boolean}} status - Upload result flags.
 * @param {string} title - Title of the uploaded book.
 * @param {string|{archiveLink: string, commonsLink: string}} trueURI - Link(s) to the item.
 * @returns {string} Human-readable status message.
 */
function generateMessage(status, title, trueURI) {
  if (status.archive && status.commons) {
    return `Your file "${title}" has been uploaded to Internet Archive and Wikimedia Commons successfully. Take a look at, Internet Archive - ${trueURI.archiveLink}, Wikimedia Commons - ${trueURI.commonsLink}`;
  }
  if (status.archive) {
    return `Your file "${title}" has been uploaded to Internet Archive successfully! Take a look at ${trueURI}`;
  }
  // Covers both (!archive && !commons) and the previously unhandled
  // (!archive && commons) combination, which used to yield undefined.
  return `Your file "${title}" was not uploaded to Internet Archive! Please try again later.`;
}

/**
 * Send the status email via the MediaWiki emailuser API.
 * @param {string} username - Target MediaWiki username.
 * @param {string} title - Title of the uploaded book.
 * @param {string|object} trueURI - Link(s) to the uploaded item.
 * @param {object} status - Upload result flags.
 * @returns {Promise<number|Error>} 200 on success, the error otherwise.
 */
async function mediawikiEmail(username, title, trueURI, status) {
  try {
    const bot = await Mwn.init({
      apiUrl: process.env.NEXT_PUBLIC_WIKIMEDIA_URL + "/w/api.php",
      username: process.env.EMAIL_BOT_USERNAME,
      password: process.env.EMAIL_BOT_PASSWORD,
      // Set your user agent (required for WMF wikis, see https://meta.wikimedia.org/wiki/User-Agent_policy):
      userAgent: "BUB2/1.0 (https://bub2.wmcloud.org)",
      // Set default parameters to be sent to be included in every API request
      defaultParams: {
        assert: "user", // ensure we're logged in
      },
    });

    const csrf_token = await bot.getCsrfToken();

    // Await the API call. Previously the request promise was neither
    // returned nor awaited, so this function resolved `undefined` before
    // the email was sent and every job was treated as failed.
    const data = await bot.request({
      action: "emailuser",
      target: username,
      subject: "BUB2 upload status",
      text: generateMessage(status, title, trueURI),
      token: csrf_token,
      format: "json",
    });
    logger.log({
      level: "info",
      message: `Email Sent Successfully! Result : ${data}`,
    });
    return 200;
  } catch (error) {
    logger.log({
      level: "error",
      message: `mediawikiEmail: ${JSON.stringify(error)}`,
    });
    return error;
  }
}

EmailQueue.process(async (job, done) => {
  const emailResponse = await mediawikiEmail(
    job.data.userName,
    job.data.title,
    job.data.trueURI,
    job.data.status
  );
  if (emailResponse !== 200) {
    logger.log({
      level: "error",
      message: `EmailQueue: ${JSON.stringify(emailResponse)}`,
    });
    // Fail the job exactly once; previously execution fell through and
    // called done() a second time with a success result.
    return done(new Error(`EmailQueue: ${emailResponse}`));
  }
  done(null, true);
});
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Book Uploader Bot Code of Conduct
2 | In order to promote an open and welcoming atmosphere, we as contributors and maintainers commit ourselves to make involvement in our project and in our group a harassment-free experience for all irrespective of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
3 |
4 | ## Our Standards
5 | Examples of behaviour which contributes to the development of a positive environment include:
6 | * There should be clear communication.
7 | * Listen to everyone's ideas.
8 | * Learn all the aspects before coming to conclusion.
9 | * Using inoffensive language
10 | * Attaining positive work environment should be one of everybody's goals
11 |
12 | Examples of unacceptable participant actions include:
13 | * Use of unethical means to attain an end.
14 | * Putting self interest above group interest.
15 | * Disrespecting an individual on the basis of gender, religion or culture.
16 | * Publishing private details of people, such as physical or email addresses, without their prior consent
17 | * Undertaking the action of insider trading
18 |
19 | ## Our Responsibilities
20 | Authorities are responsible for creating an environment where everyone is treated equally and strict actions should be taken in case of unacceptable behaviour.
21 | Authorities have the duty and responsibility to delete, modify, or reject comments, code, wiki updates, problems and other contributions that are not consistent with this Code of Conduct, or temporarily or permanently ban any user for other actions that they find unacceptable, disruptive, offensive or harmful.
22 |
23 | ## Scope
24 | This code of conduct is applicable to all the content on Book Uploader website and all other websites which are linked to it whether offline or online.
25 | The code of conduct is also linked within project spaces and in public spaces where an individual is representing Book Uploader or its community.
26 |
27 | ## Conflict Resolution
28 | Conflict in most cases arises when there is a difference of opinion and people involved are not in a state to listen/understand the point of view of the other person. The severity of conflicts could vary from a mere disagreement to disrespectful exchange of words to more intense situations like physical violence. If you are experiencing any issue, we encourage you to use following strategies:
29 | * Fix the alleged dispute directly with the parties concerned, preferably in a real-time medium.
30 | * If this fails, get a third party (e.g. a mutual friend, and/or someone with background on the issue, but not involved in the conflict) to intervene.
31 | * If you are still unable to resolve the conflict, and you believe it rises to harassment or another code of conduct violation, report it.
32 |
33 |
34 | ## Reporting Violations
35 | Violations of the Code of Conduct can be reported to Book Uploader's concerned team. Then the team will investigate whether the Code of Conduct was violated and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
36 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | ## Finding issues and new tasks
2 | - All the issues and tasks are maintained on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R).
3 | - Make sure to assign an issue to yourself from [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R) before working on it.
4 |
5 | ## Fork
6 | You are required to fork this repository and make changes on the forked repository on a new branch other than *develop* and, after making all the changes, make a Pull Request to the develop branch.
7 | - After you have the project working on your local machine (refer [README.md](https://github.com/coderwassananmol/BUB2/blob/develop/README.md)), make sure you keep your fork up to date by tracking the original "upstream" repo that you forked. To do this, you'll need to add a remote:
8 | ```
9 | # Add 'upstream' repo to list of remotes
10 | $ git remote add upstream https://github.com/coderwassananmol/BUB2.git
11 |
12 | # Verify the new remote named 'upstream'
13 | $ git remote -v
14 | ```
15 | - Whenever you want to update your fork with the latest upstream changes, take pull from the upstream repo to your fork in order to keep it at par with the main project by:
16 | ```
17 | $ git pull upstream develop
18 | ```
19 | - Before making any contribution, create a separate branch using the command:
20 | ```
21 | # It will create a new branch with name Branch_Name and switch to that branch
22 | $ git checkout -b Branch_Name
23 | ```
24 | - After you've made changes or made your contribution to the project add changes to the branch you've just created by:
25 | ```
26 | # To add all new files to branch Branch_Name
27 | $ git add .
28 | ```
29 | - Commit messages should follow a [certain guideline](https://udacity.github.io/git-styleguide/). To commit, give a descriptive message for the convenience of the reviewer by:
30 | ```
31 | # This message gets associated with all the files you have changed
32 | $ git commit -m 'message'
33 | ```
34 | **NOTE**: A PR should have only one commit. Multiple commits should be squashed.
35 | - Now you are ready to push your work to the remote repository:
36 | ```
37 | # To push your work to your remote repository
38 | $ git push -u origin Branch_Name
39 | ```
40 | ## How to raise a pull request
41 | - Create a Pull Request to merge your branch with the **develop branch** and mention the link to the **Phabricator ticket** you worked on in the description of the Pull Request.
42 | - Once the Pull Request is open, provide the link to it within the comments section of the respective **Phabricator task**, as illustrated in [this sample](https://phabricator.wikimedia.org/T344119).
43 |
44 | ## Code Reviews
45 | - All submissions should come in the form of a PR and it must be reviewed by at least one reviewer before it gets merged.
46 |
47 | ## Did you find a bug?
48 |
49 | * **Ensure the bug was not already reported** by searching on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R).
50 |
51 | * If the bug is not already reported, create a **New Bug Report**. Make sure to include **title, clear description, tags**, and as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring.
52 |
53 |
54 |
--------------------------------------------------------------------------------
/pages/stats.js:
--------------------------------------------------------------------------------
1 | import Header from "../components/Header";
2 | import ShowQueue from "../components/ShowQueue";
3 | import fetch from "isomorphic-fetch";
4 | import { stats_data_endpoint, library } from "../utils/constants";
5 | import { useState } from "react";
6 | import Link from "next/link";
7 |
// Fallback per-queue job counts rendered before real stats arrive.
// Keys presumably mirror the count fields Bull reports for a queue
// (waiting/active/failed/completed/delayed) — verify against ShowQueue.
const emptyObject = {
  waiting: 0,
  active: 0,
  failed: 0,
  completed: 0,
  delayed: 0,
};
15 | const Stats = (props) => {
16 | const [queueName, setQueueName] = useState("gb");
17 | const onChange = (event) => {
18 | setQueueName(event.target.value);
19 | };
20 | return (
21 |
22 |
36 |
37 |
38 |
39 |
40 |
Select a Queue
41 |
42 |
43 | Google Books
44 |
45 | Panjab Digital Library
46 | Trove Digital Library
47 |
48 |
56 |
92 |
93 |
94 |
95 |
96 | );
97 | };
98 |
/**
 * Fetches queue statistics server-side on every request.
 *
 * @returns {Promise<object>} `{ props: { data } }` on success, or
 *   `{ props: { data: null } }` when the stats endpoint is unavailable.
 */
export async function getServerSideProps() {
  const resp = await fetch(stats_data_endpoint);
  if (resp.status !== 200) {
    // BUG FIX: Next.js rejects a bare `{}` from getServerSideProps —
    // the return value must contain `props`, `notFound` or `redirect`.
    // Fall back to null data so the page can still render.
    return { props: { data: null } };
  }
  const data = await resp.json();
  // Pass data to the page via props
  return { props: { data } };
}
108 |
109 | export default Stats;
110 |
--------------------------------------------------------------------------------
/bull/commons-queue/consumer.js:
--------------------------------------------------------------------------------
1 | const config = require("../../utils/bullconfig");
2 | const CommonsQueue = config.getNewQueue("commons-queue");
3 | const winston = require("winston");
4 | const {
5 | downloadFile,
6 | uploadToCommons,
7 | uploadToWikiData,
8 | convertZipToPdf,
9 | } = require("../../utils/helper");
10 | const JSZip = require("jszip");
11 | const logger = winston.loggers.get("defaultLogger");
12 |
/**
 * Commons queue worker: takes a book payload (either a base64 PDL ZIP
 * or a direct file URL), uploads the resulting PDF to Wikimedia
 * Commons, and for non-ZIP sources also writes a Wikidata item.
 * The outcome is broadcast on a per-job process event so the producer
 * that enqueued the job can resolve its callback.
 */
CommonsQueue.process(async (job, done) => {
  // Per-job event name so concurrent jobs do not cross-talk.
  // BUG FIX: one failure path previously emitted the bare
  // "commonsJobComplete" name (without the job id), which no producer
  // listens on; all emits now go through this single helper.
  const emitJobComplete = (status, value) =>
    process.emit(`commonsJobComplete:${job.id}`, { status, value });

  try {
    if (job.data.type === "pdlZip") {
      // PDL books arrive as a base64-encoded ZIP of page images that
      // must be stitched into a single PDF before the Commons upload.
      const zipBuffer = Buffer.from(job.data.downloadFileURL, "base64");
      const zip = await JSZip.loadAsync(zipBuffer);
      const convertZipToPdfRes = await convertZipToPdf(
        zip,
        "commonsFilePayload.pdf"
      );
      if (convertZipToPdfRes.status !== 200) {
        logger.log({
          level: "error",
          message: `convertZipToPdfRes: ${JSON.stringify(convertZipToPdfRes)}`,
        });
        emitJobComplete(false, null);
        return done(null, true);
      }
      const commonsResponse = await uploadToCommons(job.data.metadata);

      if (commonsResponse.fileUploadStatus !== 200) {
        logger.log({
          level: "error",
          message: `uploadToCommons: ${commonsResponse}`,
        });
        emitJobComplete(false, null);
        return done(null, true);
      }
      emitJobComplete(true, commonsResponse);
      return done(null, true);
    } else {
      // Non-ZIP sources expose a direct file URL in one of several
      // metadata shapes depending on the upstream library.
      const url =
        job.data?.metadata?.uri ||
        job.data?.downloadFileURL?.uri ||
        job.data?.metadata?.pdfUrl;
      const downloadFileRes = await downloadFile(url, "commonsFilePayload.pdf");

      if (downloadFileRes.writeFileStatus !== 200) {
        logger.log({
          level: "error",
          message: `downloadFile: ${downloadFileRes}`,
        });
        emitJobComplete(false, null);
        return done(null, true);
      }
      const commonsResponse = await uploadToCommons(job.data.metadata);

      if (commonsResponse.fileUploadStatus !== 200) {
        logger.log({
          level: "error",
          message: `uploadToCommons: ${commonsResponse}`,
        });
        emitJobComplete(false, null);
        return done(new Error(commonsResponse));
      }
      const wikiDataResponse = await uploadToWikiData(
        job.data.metadata,
        commonsResponse.filename,
        job.data.libraryName
      );
      // 404 from uploadToWikiData is a sentinel for "no Wikidata item
      // created"; either way the Commons upload itself succeeded, so
      // the sentinel is passed straight through to the producer.
      emitJobComplete(true, {
        commons: commonsResponse,
        wikidata: wikiDataResponse,
      });
      return done(null, true);
    }
  } catch (error) {
    // BUG FIX: the original logged the undefined variable `err` here,
    // which threw inside the catch block, and never called done() —
    // leaving the Bull job (and the waiting producer) hanging forever.
    logger.log({
      level: "error",
      message: `${error}`,
    });
    console.log(error, "::errorCommonsQueue");
    emitJobComplete(false, null);
    return done(error);
  }
});
114 |
--------------------------------------------------------------------------------
/pages/api/auth/[...nextauth].js:
--------------------------------------------------------------------------------
1 | import NextAuth from "next-auth";
2 | import WikimediaProvider from "next-auth/providers/wikimedia";
3 | import winston from "winston";
4 | const logger = winston.loggers.get("defaultLogger");
5 |
/**
 * Exchanges a Wikimedia OAuth2 refresh token for a fresh token pair.
 *
 * @param {string} refreshToken - Refresh token from the previous grant.
 * @returns {Promise<object>} Parsed JSON body of the token endpoint
 *   response (contains `access_token`/`refresh_token` on success, or
 *   the provider's error fields otherwise).
 * @throws Rethrows network/parse failures after logging them.
 */
async function refetchAccessToken(refreshToken) {
  const tokenEndpoint =
    process.env.NEXT_PUBLIC_WIKIMEDIA_URL + "/w/rest.php/oauth2/access_token";
  const form = new URLSearchParams({
    grant_type: "refresh_token",
    refresh_token: refreshToken,
    client_id: process.env.WIKIMEDIA_CLIENT_ID,
    client_secret: process.env.WIKIMEDIA_CLIENT_SECRET,
  });
  try {
    const response = await fetch(tokenEndpoint, {
      method: "POST",
      headers: { "Content-Type": "application/x-www-form-urlencoded" },
      body: form,
    });
    return await response.json();
  } catch (error) {
    logger.log({
      level: "error",
      message: `refetchAccessToken: ${error}`,
    });
    throw error;
  }
}
32 |
// NextAuth configuration for Wikimedia OAuth2 login.
export const authOptions = {
  providers: [
    WikimediaProvider({
      clientId: process.env.WIKIMEDIA_CLIENT_ID,
      clientSecret: process.env.WIKIMEDIA_CLIENT_SECRET,
      // Endpoints are overridden so the target wiki (dev/beta/prod)
      // can be selected via NEXT_PUBLIC_WIKIMEDIA_URL.
      token: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/access_token`,
      userinfo: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/resource/profile`,
      authorization: {
        url: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/authorize`,
      },
    }),
  ],
  session: {
    // NOTE(review): NextAuth v4 spells this `strategy: "jwt"`;
    // `jwt: true` is the v3 form — confirm the installed version
    // (JWT sessions are the v4 default when no adapter is set).
    jwt: true,
  },
  debug: process.env.NODE_ENV !== "production",
  logger: {
    debug(code, metadata) {
      // store logs for every user logging in using OAuth
      logger.log({
        level: "info",
        message: metadata,
      });
      // NOTE(review): assumes `metadata.account` is present whenever
      // code === "OAUTH_CALLBACK_RESPONSE" — verify for error callbacks.
      if (code === "OAUTH_CALLBACK_RESPONSE" && metadata.account.access_token) {
        logger.log({
          level: "info",
          message: `User ${metadata.profile.name} logged in using ${
            metadata.account.provider.charAt(0).toUpperCase() +
            metadata.account.provider.slice(1)
          } OAuth`,
        });
      }
    },
    error(code, metadata) {
      // store logs of aborted logins by users using OAuth
      if (code === "OAUTH_CALLBACK_HANDLER_ERROR") {
        logger.log({
          level: "error",
          message: `[${code}] ${metadata.error_description}`,
        });
      }
    },
  },

  callbacks: {
    // Runs on every token read/write. On first sign-in (`account` is
    // set) it captures the token pair and stamps a local expiry;
    // afterwards it transparently refreshes expired access tokens.
    async jwt({ token, account }) {
      // 12_600_000 ms = 3 h 30 min.
      const threeHoursThirtyMinutesInMilliseconds = 12600000;
      if (account) {
        token.accessToken = account.access_token;
        token.refreshToken = account.refresh_token;
        token.expiresIn = Date.now() + threeHoursThirtyMinutesInMilliseconds;
      }
      // Refresh the token if it's expired
      if (token.expiresIn && Date.now() > token.expiresIn) {
        try {
          const new_session = await refetchAccessToken(token.refreshToken);
          if (new_session.access_token) {
            token.accessToken = new_session.access_token;
            token.refreshToken = new_session.refresh_token;
            token.expiresIn =
              Date.now() + threeHoursThirtyMinutesInMilliseconds;
          } else {
            // Refresh rejected by the provider: flag the token so the
            // session callback (and the client) can force a sign-out.
            token.expired = true;
          }
          return token;
        } catch (error) {
          // Network/parse failure: keep the stale token and log;
          // a later request will retry the refresh.
          logger.log({
            level: "error",
            message: `jwt callback: ${error}`,
          });
        }
      }
      return token;
    },
    // Shapes the session object sent to the client.
    async session({ session, token, user }) {
      // Add the access token to the session object
      if (token.expired === true) {
        session.expired = true;
      } else {
        session.accessToken = token.accessToken;
        session.expiresIn = token.expiresIn;
      }
      return session;
    },
  },
};
119 |
// Next.js API route handler: delegates all /api/auth/* requests to
// NextAuth with the configuration above.
export default (req, res) => NextAuth(req, res, authOptions);
121 |
--------------------------------------------------------------------------------
/components/Header.js:
--------------------------------------------------------------------------------
1 | import React, { useState } from "react";
2 | import Link from "next/link";
3 | import { signOut } from "next-auth/react";
4 | import { useSession } from "next-auth/react";
5 |
6 | function Header(props) {
7 | const [isDropDownOpen, setIsDropDownOpen] = useState(false);
8 | const { data: session } = useSession();
9 |
10 | const toggleDropDown = () => {
11 | setIsDropDownOpen(!isDropDownOpen);
12 | };
13 |
14 | return (
15 |
16 |
49 |
50 |
51 |
56 |
57 |
62 |
67 | Upload
68 |
69 |
70 |
71 |
72 |
77 |
82 | Queue
83 |
84 |
85 |
86 |
87 |
92 |
97 | Stats
98 |
99 |
100 |
101 |
102 |
107 |
112 | FAQs
113 |
114 |
115 |
116 |
117 | {session && (
118 |
119 |
toggleDropDown()}>
120 | {session && session.user.name}
121 |
125 |
126 | {isDropDownOpen && (
127 |
128 |
signOut()}
131 | >
132 |
Logout
133 |
134 |
135 |
136 | )}
137 |
138 | )}
139 |
140 |
141 |
142 | );
143 | }
144 | export default Header;
145 |
--------------------------------------------------------------------------------
/components/ShowJobInformation.js:
--------------------------------------------------------------------------------
1 | import { useRouter } from "next/router";
2 | import { useEffect, useState, useRef } from "react";
3 | import _ from "lodash";
4 | import {
5 | Card,
6 | CardActionArea,
7 | CardActions,
8 | CardContent,
9 | CardMedia,
10 | Button,
11 | Typography,
12 | } from "@mui/material";
13 | import Link from "next/link";
14 | import { CircularProgress } from "@mui/material";
15 | import { host } from "../utils/constants";
16 |
17 | const ShowJobInformation = (props) => {
18 | const styles = {
19 | root: {
20 | maxWidth: 365,
21 | height: "fit-content",
22 | },
23 | cardContentContainer: {
24 | height: "200px",
25 | overflow: "auto",
26 | },
27 | cardContainer: {
28 | display: "flex",
29 | justifyContent: "center",
30 | alignContent: "center",
31 | marginTop: "0px",
32 | },
33 | uploadProgress: {
34 | marginLeft: "16.5px",
35 | },
36 | button: {
37 | fontSize: "11px",
38 | },
39 | cardImage: {
40 | maxHeight: "400px",
41 | },
42 | };
43 |
44 | const router = useRouter();
45 |
46 | const [data, setData] = useState({
47 | title: "",
48 | description: "",
49 | previewLink: "https://bub2.wmcloud.org",
50 | imageLinks: {},
51 | uploadStatus: {
52 | isUploaded: false,
53 | uploadLink: "",
54 | },
55 | queueName: props.queue_name,
56 | wikimedia_links: "",
57 | });
58 |
59 | const [progress, setProgress] = useState(0);
60 |
61 | const [loading, setLoading] = useState(false);
62 |
63 | useEffect(() => {
64 | try {
65 | if (props.queue_name && props.job_id) {
66 | setLoading(true);
67 | fetch(
68 | `${host}/getJobInformation?queue_name=${props.queue_name}&job_id=${props.job_id}`
69 | )
70 | .then((resp) => resp.json())
71 | .then((resp) => {
72 | setData(resp);
73 | setProgress(resp.progress);
74 | })
75 | .catch((err) => console.error(err));
76 | }
77 | } catch (err) {
78 | console.log(err, "::err");
79 | } finally {
80 | setLoading(false);
81 | }
82 | }, [props.queue_name, props.job_id]);
83 |
84 | if (loading) {
85 | return ;
86 | } else {
87 | return (
88 |
89 |
90 |
91 |
98 | 0
101 | ? styles.cardContentContainer
102 | : null
103 | }
104 | >
105 |
106 | {data.title}
107 |
108 |
109 | {data.description}
110 |
111 |
112 |
117 | Upload Progress: {progress}
118 |
119 |
120 |
121 |
122 | {data.uploadStatus.isUploaded ? (
123 |
124 |
130 | View on Internet Archive
131 |
132 |
133 | ) : null}
134 |
135 |
141 | View on {data.queueName}
142 |
143 |
144 |
145 |
146 |
147 | {data.wikimedia_links.commons !== "Not Integrated" ? (
148 |
152 |
158 | View on Wikimedia Commons
159 |
160 |
161 | ) : null}
162 |
163 |
164 | {data.wikimedia_links.wikidata !== "Not Integrated" ? (
165 |
169 |
175 | View on Wikidata
176 |
177 |
178 | ) : null}
179 |
180 |
181 |
182 | );
183 | }
184 | };
185 |
186 | export default ShowJobInformation;
187 |
--------------------------------------------------------------------------------
/pages/queue.js:
--------------------------------------------------------------------------------
1 | import { useRouter } from "next/router";
2 | import Header from "../components/Header";
3 | import QueueSection from "../components/QueueSection";
4 | import QueueTable from "../components/QueueTable";
5 | import { host, queue_data_endpoint } from "../utils/constants";
6 | import { useEffect, useState } from "react";
7 |
8 | const Queue = ({ data }) => {
9 | const router = useRouter();
10 | const [queueName, setQueueName] = useState("gb");
11 | const [tableDataArchive, setTableDataArchive] = useState([]);
12 | const [searchResult, setSearchResult] = useState([]);
13 | const [isSearch, setIsSearch] = useState(false);
14 | // initially, the page itself is refreshed every 15 seconds (according to GB queue)
15 | const [refreshSSPropsInterval, setSSPropsInterval] = useState(15000);
16 | const onChange = (event) => {
17 | setQueueName(event.target.value);
18 | // refresh server side props on queue change
19 | router.replace(router.asPath);
20 | // This time interval has been chosed based on speed of upload
21 | // For GB Queue, refresh server side props every 15 seconds
22 | // For PDL, refresh server side props every 50 seconds
23 | // For Trove, refresh server side props every 30 seconds
24 | if (event.target.value === "gb") {
25 | setSSPropsInterval(15000);
26 | } else if (event.target.value === "pdl") {
27 | setSSPropsInterval(50000);
28 | } else if (event.target.value === "trove") {
29 | setSSPropsInterval(30000);
30 | }
31 | };
32 |
33 | /**
34 | * The `onSearch` function filters the table data based on a search parameter(Book-title, username or status) and updates the
35 | * searchResult state which then gets passed to the QueueTable Component. If the search parameter is empty, all the table data is set to the searchResult state and returned to the QueueTable Component without filtering.
36 | * The unfiltered tableData is stored in the tableDataArchive state and is used to reset the search if the search parameter is empty.
37 | */
38 | const onSearch = (e) => {
39 | const searchParam = e.target.value.toLowerCase();
40 |
41 | if (searchParam === "") {
42 | setIsSearch(false);
43 | setSearchResult(tableDataArchive);
44 | return;
45 | }
46 |
47 | setIsSearch(true);
48 | const filteredData = tableDataArchive.filter((item) => {
49 | return (
50 | item.title.toLowerCase().includes(searchParam) ||
51 | item.userName.toLowerCase().includes(searchParam) ||
52 | item.status.toLowerCase().includes(searchParam) ||
53 | item.id.toString().includes(searchParam)
54 | );
55 | });
56 | setSearchResult(filteredData);
57 | };
58 |
59 | const fetchQueueData = () => {
60 | if (queueName)
61 | fetch(`${host}/allJobs?queue_name=${queueName}`)
62 | .then((resp) => resp.json())
63 | .then((resp) => {
64 | setTableDataArchive(resp);
65 | setSearchResult(resp);
66 | });
67 | };
68 |
69 | // This useEffect runs on page load and
70 | // is responsible for fetching the initial
71 | // queue data
72 | useEffect(() => {
73 | if (queueName) {
74 | fetchQueueData();
75 | }
76 | }, [queueName]);
77 |
78 | // This useEffect runs every `refreshSSPropsInterval` milliseconds
79 | // to refresh the server side props which contain
80 | // details about books in the active and waiting queue
81 | // Condition: only when queueName changes
82 | useEffect(() => {
83 | const intervalId = setInterval(() => {
84 | router.replace(router.asPath);
85 | }, refreshSSPropsInterval);
86 | // clear the setInterval
87 | return () => clearInterval(intervalId);
88 | }, [queueName]);
89 |
90 | // This useEffect runs every 5000 milliseconds
91 | // to refresh the queue itself, thereby providing
92 | // user with near real time upload progress
93 | // Condition: only when queue is active
94 | useEffect(() => {
95 | if (
96 | data[`${queueName}-queue`]["active"] === null &&
97 | data["commons-queue"]["active"] === null
98 | ) {
99 | return;
100 | }
101 | const intervalId = setInterval(() => {
102 | if (data[`${queueName}-queue`]["active"] !== null) {
103 | fetchQueueData();
104 | }
105 | }, 5000);
106 | // clear the setInterval
107 | return () => clearInterval(intervalId);
108 | }, [data[`${queueName}-queue`]["active"]]);
109 |
110 | return (
111 |
116 |
117 |
118 |
119 |
Select a Queue
120 |
121 |
122 | Google Books
123 |
124 | Panjab Digital Library
125 | Trove Digital Library
126 |
127 |
131 |
132 |
133 |
139 |
140 |
144 | onSearch(e)}
146 | className="cdx-text-input__input"
147 | type="search"
148 | placeholder="Search by Job ID, Title, Username or Status"
149 | style={{
150 | height: "48px",
151 | width: "100%",
152 | }}
153 | />
154 |
155 |
156 |
157 |
158 |
159 |
164 |
165 |
166 |
167 | );
168 | };
169 |
/**
 * Fetches the per-queue active/waiting job summary on every request;
 * the page re-runs this via its router.replace() polling interval.
 *
 * @returns {Promise<object>} `{ props: { data } }` for the Queue page.
 */
export async function getServerSideProps() {
  const response = await fetch(queue_data_endpoint);
  // Pass data to the page via props
  return { props: { data: await response.json() } };
}
176 |
177 | export default Queue;
178 |
--------------------------------------------------------------------------------
/pages/_app.js:
--------------------------------------------------------------------------------
1 | import Head from "next/head";
2 | // import global styles
3 | import "./../styles/global.less";
4 | import Footer from "../components/Footer";
5 | import { SessionProvider, useSession, signOut } from "next-auth/react";
6 | import { useEffect } from "react";
7 |
8 | function SessionWrapper({ children }) {
9 | const { data: session } = useSession();
10 | useEffect(() => {
11 | console.log(session, "::session");
12 | if (session?.expired === true) {
13 | signOut({ redirect: false });
14 | }
15 | }, [session?.accessToken]);
16 | return <>{children}>;
17 | }
18 |
19 | function MyApp({ Component, pageProps }) {
20 | return (
21 |
22 |
176 |
177 |
181 | {/* include bootstrap stylesheet */}
182 |
188 |
Book Uploader Bot
189 | {/* Favicons */}
190 |
194 |
195 |
196 |
201 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 | );
214 | }
215 |
216 | export default MyApp;
217 |
--------------------------------------------------------------------------------
/hooks/useMetadataForUI.js:
--------------------------------------------------------------------------------
1 | import { host, permission } from "../utils/constants";
2 |
// React hook exposing a single helper that renders a book's metadata
// into a Wikimedia Commons {{Book}} wikitext description for one of
// the supported libraries (Google Books, Trove, Panjab Digital Library).
export default function useMetadataForUI() {
  /**
   * Fetches metadata for a book from the backend and builds the
   * Commons upload wikitext.
   *
   * @param {string} library - Source library: "gb", "trove" or "pdl".
   * @param {string} bookID - Library-specific book identifier.
   * @param {string|null} categoryID - PDL category id (pdl only).
   * @param {string} IAIdentifier - Internet Archive identifier (pdl only).
   * @returns {Promise<string|Error|undefined>} Wikitext on success,
   *   undefined for an unrecognized library, or the caught Error —
   *   NOTE(review): errors are *returned*, not thrown, so callers must
   *   type-check the result.
   */
  const getMetadataForUI = async (
    library,
    bookID,
    categoryID = null,
    IAIdentifier = ""
  ) => {
    try {
      switch (library) {
        case "gb":
          const gbRes = await fetch(
            `${host}/getMetadata?option=${"gb"}&bookID=${bookID}`
          );
          const gbMetadata = await gbRes.json();
          let {
            title: gb_title,
            subtitle: gb_subtitle,
            authors: gb_authors,
            publisher: gb_publisher,
            publishedDate: gb_publishedDate,
            language: gb_language,
            pageCount: gb_pageCount,
            infoLink: gb_infoLink,
          } = gbMetadata.volumeInfo;

          // Authors arrive as an array; flatten to a comma-joined string.
          const gb_authorsFormatted = gb_authors
            ? gb_authors.join().trim()
            : "";
          // NOTE: each wikitext line ends with a literal \n escape *and*
          // the template literal's own newline, producing blank lines in
          // the output — presumably intentional; confirm before changing.
          const gb_commonsMetadata = `== {{int:filedesc}} ==

{{Book
| Author = ${gb_authorsFormatted}\n
| Translator =\n
| Editor =\n
| Illustrator =\n
| Title = ${gb_title || ""}\n
| Series title =\n
| Volume =\n
| Edition =\n
| Publisher = ${gb_publisher || ""}\n
| Printer =\n
| Publication date = ${gb_publishedDate || ""}\n
| City =\n
| Language = ${gb_language || ""}\n
| Description = ${gb_subtitle || ""}\n
| Source = ${gb_infoLink || ""}\n
| Permission = ${permission}\n
| Image =\n
| Image page =\n
| Pageoverview =\n
| Wikisource =\n
| Homecat =\n
| Other_versions =\n
| ISBN =\n
| LCCN =\n
| OCLC =\n
| References =\n
| Linkback =\n
| Wikidata =\n
| noimage =\n
| Other_fields_1 = {{Information field|name=Rights|value=${
            gbMetadata.accessInfo.accessViewStatus || ""
          }|name=Pages|value=${gb_pageCount || ""}}}
}}

== {{int:license-header}} ==

{{PD-scan}}

[[Category:Files uploaded with BUB2]]
`;
          // NOTE(review): '&' replacement is applied for gb only —
          // presumably sanitizing for the upload request; verify why
          // trove/pdl skip it.
          return gb_commonsMetadata.replace(/&/g, "_");
        case "trove":
          const troveRes = await fetch(
            `${host}/getMetadata?option=${"trove"}&bookID=${bookID}`
          );
          const troveJson = await troveRes.json();
          const troveMetadata = troveJson.article;
          let {
            title: trove_title,
            date: trove_date,
            troveUrl: trove_url,
            page: trove_page,
            identifier: trove_identifier,
            heading: trove_heading,
            category: trove_category,
          } = troveMetadata;

          // NOTE(review): `trove_title.value` implies the Trove API
          // returns title as an object with a `value` field — confirm
          // against the scraper's response shape.
          const trove_commonsMetadata = `== {{int:filedesc}} ==

{{Book
| Author =\n
| Translator =\n
| Editor =\n
| Illustrator =\n
| Title = ${trove_heading || ""}\n
| Series title =\n
| Volume =\n
| Edition =\n
| Publisher =\n
| Printer =\n
| Publication date = ${trove_date || ""}\n
| City =\n
| Language =\n
| Description = ${trove_title.value || ""}\n
| Source = ${trove_url || ""}\n
| Permission = ${permission}\n
| Image =\n
| Image page =\n
| Pageoverview =\n
| Wikisource =\n
| Homecat =\n
| Other_versions =\n
| ISBN =\n
| LCCN =\n
| OCLC =\n
| References =\n
| Linkback =\n
| Wikidata =\n
| noimage =\n
| Other_fields_1 = {{Information field|name=Identifier|value=${
            trove_identifier || ""
          }|name=Pages|value=${trove_page || ""}|name=Category|value=${
            trove_category || ""
          }}}
}}

== {{int:license-header}} ==

{{PD-scan}}

[[Category:Files uploaded with BUB2]]
`;
          return trove_commonsMetadata;
        case "pdl":
          const pdlRes = await fetch(
            `${host}/getMetadata?option=${"pdl"}&bookID=${bookID}&categoryID=${categoryID}&IAIdentifier=${IAIdentifier}`
          );
          const pdlMetadata = await pdlRes.json();
          // NOTE(review): "Langauge" spelling presumably mirrors the
          // key in the PDL metadata response — verify before "fixing".
          let {
            Script: pdl_script,
            Langauge: pdl_language,
            Publisher: pdl_publisher,
            Pages: pdl_pages,
            description: pdl_description,
            title: pdl_title,
            coverImage: pdl_coverImage,
            pdfUrl: pdl_pdfUrl,
            IAIdentifier: pdl_identifier,
          } = pdlMetadata;
          const pdl_commonsMetadata = `== {{int:filedesc}} ==

{{Book
| Author =\n
| Translator =\n
| Editor =\n
| Illustrator =\n
| Title = ${pdl_title || ""}\n
| Series title =\n
| Volume =\n
| Edition =\n
| Publisher = ${pdl_publisher || ""}\n
| Printer =\n
| Publication date =\n
| City =\n
| Language = ${pdl_language || ""}\n
| Description = ${pdl_description || ""}\n
| Source = ${pdl_pdfUrl || ""}\n
| Permission = ${permission}\n
| Image = ${pdl_coverImage || ""}\n
| Image page =\n
| Pageoverview =\n
| Wikisource =\n
| Homecat =\n
| Other_versions =\n
| ISBN =\n
| LCCN =\n
| OCLC =\n
| References =\n
| Linkback =\n
| Wikidata =\n
| noimage =\n
| Other_fields_1 = {{Information field|name=Identifier|value=${
            pdl_identifier || ""
          }|name=Pages|value=${pdl_pages || ""}|name=Script|value=${
            pdl_script || ""
          }}}
}}

== {{int:license-header}} ==

{{PD-scan}}

[[Category:Files uploaded with BUB2]]
`;
          return pdl_commonsMetadata;
      }
    } catch (error) {
      // NOTE(review): returning the Error object makes failures look
      // like metadata to callers; consider rethrowing instead.
      return error;
    }
  };

  return { getMetadataForUI };
}
207 |
--------------------------------------------------------------------------------
/bull/google-books-queue/consumer.js:
--------------------------------------------------------------------------------
1 | const request = require("request");
2 | const EmailProducer = require("../email-queue/producer");
3 | const CommonsProducer = require("../commons-queue/producer");
4 | const config = require("../../utils/bullconfig");
5 | const GoogleBooksQueue = config.getNewQueue("google-books-queue");
6 | const winston = require("winston");
7 | const logger = winston.loggers.get("defaultLogger");
8 | const {
9 | logUserData,
10 | uploadToCommons,
11 | downloadFile,
12 | } = require("./../../utils/helper");
13 |
// Progress counters for the PDF stream currently being piped to IA.
// NOTE(review): module-level mutable state — if this worker ever
// processes jobs concurrently, jobs would clobber each other's figures.
let responseSize;
let dataSize = 0;

// Small helper so the lifecycle handlers below stay one-liners.
const logJobInfo = (message) => logger.log({ level: "info", message });

// Log Google Books job lifecycle transitions.
GoogleBooksQueue.on("active", (job) =>
  logJobInfo(`Consumer(next): Job ${job.id} is active!`)
);

GoogleBooksQueue.on("completed", (job, result) =>
  logJobInfo(`Consumer(next): Job ${job.id} completed! Result: ${result}`)
);
30 |
// Worker for one Google Books job: streams the book's PDF from the
// Google download URI straight into an Internet Archive S3 bucket via
// HTTP PUT, reporting stream progress on the job, then optionally
// hands off to the Commons and email queues.
GoogleBooksQueue.process((job, done) => {
  const requestURI = request(job.data.uri);
  const { id, volumeInfo, accessInfo } = job.data.details;
  // jobLogs aliases (does not copy) volumeInfo, so the fields added
  // below are also written onto job.data.details.volumeInfo.
  const jobLogs = volumeInfo;
  // NOTE: imageLinks and previewLink are destructured but unused in
  // this block.
  let {
    authors,
    publisher,
    publishedDate,
    imageLinks,
    previewLink,
    title,
    language,
    pageCount,
    infoLink,
  } = volumeInfo;
  const { accessViewStatus } = accessInfo;
  const bucketTitle = job.data.IAIdentifier;
  // IA's S3-compatible endpoint; the bucket is auto-created by the
  // X-Amz-Auto-Make-Bucket header below.
  const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`;
  const trueURI = `http://archive.org/details/${bucketTitle}`;
  jobLogs["trueURI"] = trueURI;
  jobLogs["userName"] = job.data.userName;
  job.log(JSON.stringify(jobLogs));
  logUserData(jobLogs["userName"], "Google Books");
  // Pipe the Google download stream directly into the IA PUT request;
  // book metadata travels as X-archive-meta-* headers.
  requestURI.pipe(
    request(
      {
        method: "PUT",
        preambleCRLF: true,
        postambleCRLF: true,
        uri: IAuri,
        headers: {
          Authorization: `LOW ${process.env.access_key}:${process.env.secret_key}`,
          "Content-type": "application/pdf; charset=utf-8",
          "Accept-Charset": "utf-8",
          "X-Amz-Auto-Make-Bucket": "1",
          "X-Archive-Meta-Collection": "opensource",
          "X-Archive-Ignore-Preexisting-Bucket": 1,
          "X-archive-meta-title": `uri(${encodeURI(title.trim())})`,
          "X-archive-meta-date": publishedDate ? publishedDate.trim() : "",
          "X-archive-meta-language": language.trim(),
          "X-archive-meta-mediatype": "texts",
          "X-archive-meta-licenseurl":
            "https://creativecommons.org/publicdomain/mark/1.0/",
          "X-archive-meta-publisher": publisher.trim(),
          "X-archive-meta-Author": authors
            ? `uri(${encodeURI(authors.join().trim())})`
            : "",
          "X-archive-meta-rights": accessViewStatus.trim(),
          "X-archive-meta-Google-id": id,
          "X-archive-meta-Source": infoLink.trim(),
        },
      },
      async (error, response, body) => {
        // IA upload failed: log, optionally notify by email, fail job.
        if (error || response.statusCode != 200) {
          const errorMessage = !body ? error : body;
          logger.log({
            level: "error",
            message: `IA Failure GB ${errorMessage}`,
          });
          if (job.data.isEmailNotification === "true") {
            EmailProducer(job.data.userName, title, trueURI, {
              archive: false,
              commons: false,
            });
          }
          done(new Error(errorMessage));
        } else {
          job.progress({
            step: "Upload To IA",
            value: `(${100}%)`,
          });
          // The three branches below are mutually exclusive on the
          // (isUploadCommons, isEmailNotification) flag pair, so done()
          // is invoked exactly once per job.
          if (
            job.data.isUploadCommons !== "true" &&
            job.data.isEmailNotification !== "true"
          ) {
            done(null, true);
          }
          if (
            job.data.isEmailNotification === "true" &&
            job.data.isUploadCommons !== "true"
          ) {
            EmailProducer(job.data.userName, title, trueURI, {
              archive: true,
              commons: false,
            });
            done(null, true);
          }
          // Hand off to the Commons queue and wait for its callback.
          if (job.data.isUploadCommons === "true") {
            job.progress({
              step: "Uploading to Wikimedia Commons",
              value: `(${50}%)`,
            });
            CommonsProducer(
              null,
              null,
              job.data,
              "gb",
              async (commonsResponse) => {
                if (commonsResponse.status === true) {
                  // NOTE(review): the awaits below target plain values,
                  // not promises — they are no-ops; confirm the shape of
                  // commonsResponse.value before simplifying.
                  job.progress({
                    step: "Upload to Wikimedia Commons",
                    value: `(${100}%)`,
                    wikiLinks: {
                      commons:
                        (await commonsResponse.value.filename) ||
                        commonsResponse.filename ||
                        commonsResponse.value.commons.filename,
                      wikidata:
                        (await commonsResponse.value.wikidata) !== 404
                          ? await commonsResponse.value.wikidata
                          : 404,
                    },
                  });
                  if (job.data.isEmailNotification === "true") {
                    const commonsLink =
                      process.env.NEXT_PUBLIC_COMMONS_URL +
                      `/wiki/File:${
                        commonsResponse.value.filename ||
                        commonsResponse.filename ||
                        commonsResponse.value.commons.filename
                      }`;
                    EmailProducer(
                      job.data.userName,
                      title,
                      { archiveLink: trueURI, commonsLink: commonsLink },
                      { archive: true, commons: true }
                    );
                  }
                  return done(null, true);
                } else {
                  // Commons upload failed, but the IA copy succeeded —
                  // mark the job complete with a failed-commons status.
                  job.progress({
                    step: "Upload To IA (100%), Upload To Commons",
                    value: `(Failed)`,
                  });
                  if (job.data.isEmailNotification === "true") {
                    EmailProducer(job.data.userName, title, trueURI, {
                      archive: true,
                      commons: false,
                    });
                  }
                  return done(null, true);
                }
              }
            );
          }
        }
      }
    )
  );

  // Record the total download size so chunk progress can be reported
  // as a percentage (uses the module-level counters).
  requestURI.on("response", function (data) {
    responseSize = Number(data.headers["content-length"]);
    dataSize = 0;
  });

  requestURI.on("data", function (chunk) {
    dataSize += Number(chunk.length);
    job.progress({
      step: "Uploading to Internet Archive",
      value: `(${Math.round((dataSize / responseSize) * 100)}%)`,
    });
  });
});
194 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # BUB2   
4 |
5 |  
6 |     
7 | A book uploader bot that transfers documents from public libraries such as Google Books, Panjab Digital Library and Trove Digital Library to [Internet Archive](https://archive.org). Built for Wikimedia Tool Labs. Check out [BUB2 on Wikimedia Cloud](https://bub2.wmcloud.org)!
8 |
9 | # Table of Contents
10 |
11 | - [Getting Started](#getting-started)
12 | - [Prerequisites](#prerequisites)
13 | - [Local setup](#local-setup)
14 | - [Clone the repo](#clone-the-repo)
15 | - [Set environment variables](#set-environment-variables)
16 | - [Run Redis server](#run-redis-server)
17 | - [Start the server](#start-the-server)
18 | - [Example](#example)
19 | - [Contributing](#contributing)
20 | - [Request to Contribute](#request-to-contribute)
21 | - [Keep Supporting](#keep-supporting)
22 | - [License](#license)
23 |
24 |
25 | ## Getting Started
26 |
27 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system.
28 |
29 |
30 | ### Prerequisites
31 |
32 | - [Node.JS](https://nodejs.org/en/download/) ( check for installation with `npm -v` and `node -v` on terminal )
33 | - [Redis](https://redis.io/)
34 |
35 |
36 | ### Local setup
37 |
38 |
39 | #### Clone the repo
40 |
41 | - Clone the repository `git clone https://github.com/coderwassananmol/BUB2`
42 | - Navigate to the project directory on the terminal: `cd BUB2`.
43 | - For developers, checkout to develop branch: `git checkout develop`
44 | - Run `npm install`
45 |
46 |
47 | #### Set environment variables
48 |
49 | Rename `.env.example` to `.env`. Then, to fill the credentials,
50 |
51 | - Make an account on [archive.org](https://archive.org).
52 | - Go to https://archive.org/account/s3.php . Generate the **access** and **secret** keys and fill
53 | them in the `.env` file in the corresponding fields.
54 | - Go to [Google Developers console](https://console.developers.google.com/getting-started). Make a new project to run the app. In that Google Developers project, search for 'Books API' in the Google API console, then **enable** the API for the project, then generate the **API keys**, and then copy and paste the API key in the `GB_Key` fields.
55 | - Enter the `redishost` field. If it's hosted locally, enter **127.0.0.1**, which is the default localhost.
56 | - Enter the `redisport` field with **6379**, which is the default port number for redis.
57 |
58 | - You need Beta wiki [OAuth 2.0](https://oauth.net/2/) credentials to login with MediaWiki during development. Steps to obtain the OAuth credentials for the `WIKIMEDIA_CLIENT_ID` and `WIKIMEDIA_CLIENT_SECRET` fields :
59 | - Go to [Beta-wiki](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:OAuthConsumerRegistration) and click **Request a token for a new OAuth 2.0 client**.
60 | - Enter the details (Application name, details, callback url, and applicable grants). Refer [this](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:OAuthListConsumers/view/e70de440468d7140914e4a57e3660cf2) as a sample.
61 | - After submitting, note the client application key and client application secret and wait for the proposed consumer to get **approved**.
62 | - Go to [this file](pages/api/auth/[...nextauth].js).
63 | - Add the following code after the clientId and clientSecret :
64 | ```
65 | token: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/access_token",
66 | userinfo: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/resource/profile",
67 | authorization: {
68 | url: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/authorize",
69 | params: { scope: "" },
70 | }
71 | ```
72 | - Enter the client application key and client application secret in the `WIKIMEDIA_CLIENT_ID` and `WIKIMEDIA_CLIENT_SECRET` respectively
73 | - Enter the `NEXTAUTH_URL` with http://localhost:5000.
74 | - Go to [Wikisource Bot](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:BotPasswords) to generate your `EMAIL_BOT_USERNAME` and `EMAIL_BOT_PASSWORD`. When creating your bot, tick 'send email to users' under the Applicable Grants section. Go ahead and enter the generated credentials in the `.env` file.
75 | - Enter the `NEXT_PUBLIC_WIKIMEDIA_URL` which is used to authenticate with Wikimedia environment for login, send emails etc. For example - https://meta.wikimedia.beta.wmflabs.org
76 | - Enter the `NEXT_PUBLIC_COMMONS_URL` which is used to upload the files to Commons. For example - https://meta.commons.beta.wmflabs.org for local setup.
77 | - Enter the `NEXT_PUBLIC_IA_USER` which is the username of Internet Archive account.
78 | - Enter the `IA_EMAIL` which is the email of the Internet Archive account.
79 | - Go to [Trove API](https://trove.nla.gov.au/about/create-something/using-api) and follow the instructions on how to get a trove key. Fill in `trove_key` with the trove key you generated.
80 |
81 | ### Run Redis server
82 |
83 | - Refer to [Redis](https://redis.io/download) for download and setup documentation
84 |
85 |
86 | #### Start the server
87 |
88 | - Run `npm run dev` for development and `npm run start` for production.
89 | - Open your browser and navigate to http://localhost:5000
90 |
91 |
92 | ## Contributing
93 |
94 | Please read [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute to BUB2.
95 |
96 |
97 | ## Request to Contribute
98 | 1. Fork the repository.
99 | 2. Clone the repository to your local system by using the command: `git clone "https://github.com/<your-username>/BUB2"`.
100 | 3. The issues are maintained on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R).
101 | 4. Assign an issue to yourself from Phabricator.
102 | 5. Create a new branch and start working on the issue locally.
103 | 6. Create a PULL REQUEST to merge your branch with the main branch and mention the Phab task in the description.
104 | 7. The issue will be considered closed and resolved once the PR is accepted.
105 | Please read [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute to BUB2.
106 |
107 |
108 | ## Contribute
109 | We actively welcome pull requests. Learn how to [contribute.](CONTRIBUTING.md)
110 | ## Keep Supporting
111 |
112 | There was no Node.js wrapper available for Internet Archive, so I decided to write the Node implementation to upload books to Internet Archive. If you like this repository, show your support by starring the project. Cheers!
113 |
114 |
115 | ## License
116 | [](https://github.com/coderwassananmol/BUB2/blob/develop/LICENSE.md)
117 |
118 | Please read [license](https://github.com/coderwassananmol/BUB2/blob/develop/LICENSE.md) for more information.
119 |
--------------------------------------------------------------------------------
/components/QueueTable.js:
--------------------------------------------------------------------------------
1 | import { useRouter } from "next/router";
2 | import { useEffect, useState } from "react";
3 | import {
4 | Paper,
5 | Table,
6 | TableBody,
7 | TableCell,
8 | TableContainer,
9 | TableHead,
10 | TablePagination,
11 | TableRow,
12 | Backdrop,
13 | } from "@mui/material";
14 | import { host } from "../utils/constants";
15 | import ShowJobInformation from "../components/ShowJobInformation";
16 |
17 | const ShowUploadQueue = (props) => {
18 | const styles = {
19 | backdrop: {
20 | zIndex: 5,
21 | color: "#fff",
22 | },
23 | head: {
24 | backgroundColor: "#f8f9fa",
25 | color: "#202122",
26 | fontSize: "14px",
27 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif",
28 | },
29 | body: {
30 | fontSize: "14px",
31 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif",
32 | color: "#54595d",
33 | },
34 | root: {
35 | marginTop: "20px",
36 | width: "100%",
37 | zIndex: 0,
38 | },
39 | row: {
40 | "&:nth-of-type(odd)": {
41 | backgroundColor: "#fff",
42 | },
43 | "&:nth-of-type(even)": {
44 | backgroundColor: "#f8f9fa",
45 | },
46 | },
47 | container: {
48 | maxHeight: 330,
49 | },
50 | toolbar: {
51 | marginTop: "8px",
52 | fontSize: "12px",
53 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif",
54 | color: "#54595d",
55 | },
56 | selectIcon: {
57 | fontSize: "12px",
58 | top: "calc(50% - 8px)",
59 | },
60 | };
61 |
62 | const router = useRouter();
63 | // const classes = useStyles();
64 | const [jobId, setJobId] = useState("");
65 |
66 | const onClick = (id) => {
67 | setJobId(id);
68 | setOpen(true);
69 | };
70 |
71 | const columns = [
72 | {
73 | id: "id",
74 | label: "Job ID",
75 | minWidth: 50,
76 | align: "left",
77 | format: (value) => (
78 | onClick(value)}>
79 | {value}
80 |
81 | ),
82 | },
83 | {
84 | id: "title",
85 | label: "Title",
86 | minWidth: 300,
87 | align: "left",
88 | format: (value, label) => (
89 | onClick(value)}>
90 | {label}
91 |
92 | ),
93 | },
94 | {
95 | id: "userName",
96 | label: "Wiki Username",
97 | minWidth: 150,
98 | align: "left",
99 | format: (value) =>
100 | value !== "-" ? (
101 |
105 | {value}
106 |
107 | ) : (
108 | value
109 | ),
110 | },
111 | {
112 | id: "upload_progress",
113 | label: "Upload Progress",
114 | minWidth: 50,
115 | align: "left",
116 | format: (value) => value,
117 | },
118 | {
119 | id: "status",
120 | label: "Status",
121 | minWidth: 30,
122 | align: "left",
123 | format: (value) => {
124 | const isPDLMissingPage = /]*>([^<]+)<\/a>/;
125 | const missingPageLink = isPDLMissingPage.exec(value);
126 | return missingPageLink ? (
127 |
128 | Failed! (Reason: Upload to Internet Archive failed because {""}
129 |
130 | {missingPageLink[1]}
131 | {" "}
132 | is not reachable. Please try again or contact Panjab Digital Library
133 | for more details. )
134 |
135 | ) : (
136 | value
137 | );
138 | },
139 | },
140 | {
141 | id: "timestamp",
142 | label: "Timestamp",
143 | minWidth: 150,
144 | align: "left",
145 | format: (value) => value,
146 | },
147 | ];
148 |
149 | const [open, setOpen] = useState(false);
150 | const [page, setPage] = useState(0);
151 | const [rowsPerPage, setRowsPerPage] = useState(10);
152 | const rows = props.tableData ? props.tableData : [];
153 |
154 | const handleClose = (e) => {
155 | setOpen(false);
156 | };
157 |
158 | const handleChangeRowsPerPage = (event) => {
159 | setRowsPerPage(+event.target.value);
160 | setPage(0);
161 | };
162 |
163 | const handleChangePage = (event, newPage) => {
164 | setPage(newPage);
165 | };
166 |
167 | const conditionalRender = (column, value, row) => {
168 | if (column.id === "id" || column.id === "upload_progress") {
169 | return column.format(value);
170 | } else if (column.id === "title") {
171 | return column.format(row["id"], value);
172 | } else if (column.id === "userName") {
173 | return column.format((value === "-" ? "" : "User:") + value);
174 | } else if (column.id === "date") {
175 | return column.format(value);
176 | } else if (column.id === "status") {
177 | return column.format(value);
178 | } else if (column.id === "wikimedia_links") {
179 | return column.format(value);
180 | } else {
181 | return value;
182 | }
183 | };
184 |
185 | useEffect(() => {
186 | setPage(0);
187 | }, [props.isSearch]);
188 |
189 | return (
190 |
191 |
192 |
193 |
194 |
195 |
196 | {columns.map((column) => (
197 |
202 | {column.label}
203 |
204 | ))}
205 |
206 |
207 |
208 | {rows
209 | .slice(page * rowsPerPage, page * rowsPerPage + rowsPerPage)
210 | .map((row) => {
211 | return (
212 |
213 | {columns.map((column) => {
214 | const value = row[column.id];
215 | return (
216 |
221 | {conditionalRender(column, value, row)}
222 |
223 | );
224 | })}
225 |
226 | );
227 | })}
228 |
229 |
230 |
231 | Rows per page }
240 | labelDisplayedRows={({ from, to, count }) => (
241 |
242 | {`${from}–${to} of ${count !== -1 ? count : `more than ${to}`}`}
243 |
244 | )}
245 | sx={{ display: "flex", justifyContent: "end" }}
246 | />
247 |
248 |
249 | {open ? (
250 |
251 | ) : null}
252 |
253 |
254 | );
255 | };
256 |
257 | export default ShowUploadQueue;
258 |
--------------------------------------------------------------------------------
/bull/trove-queue/consumer.js:
--------------------------------------------------------------------------------
1 | const EmailProducer = require("../email-queue/producer");
2 | const CommonsProducer = require("../commons-queue/producer");
3 | const config = require("../../utils/bullconfig");
4 | const TroveQueue = config.getNewQueue("trove-queue");
5 | const rp = require("request-promise");
6 | const request = require("request");
7 | const _ = require("lodash");
8 | const winston = require("winston");
9 | const logger = winston.loggers.get("defaultLogger");
10 | const {
11 | logUserData,
12 | downloadFile,
13 | uploadToCommons,
14 | } = require("./../../utils/helper");
15 |
16 | let responseSize,
17 | dataSize = 0;
18 |
19 | TroveQueue.on("active", (job, jobPromise) => {
20 | logger.log({
21 | level: "info",
22 | message: `Consumer(next): Job ${job.id} is active!`,
23 | });
24 | });
25 |
26 | TroveQueue.on("completed", (job, result) => {
27 | logger.log({
28 | level: "info",
29 | message: `Consumer(next): Job ${job.id} completed! Result: ${result}`,
30 | });
31 | });
32 |
33 | TroveQueue.process((job, done) => {
34 | const currentTimestamp = Date.now();
35 | request(
36 | `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}/prep?_=${currentTimestamp}`,
37 | {},
38 | async (error, response, body) => {
39 | if (error || response.statusCode != 200) {
40 | logger.log({
41 | level: "error",
42 | message: `trove API ${body}`,
43 | });
44 | } else {
45 | const requestURI = request(
46 | `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}.pdf?followup=${body}`
47 | );
48 | const downloadFileUrl = `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}.pdf?followup=${body}`;
49 | const jobLogs = job.data.details;
50 | let {
51 | name,
52 | date,
53 | id,
54 | troveUrl,
55 | IAIdentifier,
56 | userName,
57 | isEmailNotification,
58 | } = job.data.details;
59 | const bucketTitle = IAIdentifier;
60 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`;
61 | const trueURI = `http://archive.org/details/${bucketTitle}`;
62 | jobLogs["trueURI"] = trueURI;
63 | jobLogs["userName"] = job.data.details.userName;
64 | job.log(JSON.stringify(jobLogs));
65 | logUserData(jobLogs["userName"], "Trove");
66 | requestURI.pipe(
67 | request(
68 | {
69 | method: "PUT",
70 | preambleCRLF: true,
71 | postambleCRLF: true,
72 | uri: IAuri,
73 | headers: {
74 | Authorization: `LOW ${process.env.access_key}:${process.env.secret_key}`,
75 | "Content-type": "application/pdf; charset=utf-8",
76 | "Content-Length": responseSize,
77 | "Accept-Charset": "utf-8",
78 | "X-Amz-Auto-Make-Bucket": "1",
79 | "X-Archive-Meta-Collection": "opensource",
80 | "X-Archive-Ignore-Preexisting-Bucket": 1,
81 | "X-archive-meta-title": name.trim(),
82 | "X-archive-meta-date": date.trim(),
83 | "X-archive-meta-mediatype": "texts",
84 | "X-archive-meta-licenseurl":
85 | "https://creativecommons.org/publicdomain/mark/1.0/",
86 | "X-archive-meta-Trove-issueid": id,
87 | "X-archive-meta-Identifier": `bub_trove_${id}`,
88 | "X-archive-meta-TroveURL": troveUrl,
89 | },
90 | },
91 | async (error, response, body) => {
92 | if (error || response.statusCode != 200) {
93 | const errorMessage = !body ? error : body;
94 | logger.log({
95 | level: "error",
96 | message: `IA Failure Trove ${errorMessage}`,
97 | });
98 | if (isEmailNotification === "true") {
99 | EmailProducer(userName, name, trueURI, {
100 | archive: false,
101 | commons: false,
102 | });
103 | }
104 | done(new Error(errorMessage));
105 | } else {
106 | job.progress({
107 | step: "Upload To IA",
108 | value: `(${100}%)`,
109 | });
110 | if (
111 | isEmailNotification !== "true" &&
112 | job.data.details.isUploadCommons !== "true"
113 | ) {
114 | done(null, true);
115 | }
116 | if (
117 | isEmailNotification === "true" &&
118 | job.data.details.isUploadCommons !== "true"
119 | ) {
120 | EmailProducer(userName, name, trueURI, {
121 | archive: true,
122 | commons: false,
123 | });
124 | }
125 | if (job.data.details.isUploadCommons === "true") {
126 | job.progress({
127 | step: "Uploading to Wikimedia Commons",
128 | value: `(50%)`,
129 | });
130 | CommonsProducer(
131 | null,
132 | downloadFileUrl,
133 | job.data.details,
134 | "trove",
135 | async (commonsResponse) => {
136 | if (commonsResponse.status === true) {
137 | job.progress({
138 | step: "Upload to Wikimedia Commons",
139 | value: `(${100}%)`,
140 | wikiLinks: {
141 | commons: await commonsResponse.value.filename,
142 | wikidata:
143 | (await commonsResponse.value.wikidata) !== 404
144 | ? await commonsResponse.value.wikidata
145 | : 404,
146 | },
147 | });
148 | if (job.data.isEmailNotification === "true") {
149 | const commonsLink =
150 | process.env.NEXT_PUBLIC_COMMONS_URL +
151 | `/wiki/File:${commonsResponse.value.filename}`;
152 | EmailProducer(
153 | userName,
154 | name,
155 | {
156 | archiveLink: trueURI,
157 | commonsLink: commonsLink,
158 | },
159 | { archive: true, commons: true }
160 | );
161 | }
162 | } else {
163 | job.progress({
164 | step: "Upload To IA (100%), Upload To Commons",
165 | value: `(Failed)`,
166 | });
167 | EmailProducer(userName, name, trueURI, {
168 | archive: true,
169 | commons: false,
170 | });
171 | }
172 | }
173 | );
174 | }
175 | done(null, true);
176 | }
177 | }
178 | )
179 | );
180 | requestURI.on("response", function (data) {
181 | responseSize = Number(data.headers["content-length"]);
182 | dataSize = 0;
183 | });
184 |
185 | requestURI.on("data", function (chunk) {
186 | dataSize += Number(chunk.length);
187 | const progress = Math.round((dataSize / responseSize) * 100);
188 | if (progress !== null)
189 | job.progress({
190 | step: "Uploading to Internet Archive",
191 | value: `(${progress || 0}%)`,
192 | });
193 | });
194 | }
195 | }
196 | );
197 | });
198 |
--------------------------------------------------------------------------------
/bull/pdl-queue/consumer.js:
--------------------------------------------------------------------------------
1 | const EmailProducer = require("../email-queue/producer");
2 | const CommonsProducer = require("../commons-queue/producer");
3 | const config = require("../../utils/bullconfig");
4 | const PDLQueue = config.getNewQueue("pdl-queue");
5 | const rp = require("request-promise");
6 | const request = require("request");
7 | const _ = require("lodash");
8 | const winston = require("winston");
9 | const logger = winston.loggers.get("defaultLogger");
10 | const { logUserData } = require("./../../utils/helper");
11 | const { customFetch } = require("../../utils/helper");
12 | const stream = require("stream");
13 |
14 | var JSZip = require("jszip");
15 | PDLQueue.on("active", (job, jobPromise) => {
16 | logger.log({
17 | level: "info",
18 | message: `Consumer(next): Job ${job.id} is active!`,
19 | });
20 | });
21 |
22 | PDLQueue.on("completed", (job, result) => {
23 | logger.log({
24 | level: "info",
25 | message: `Consumer(next): Job ${job.id} completed! Result: ${result}`,
26 | });
27 | });
28 |
29 | async function getZipAndBytelength(no_of_pages, id, title, job) {
30 | var zip = new JSZip();
31 | title = title.replace(/ /g, "_");
32 | var img = zip.folder(`${title}_images`);
33 | let temp_pages = no_of_pages;
34 | let downloadImageStatus;
35 | let errorFlag = { status: false, page: "" };
36 | var download_image = async function (uri, filename) {
37 | try {
38 | const body = await rp({
39 | method: "GET",
40 | uri,
41 | encoding: null,
42 | transform: function (body, response) {
43 | return { headers: response.headers, data: body };
44 | },
45 | });
46 | if (/image/.test(body.headers["content-type"])) {
47 | var data = Buffer.from(body.data);
48 | img.file(filename, data.toString("base64"), { base64: true });
49 | }
50 | return 200;
51 | } catch (err) {
52 | --no_of_pages;
53 | return err.statusCode;
54 | }
55 | };
56 | for (let i = 1; i <= temp_pages; ++i) {
57 | const str = `http://www.panjabdigilib.org/images?ID=${id}&page=${i}&pagetype=null&Searched=W3GX`;
58 | downloadImageStatus = await download_image(str, `${title}_${i}.jpeg`);
59 | job.progress({
60 | step: "Uploading to Internet Archive",
61 | value: `(${Math.round((i / temp_pages) * 82)}%)`,
62 | });
63 | if (downloadImageStatus >= 200 && downloadImageStatus < 300) {
64 | continue;
65 | } else {
66 | errorFlag = { status: true, page: str };
67 | break;
68 | }
69 | }
70 | let { byteLength } = await zip.generateAsync({ type: "nodebuffer" });
71 | byteLength = Number(byteLength + no_of_pages * 16); //No. of pages * 16
72 | return [zip, byteLength, errorFlag];
73 | }
74 |
75 | function setHeaders(metadata, contentLength, title, contentType) {
76 | let headers = {};
77 | const restrictedHeaders = [
78 | "trueuri",
79 | "isemailnotification",
80 | "iaidentifier",
81 | "contenttype",
82 | "pdfurl",
83 | ];
84 | headers[
85 | "Authorization"
86 | ] = `LOW ${process.env.access_key}:${process.env.secret_key}`;
87 | if (contentType === "pdf") {
88 | headers["Content-type"] = `application/${contentType}; charset=utf-8`;
89 | headers["Accept-Charset"] = "utf-8";
90 | } else {
91 | headers["Content-type"] = `application/${contentType}`;
92 | }
93 | headers["Content-length"] = contentLength;
94 | headers["X-Amz-Auto-Make-Bucket"] = 1;
95 | headers["X-Archive-meta-collection"] = "opensource";
96 | headers["X-Archive-Ignore-Preexisting-Bucket"] = 1;
97 | headers["X-archive-meta-identifier"] = title;
98 | headers["X-archive-meta-mediatype"] = "texts";
99 | headers["X-archive-meta-uploader"] = process.env.IA_EMAIL; //To be added
100 | headers["X-archive-meta-contributor"] = "Panjab Digital Library"; //To be added
101 | headers["X-archive-meta-betterpdf"] = true; //To be added
102 | headers[
103 | "X-archive-meta-external-identifier"
104 | ] = `urn:pdl:${metadata["bookID"]}:${metadata["categoryID"]}`; //To be added
105 | for (var key in metadata) {
106 | let meta_key = key.trim().replace(/ /g, "-").toLowerCase();
107 | if (!_.includes(restrictedHeaders, meta_key))
108 | headers[`X-archive-meta-${meta_key}`] = metadata[key];
109 | }
110 | headers["X-archive-meta-title"] = metadata["title"];
111 | headers[`X-archive-meta-description`] = metadata.description
112 | ? `uri(${encodeURI(metadata.description?.trim())})`
113 | : "";
114 | return headers;
115 | }
116 |
117 | async function uploadZipToIA(
118 | zip,
119 | metadata,
120 | byteLength,
121 | email,
122 | job,
123 | trueURI,
124 | onError
125 | ) {
126 | const bucketTitle = metadata.IAIdentifier;
127 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}_images.zip`;
128 | metadata = _.omit(metadata, [
129 | "coverImage",
130 | "commonsMetadata",
131 | "isUploadCommons",
132 | "oauthToken",
133 | "userName",
134 | ]);
135 | let headers = setHeaders(
136 | metadata,
137 | byteLength,
138 | metadata.title,
139 | job.data.details.contentType
140 | );
141 | await zip.generateNodeStream({ type: "nodebuffer", streamFiles: true }).pipe(
142 | request(
143 | {
144 | method: "PUT",
145 | preambleCRLF: true,
146 | postambleCRLF: true,
147 | uri: IAuri,
148 | headers: headers,
149 | },
150 | (error, response, body) => {
151 | if (response.statusCode === 200) {
152 | onError(false, null);
153 | } else {
154 | const errorMessage = !body ? error : body;
155 | logger.log({
156 | level: "error",
157 | message: `IA Failure PDL ${errorMessage}`,
158 | });
159 | onError(true, errorMessage);
160 | }
161 | }
162 | )
163 | );
164 | }
165 |
166 | function uploadPdfToIA(pdfUrl, job, metadata, trueURI, done) {
167 | const getPdf = request(pdfUrl);
168 | let bufferLength = 0;
169 | const chunks = [];
170 | const bucketTitle = metadata.IAIdentifier;
171 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`;
172 | getPdf.on("response", function (data) {
173 | if (data.statusCode !== 200) {
174 | logger.log({
175 | level: "error",
176 | message: `Failure PDL: Failed to download PDF. Status Code: ${data.statusCode}`,
177 | });
178 | done(new Error("Failed to download PDF."));
179 | } else {
180 | job.progress({
181 | step: "Uploading to Internet Archive",
182 | value: `(${20}%)`,
183 | });
184 | }
185 | });
186 |
187 | getPdf.on("end", function () {
188 | const IAMetadata = { ...metadata };
189 | delete IAMetadata["commonsMetadata"];
190 | delete IAMetadata["isUploadCommons"];
191 | delete IAMetadata["oauthToken"];
192 | delete IAMetadata["userName"];
193 | const newBuffer = Buffer.concat(chunks);
194 | var bufferStream = new stream.PassThrough();
195 | bufferStream.end(newBuffer);
196 | job.progress({
197 | step: "Uploading to Internet Archive",
198 | value: `(${80}%)`,
199 | });
200 | let headers = setHeaders(
201 | IAMetadata,
202 | bufferLength,
203 | metadata.title,
204 | job.data.details.contentType
205 | );
206 | bufferStream.pipe(
207 | request(
208 | {
209 | method: "PUT",
210 | preambleCRLF: true,
211 | postambleCRLF: true,
212 | uri: IAuri,
213 | headers,
214 | },
215 | async (error, response, body) => {
216 | if (error || response.statusCode != 200) {
217 | const errorMessage = !body ? error : body;
218 | logger.log({
219 | level: "error",
220 | message: `IA Failure PDL ${errorMessage}`,
221 | });
222 | if (metadata.isEmailNotification === "true") {
223 | EmailProducer(
224 | job.data.details.userName,
225 | metadata.title,
226 | trueURI,
227 | {
228 | archive: false,
229 | commons: false,
230 | }
231 | );
232 | }
233 | done(new Error(errorMessage));
234 | } else {
235 | job.progress({
236 | step: "Upload To IA",
237 | value: `(${100}%)`,
238 | });
239 | if (
240 | job.data.details.isUploadCommons !== "true" &&
241 | metadata.isEmailNotification !== "true"
242 | ) {
243 | done(null, true);
244 | }
245 | if (
246 | job.data.details.isUploadCommons !== "true" &&
247 | metadata.isEmailNotification === "true"
248 | ) {
249 | EmailProducer(
250 | job.data.details.userName,
251 | metadata.title,
252 | trueURI,
253 | {
254 | archive: true,
255 | commons: false,
256 | }
257 | );
258 | done(null, true);
259 | }
260 | if (job.data.details.isUploadCommons === "true") {
261 | job.progress({
262 | step: "Uploading to Wikimedia Commons",
263 | value: `(50%)`,
264 | });
265 | CommonsProducer(
266 | null,
267 | null,
268 | job.data.details,
269 | "pdl",
270 | async (commonsResponse) => {
271 | if (commonsResponse.status === true) {
272 | job.progress({
273 | step: "Upload to Wikimedia Commons",
274 | value: `(100%)`,
275 | wikiLinks: {
276 | commons: await commonsResponse.value.filename,
277 | },
278 | });
279 | if (metadata.isEmailNotification === "true") {
280 | const commonsLink = `https://commons.wikimedia.org/wiki/File:${commonsResponse.value.filename}`;
281 | EmailProducer(
282 | job.data.details.userName,
283 | metadata.title,
284 | { archiveLink: trueURI, commonsLink: commonsLink },
285 | {
286 | archive: true,
287 | commons: true,
288 | }
289 | );
290 | }
291 | } else {
292 | job.progress({
293 | step: "Upload To IA (100%), Upload To Commons",
294 | value: `(Failed)`,
295 | });
296 | if (metadata.isEmailNotification === "true") {
297 | EmailProducer(
298 | job.data.details.userName,
299 | metadata.title,
300 | trueURI,
301 | {
302 | archive: true,
303 | commons: false,
304 | }
305 | );
306 | }
307 | }
308 | return done(null, true);
309 | }
310 | );
311 | }
312 | }
313 | }
314 | )
315 | );
316 | });
317 |
318 | getPdf.on("data", function (chunk) {
319 | bufferLength += chunk.length;
320 | chunks.push(chunk);
321 | });
322 | }
323 |
324 | PDLQueue.process(async (job, done) => {
325 | try {
326 | const jobLogs = job.data.details;
327 | const trueURI = `http://archive.org/details/${job.data.details.IAIdentifier}`;
328 | jobLogs["trueURI"] = trueURI;
329 | jobLogs["userName"] = job.data.details.userName;
330 | job.log(JSON.stringify(jobLogs));
331 | logUserData(jobLogs["userName"], "Panjab Digital Library");
332 |
333 | if (job.data.details.pdfUrl) {
334 | uploadPdfToIA(
335 | job.data.details.pdfUrl,
336 | job,
337 | job.data.details,
338 | trueURI,
339 | done
340 | );
341 | } else {
342 | const [zip, byteLength, errorFlag] = await getZipAndBytelength(
343 | job.data.details.Pages,
344 | job.data.details.bookID,
345 | job.data.details.title,
346 | job
347 | );
348 | if (errorFlag.status) {
349 | logger.log({
350 | level: "error",
351 | message: `Failure PDL: Failed to download ${errorFlag.page}`,
352 | });
353 | done(new Error(`Failure PDL: Failed to download ${errorFlag.page}`));
354 | }
355 | job.progress({
356 | step: "Uploading to Internet Archive",
357 | value: `(${90}%)`,
358 | });
359 | await uploadZipToIA(
360 | zip,
361 | job.data.details,
362 | byteLength,
363 | job.data.details.email,
364 | job,
365 | trueURI,
366 | async (isError, error) => {
367 | if (isError) {
368 | logger.log({
369 | level: "error",
370 | message: `IA Failure PDL: ${error}`,
371 | });
372 | if (job.data.details.isEmailNotification === "true") {
373 | EmailProducer(
374 | job.data.details.userName,
375 | job.data.details.title,
376 | trueURI,
377 | {
378 | archive: false,
379 | commons: false,
380 | }
381 | );
382 | }
383 | done(new Error(error));
384 | } else {
385 | job.progress({
386 | step: "Upload To IA",
387 | value: `(${100}%)`,
388 | });
389 | if (
390 | job.data.details.isUploadCommons !== "true" &&
391 | job.data.details.isEmailNotification !== "true"
392 | ) {
393 | done(null, true);
394 | }
395 | if (
396 | job.data.details.isUploadCommons !== "true" &&
397 | job.data.details.isEmailNotification === "true"
398 | ) {
399 | EmailProducer(
400 | job.data.details.userName,
401 | job.data.details.title,
402 | trueURI,
403 | {
404 | archive: true,
405 | commons: false,
406 | }
407 | );
408 | done(null, true);
409 | }
410 | if (job.data.details.isUploadCommons === "true") {
411 | job.progress({
412 | step: "Uploading to Wikimedia Commons",
413 | value: `(50%)`,
414 | });
415 | const base64Zip = await zip.generateAsync({ type: "base64" });
416 | CommonsProducer(
417 | "pdlZip",
418 | base64Zip,
419 | job.data.details,
420 | async (commonsResponse) => {
421 | if (commonsResponse.status === true) {
422 | job.progress({
423 | step: "Upload to Wikimedia Commons",
424 | value: `(100%)`,
425 | wikiLinks: {
426 | commons: await commonsResponse.value.filename,
427 | },
428 | });
429 | if (job.data.details.isEmailNotification === "true") {
430 | const commonsLink = `https://commons.wikimedia.org/wiki/File:${commonsResponse.value.filename}`;
431 | EmailProducer(
432 | job.data.details.userName,
433 | job.data.details.title,
434 | { archiveLink: trueURI, commonsLink: commonsLink },
435 | {
436 | archive: true,
437 | commons: true,
438 | }
439 | );
440 | }
441 | } else {
442 | job.progress({
443 | step: "Upload To IA (100%), Upload To Commons",
444 | value: `(Failed)`,
445 | });
446 | if (job.data.details.isEmailNotification === "true") {
447 | EmailProducer(
448 | job.data.details.userName,
449 | job.data.details.title,
450 | trueURI,
451 | {
452 | archive: true,
453 | commons: false,
454 | }
455 | );
456 | }
457 | }
458 | }
459 | );
460 | }
461 | return done(null, true);
462 | }
463 | }
464 | );
465 | }
466 | } catch (error) {
467 | logger.log({
468 | level: "error",
469 | message: `Failure PDL Queue: ${error}`,
470 | });
471 | done(new Error(error));
472 | }
473 | });
474 |
--------------------------------------------------------------------------------
/utils/helper.js:
--------------------------------------------------------------------------------
1 | /* Helper functions to modularize the code */
2 | const fetch = require("isomorphic-fetch");
3 | const rp = require("request-promise");
4 | const _ = require("lodash");
5 | const winston = require("winston");
6 | const { truncate } = require("fs");
7 | const logger = winston.loggers.get("defaultLogger");
8 | const fs = require("fs");
9 | const { Mwn } = require("mwn");
10 | const JSZip = require("jszip");
11 | const PDFDocument = require("pdfkit");
12 | const path = require("path");
13 | const { PDFDocument: PDFLibDocument } = require("pdf-lib");
14 |
15 | module.exports = {
16 | checkIfFileExistsAtIA: async (ID) => {
17 | const fetchCall = await fetch(`https://archive.org/metadata/${ID}`);
18 | const resp = await fetchCall.json();
19 | if (!_.isEmpty(resp)) {
20 | if (_.has(resp, "metadata.uploader") === true) {
21 | return resp.metadata.uploader !== process.env.IA_EMAIL;
22 | } else {
23 | return true;
24 | }
25 | } else {
26 | return false;
27 | }
28 | },
29 |
30 | replaceTitle: (title) => {
31 | return title.replace(/[ \(\)\[\],:]/g, "");
32 | },
33 |
34 | customFetch: async (
35 | URI,
36 | method = "GET",
37 | headers = new Headers(),
38 | contentType = "other"
39 | ) => {
40 | return fetch(URI, {
41 | method: method,
42 | headers: headers,
43 | })
44 | .then(
45 | (res) => {
46 | if (res.status === 404) {
47 | return 404;
48 | } else {
49 | const result = contentType === "file" ? res : res.json();
50 | return result;
51 | }
52 | },
53 | (err) => {
54 | logger.log({
55 | level: "error",
56 | message: `customFetch ${err}`,
57 | });
58 | return 404;
59 | }
60 | )
61 | .catch((err) => {
62 | logger.log({
63 | level: "error",
64 | message: `customFetch catch ${err}`,
65 | });
66 | return 404;
67 | });
68 | },
69 |
70 | queueData: async (job, queue) => {
71 | if (!job) return null;
72 | const jobid = job.id;
73 | const { logs } = await queue.getJobLogs(jobid, 0);
74 | if (logs[0]) return JSON.parse(logs[0]);
75 | else return [];
76 | },
77 |
  // Lodash get() paths to the book title inside job.data.details, keyed by
  // queue short-name (gb = Google Books, pdl = Panjab Digital Library).
  bookTitle: {
    gb: "volumeInfo.title",
    pdl: "title",
    trove: "name",
  },
83 |
  // Lodash get() paths to the requesting user's name inside job.data, keyed
  // by queue short-name. Note gb stores it at the top level, the others
  // under `details`.
  userNameLocation: {
    gb: "userName",
    pdl: "details.userName",
    trove: "details.userName",
  },
89 |
90 | jobData: (job, queue) => {
91 | const bookTitlePath = {
92 | gb: "volumeInfo.title",
93 | pdl: "title",
94 | trove: "name",
95 | };
96 | if (!job) return null;
97 | return _.get(job.data.details, bookTitlePath[`${queue}`]);
98 | },
99 |
  // Builds the lookup consumed as statusConfig(processedOn, sum)[sum], where
  // sum = processedOn + finishedOn (each coerced to null when absent):
  //   - job untouched:  processedOn/finishedOn null → sum is 0 → "In Queue"
  //   - job active:     finishedOn null → sum === processedOn, so the later
  //     [processedOn] key OVERWRITES the [sum] key → "Active"
  //   - job finished:   sum !== processedOn → [sum] survives → "Completed"
  // The key collision is deliberate; do not reorder the properties.
  statusConfig: (processedOn, sum) => {
    return {
      [sum]: "Completed",
      [processedOn]: "Active",
      0: "In Queue",
    };
  },
107 |
108 | getPreviewLink: (queue_name, book_id, category_id = null) => {
109 | const previewLinks = {
110 | gb: `http://books.google.co.in/books?id=${book_id}&hl=&source=gbs_api`,
111 | pdl: `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${book_id}&page=1&CategoryID=${category_id}&Searched=W3GX`,
112 | trove: `https://trove.nla.gov.au/ndp/del/title/${book_id}`,
113 | };
114 | return previewLinks[queue_name];
115 | },
116 |
  /**
   * Scrape book metadata (title, description, cover image, page count, …)
   * from a Panjab Digital Library display page.
   * @param {object} cheerioOptions - request-promise options whose
   *   `transform` loads the body into cheerio.
   * @param {string} bookid - PDL book id (echoed back as PNdetails.bookID).
   * @param {string} categoryID - PDL category id (echoed back too).
   * @returns {Promise<object>} PNdetails metadata map; `contentType` is
   *   "pdf" when the page offers a direct PDF download, otherwise "zip".
   */
  getPDLMetaData: async (cheerioOptions, bookid, categoryID) => {
    const $ = await rp(cheerioOptions);
    let PNdetails = {};
    // The page lays out metadata as parallel runs of label (.ubhypers) and
    // value (.dhypers) cells; the two lists drift out of alignment depending
    // on which "Click here to …" placeholder rows are present.
    const keys = $(".ubhypers");
    const values = $(".dhypers");
    const downloadPdfLink = $("#downloadpdf a")[0]?.attribs.href;
    let pagesLabel = $(".ubhypers:contains('Pages')");
    let pagesValue = pagesLabel.parent().next().find(".dhypers").text();
    let contentType = "zip";
    // Re-aligns keys/values when they are offset: finds the first value that
    // is a link not pointing at "Keywords", then shifts indices by the
    // difference between that position and `limit`.
    function addOtherMetaData(limit, keys, values, PNdetails) {
      let value;
      for (let i = 0; i < values.length; i++) {
        if ($(values[i]).attr("href")) {
          if (!$(values[i]).attr("href").includes("Keywords")) {
            value = i;
            break;
          }
        }
      }

      // NOTE(review): if no matching link exists, `value` stays undefined and
      // both comparisons are false/NaN-driven — confirm pages always have one.
      if (value <= limit) {
        const add = limit - value;
        for (let i = value; i < values.length; i++) {
          PNdetails[[$(keys[i + add]).text()]] = $(values[i]).text().trim();
        }
      } else {
        const sub = value - limit;
        for (let i = value; i < values.length; i++) {
          PNdetails[[$(keys[i - sub]).text()]] = $(values[i]).text().trim();
        }
      }
    }

    // Three layouts: description placeholder present, keywords placeholder
    // present, or real values from the start — each needs a different offset.
    if ($(values[0]).text().trim() === "Click here to add description") {
      if ($(values[1]).text().trim() === "Click here to suggest keywords") {
        for (let i = 2; i < values.length; i++) {
          PNdetails[[$(keys[i + 1]).text()]] = $(values[i]).text().trim();
        }
      } else {
        addOtherMetaData(4, keys, values, PNdetails);
      }
    } else if (
      $(values[0]).text().trim() === "Click here to suggest keywords"
    ) {
      for (let i = 1; i < values.length; i++) {
        PNdetails[[$(keys[i + 2]).text()]] = $(values[i]).text().trim();
      }
      // Brittle positional selector into the PDL table layout — any site
      // redesign will break these.
      PNdetails.description = $(
        "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td:nth-child(2)"
      )
        .text()
        .trim();
    } else {
      addOtherMetaData(5, keys, values, PNdetails);
      PNdetails.description = $(
        "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td:nth-child(2)"
      )
        .text()
        .trim();
      PNdetails.description = PNdetails.description.replace(/\n/g, "");
      PNdetails.description = PNdetails.description.replace(/\[edit]/g, "");
    }

    PNdetails.title = $(
      "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td > a"
    )
      .text()
      .trim();
    PNdetails.bookID = bookid;
    PNdetails.categoryID = categoryID;
    let src = $(
      "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(1) > table > tbody > tr:nth-child(1) > td > table > tbody > tr > td > a > img"
    ).attr("src");
    // Keep only the path starting at "pdl" and host it off the main domain.
    src = src.match(/pdl.*/gm);
    PNdetails.coverImage = `http://panjabdigilib.org/${src}`;

    if (downloadPdfLink?.length) {
      contentType = "pdf";
      PNdetails.pdfUrl = `http://www.panjabdigilib.org/webuser/searches/${downloadPdfLink}`;
    }
    PNdetails.contentType = contentType;
    PNdetails.Pages = pagesValue;
    // Drop the artifact entry produced by empty label cells.
    delete PNdetails[""];
    return PNdetails;
  },
202 |
203 | getPDLTitle: async (cheerioOptions) => {
204 | const $ = await rp(cheerioOptions);
205 | return $(
206 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td > a"
207 | )
208 | .text()
209 | .trim();
210 | },
211 |
212 | getTroveMetaData: async (cheerioOptions) => {
213 | const $ = await rp(cheerioOptions);
214 | const issueRenditionId = $(".issueRendition")
215 | .attr("data-prepurl")
216 | .match(/\d+/);
217 | if (issueRenditionId && issueRenditionId !== null)
218 | return issueRenditionId[0];
219 | else
220 | logger.log({
221 | level: "error",
222 | message: `issueRenditionId not found ${issueRenditionId}`,
223 | });
224 | },
225 | checkForPublicDomain: (data, res) => {
226 | if (data === 404) {
227 | res.send({ error: true, message: "Invalid Book ID" });
228 | return {
229 | error: true,
230 | };
231 | }
232 | if (data.error) {
233 | if (data.error.code === 503) {
234 | //Google Books error
235 | res.send({ error: true, message: "Invalid Book ID" });
236 | return {
237 | error: true,
238 | };
239 | }
240 | } else {
241 | const { publicDomain } = data.accessInfo; //Response object destructuring
242 | if (publicDomain === false) {
243 | //Checking if the book belongs to publicDomain
244 | res.send({ error: true, message: "Not in public domain." });
245 | return {
246 | error: true,
247 | };
248 | } else {
249 | return {
250 | error: false,
251 | data,
252 | };
253 | }
254 | }
255 | },
256 |
  /**
   * Convert a JSZip archive of page images (.jpg/.jpeg/.png) into one merged
   * PDF written to `localFilePath`.
   * @param {JSZip} targetZip - loaded zip whose entries are page images.
   * @param {string} localFilePath - destination path for the merged PDF.
   * @returns {Promise<{status: number, error?: Error}>} {status: 200} on
   *   success, {status: 404, error} on failure.
   */
  convertZipToPdf: async (targetZip, localFilePath) => {
    // Merge the per-page PDF buffers into a single document via pdf-lib and
    // write it to disk.
    async function mergePdf(pdfDataArray) {
      try {
        const mergedPdf = await PDFLibDocument.create();
        for (const pdfData of pdfDataArray) {
          const pdfDoc = await PDFLibDocument.load(pdfData);
          const pages = await mergedPdf.copyPages(
            pdfDoc,
            pdfDoc.getPageIndices()
          );
          for (const page of pages) {
            mergedPdf.addPage(page);
          }
        }

        const mergedPdfFile = await mergedPdf.save();
        await fs.promises.writeFile(localFilePath, mergedPdfFile);
        return { status: 200 };
      } catch (error) {
        logger.log({
          level: "error",
          message: `PDL - convertZipToPdf/mergePdf: ${error}`,
        });
        return { status: 404, error: error };
      }
    }

    // Render each image entry to its own single-page PDF buffer with pdfkit;
    // entries are processed in parallel, and the captured `index` preserves
    // the original zip order for the later sort.
    async function zipToPdf() {
      try {
        const pdfInstances = [];
        await Promise.all(
          Object.values(targetZip.files).map(async (file, index) => {
            if (file.dir) return;
            if ([".jpg", ".jpeg", ".png"].includes(path.extname(file.name))) {
              const data = await file.async("nodebuffer");
              const pdfDoc = new PDFDocument();
              const buffers = [];
              // NOTE(review): `new require("stream").Writable(...)` parses
              // as `(new require("stream")).Writable(...)`; it works because
              // Writable also functions without `new`, but
              // `const { Writable } = require("stream")` would be clearer.
              const writeStream = new require("stream").Writable({
                write(chunk, encoding, callback) {
                  buffers.push(chunk);
                  callback();
                },
              });
              pdfDoc.pipe(writeStream);
              pdfDoc.image(data, 0, 0, { fit: [595.28, 841.89] }); // A4 size
              pdfDoc.end();
              // Resolve only after the stream flushes so `buffers` is complete.
              return new Promise((resolve) => {
                writeStream.on("finish", () => {
                  pdfInstances.push({
                    index,
                    pdfInstance: Buffer.concat(buffers),
                  });
                  resolve();
                });
              });
            }
          })
        );

        // Restore page order lost to parallel completion.
        pdfInstances.sort((a, b) => a.index - b.index);

        const sortedPdfInstances = pdfInstances.map(
          ({ pdfInstance }) => pdfInstance
        );
        return await mergePdf(sortedPdfInstances);
      } catch (error) {
        logger.log({
          level: "error",
          message: `PDL - convertZipToPdf/zipToPdf: ${error}`,
        });
        return { status: 404, error: error };
      }
    }
    return await zipToPdf();
  },
332 |
333 | logUserData: (userName, libraryName) => {
334 | logger.log({
335 | level: "info",
336 | message: `User ${userName} uploaded using ${libraryName}`,
337 | });
338 | },
339 | downloadFile: async (downloadUrl, localFilepath) => {
340 | try {
341 | const fileRes = await fetch(downloadUrl, {
342 | method: "GET",
343 | headers: new Headers({
344 | "Content-Type": "application/pdf",
345 | }),
346 | });
347 | const fileBuffer = await fileRes.buffer();
348 | await fs.promises.writeFile(localFilepath, fileBuffer);
349 | return {
350 | writeFileStatus: 200,
351 | };
352 | } catch (error) {
353 | logger.log({
354 | level: "error",
355 | message: `downloadFile: ${error}`,
356 | });
357 | return error;
358 | }
359 | },
360 | uploadToCommons: async (metadata) => {
361 | try {
362 | const bot = await Mwn.init({
363 | apiUrl: process.env.NEXT_PUBLIC_COMMONS_URL + "/w/api.php",
364 | OAuth2AccessToken: metadata.oauthToken,
365 | userAgent: "bub2.wmcloud ([[https://bub2.wmcloud.org]])",
366 | defaultParams: {
367 | assert: "user",
368 | },
369 | });
370 |
371 | bot.userinfo();
372 |
373 | const commonsFilePayload = "commonsFilePayload.pdf";
374 | let title =
375 | metadata.details?.volumeInfo?.title || metadata.name || metadata.title;
376 | title = title.replaceAll(".", "");
377 | const response = await bot.upload(
378 | commonsFilePayload,
379 | title,
380 | metadata.commonsMetadata
381 | );
382 | if (await response.filename) {
383 | await fs.promises.unlink(commonsFilePayload);
384 | }
385 | logger.log({
386 | level: "info",
387 | message: `uploadToCommons: Upload of ${metadata.IAIdentifier} to commons successful`,
388 | });
389 | return {
390 | fileUploadStatus: 200,
391 | filename: response.filename,
392 | };
393 | } catch (error) {
394 | await fs.promises.unlink("commonsFilePayload.pdf");
395 | logger.log({
396 | level: "error",
397 | message: `uploadToCommons (catch): ${error}`,
398 | });
399 | logger.log({
400 | level: "error",
401 | message: `accessToken: ${metadata.oauthToken}`,
402 | });
403 | return error;
404 | }
405 | },
406 |
  /**
   * Create a Wikidata item for an uploaded Google Books edition via the
   * Wikibase REST API.
   *
   * NOTE(review): this function still contains debug leftovers —
   *   1. it POSTs `TestPayload`, not the fully built `GBWikiDataPayload`
   *      (which is constructed below and then never used);
   *   2. the Authorization header is a hardcoded Bearer token (now expired)
   *      instead of `metadata.oauthToken` — a leaked credential that must be
   *      revoked and removed;
   *   3. it console.logs the user's oauthToken.
   * These are flagged rather than changed here to avoid altering behavior.
   *
   * @param {object} metadata - job metadata (Google Books details, oauthToken).
   * @param {string} commonsItemFilename - Commons file to link via P996.
   * @param {string} libraryName - only "gb" is supported; others return 404.
   * @returns {Promise<string|number>} new item id on HTTP 201, else 404.
   */
  uploadToWikiData: async (metadata, commonsItemFilename, libraryName) => {
    if (libraryName !== "gb") {
      //support only for Google Books for now
      return 404;
    }
    try {
      const title = metadata.details.volumeInfo.title || "";
      const id = metadata.details.id || "";
      const authorsArr = metadata.details.volumeInfo.authors
        ? metadata.details.volumeInfo.authors.join().trim()
        : null;

      // Intended production payload: labels/description plus statements for
      // Google Books ID (P675), instance-of (P31), Commons file (P996),
      // author name string (P2093), Commons category (P373), title (P1476).
      // Currently unused — see NOTE(review) above.
      const GBWikiDataPayload = {
        item: {
          labels: {
            en: title,
          },
          descriptions: {
            en: "edition of a written work",
          },
          statements: {
            P675: [
              {
                rank: "normal",
                property: {
                  id: "P675",
                },
                value: {
                  content: id,
                  type: "value",
                },
                qualifiers: [],
                references: [],
              },
            ],
            P31: [
              {
                rank: "normal",
                property: {
                  id: "P31",
                  "data-type": "wikibase-item",
                },
                value: {
                  type: "value",
                  content: "Q47461344", //wikidata id for 'written work'
                },
                qualifiers: [],
                references: [],
              },
            ],
            P996: [
              {
                rank: "normal",
                property: {
                  id: "P996",
                  "data-type": "commonsMedia",
                },
                value: {
                  content: commonsItemFilename,
                  type: "value",
                },
                qualifiers: [],
                references: [],
              },
            ],
            P2093: [
              {
                rank: "normal",
                property: {
                  id: "P2093",
                },
                value: {
                  content: authorsArr,
                  type: "value",
                },
                qualifiers: [],
                references: [],
              },
            ],
            P373: [
              {
                rank: "normal",
                property: {
                  id: "P373",
                },
                value: {
                  content: "Files_uploaded_with_BUB2",
                  type: "value",
                },
                qualifiers: [],
                references: [],
              },
            ],
            P1476: [
              {
                rank: "normal",
                property: {
                  id: "P1476",
                  "data-type": "monolingualtext",
                },
                value: {
                  type: "value",
                  content: {
                    text: title,
                    language: "en",
                  },
                },
                qualifiers: [],
                references: [],
              },
            ],
          },
        },
      };

      // Debug payload that is what actually gets sent today.
      const TestPayload = {
        item: {
          labels: {
            en: "test12",
          },
          descriptions: {
            en: "test1123",
          },
          statements: {
            P97012: [
              {
                rank: "normal",
                property: {
                  id: "P97012",
                },
                value: {
                  content: "hello 1",
                  type: "value",
                },
                qualifiers: [],
                references: [],
              },
            ],
          },
        },
        tags: [],
        bot: false,
        comment: "Metadata updated by BUB2",
      };

      // SECURITY(review): hardcoded, now-expired personal Bearer token —
      // should be `metadata.oauthToken`; revoke and remove this literal.
      const wikiDataAPI = await fetch(
        `${process.env.NEXT_PUBLIC_WIKIDATA_URL}/w/rest.php/wikibase/v0/entities/items`,
        {
          method: "POST",
          headers: {
            Authorization:
              "Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiIzM2I4ODg5NTJhOGQ3ZWVjMTNiNmVkNjMxNmQ2YjdiMSIsImp0aSI6IjU1NjY1MTM4M2Y1NmIwZTQ4YWVlNWE4NGYxYmZlNjAzZWIyYzRlYTk5MWZhN2M4YzY1ZGZkMTMxMGRkMzYyMTNmNmM2N2FjNzQ0MDVmOGRhIiwiaWF0IjoxNzE3ODcwMjQ5LjI5Mzc0NywibmJmIjoxNzE3ODcwMjQ5LjI5Mzc1LCJleHAiOjE3MTc4ODQ2NDkuMjg0MTg3LCJzdWIiOiI0NDc1MDA3NiIsImlzcyI6Imh0dHBzOi8vbWV0YS53aWtpbWVkaWEub3JnIiwicmF0ZWxpbWl0Ijp7InJlcXVlc3RzX3Blcl91bml0Ijo1MDAwLCJ1bml0IjoiSE9VUiJ9LCJzY29wZXMiOlsiYmFzaWMiLCJlZGl0cGFnZSIsImNyZWF0ZWVkaXRtb3ZlcGFnZSIsInVwbG9hZGZpbGUiLCJ1cGxvYWRlZGl0bW92ZWZpbGUiLCJzZW5kZW1haWwiXX0.M3OHvdO37MjlcfqAVzLHuXNodO87BrgS5YjIZ5VJn9_Tp1oEvBBydnJH5wyJdxSfHCAay7c8NdBglbrNCTRdOnFAWw2LbfMK8D8W53x2ilFmgq7oXG3EMRICgztYgA0YUCHvbq2TlpnizfrMqVcSeiSidDUH9s1DiT2xce1110e5VfFTDh1l0YB3BGXPHNezEnXsaLm_90dobrZeSiW6T94CCwpQ7dy88SEOfYPjNLRUTTmeAlOgV1ogdoDkJVUumzCnBj-05l_GVbPCQ6VbV-m4aDurnBli2Fjj_Nl4CV8K14ce1HxSi8MuNgbZsSwpNm73PVqFF_0aqBquGURdw2ysep61_MaPxGY9suNTW3uZ8pVVAypbrLeI8aczIbepbc-Vf8k0gVJXaJzTOo_l-xRNAXOdTzMd-6dMnypk4u4o0SITPD1prO8_kzgKtSdAUrrEQZgoexg1RUWQvwdk2cSlwStnIUjY-5qY9g2Y-W2qQJXq4I1-UTF8NL5DPjTpfdl0Qm2BdUNWyvKtqxFBi_96g9lmO8-vFOcuCSiFPM2nY1dHcnGgh7pzqQYmEEJ1p1YXYsHB4_rHujNP0NrlkCOk_zCieL0pRhDS-qZLZnNwVBb1fZj6dlV260TSRwWHIgF1fjCC2uJVgMTpR2-IO2bJUJoUgbQE9tyMbrlqxOE",
            "Content-Type": "application/json",
          },
          body: JSON.stringify(TestPayload),
        }
      );
      // NOTE(review): logs the user's access token to stdout — remove.
      console.log(metadata.oauthToken, "::oauthToken");
      if (wikiDataAPI.status === 201) {
        console.log("success");
        const data = await wikiDataAPI.json();
        return data.id;
      } else {
        const errorData = await wikiDataAPI.json();
        logger.log({
          level: "error",
          message: `wikiDataAPIFailure (fetch):${JSON.stringify(errorData)}`,
        });
        return 404;
      }
    } catch (error) {
      logger.log({
        level: "error",
        message: `uploadToWikidata:${error}`,
      });
      return 404;
    }
  },
585 | };
586 |
--------------------------------------------------------------------------------
/server.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | const express = require("express");
3 | const next = require("next");
4 | const bodyParser = require("body-parser");
5 | const cors = require("cors");
6 | const open = require("open");
7 | const compression = require("compression");
8 | require("dotenv").config();
9 | const dev = process.env.NODE_ENV !== "production";
10 | const PORT = process.env.PORT || 5000;
11 | const GB_KEY = process.env.GB_KEY;
12 | const trove_key = process.env.trove_key;
13 | const winston = require("winston");
14 | const cheerio = require("cheerio"); // Basically jQuery for node.js
15 | const app = next({
16 | dev,
17 | });
18 |
// Register the shared "defaultLogger" that the rest of the codebase fetches
// via winston.loggers.get("defaultLogger"). JSON-formatted, timestamped.
const logger = winston.loggers.add("defaultLogger", {
  level: "info",
  format: winston.format.combine(
    winston.format.timestamp(),
    winston.format.json()
  ),
  transports: [
    //
    // - Write all logs with importance level of `error` or less to `error.log`
    // - Write all logs with importance level of `info` or less to `combined.log`
    //
    new winston.transports.File({ filename: "error.log", level: "error" }),
    new winston.transports.File({ filename: "combined.log" }),
  ],
});

// Next.js request handler for all routes not intercepted below.
const handle = app.getRequestHandler();
// NOTE(review): module-level mutable state shared across ALL requests —
// concurrent users will overwrite each other's values; confirm intent.
var emailaddr = "";
var authUserName = "";
38 | const {
39 | customFetch,
40 | queueData,
41 | statusConfig,
42 | bookTitle,
43 | userNameLocation,
44 | getPreviewLink,
45 | jobData,
46 | checkForPublicDomain,
47 | checkIfFileExistsAtIA,
48 | replaceTitle,
49 | getPDLTitle,
50 | getPDLMetaData,
51 | } = require("./utils/helper.js");
52 | const GoogleBooksProducer = require("./bull/google-books-queue/producer");
53 | const PDLProducer = require("./bull/pdl-queue/producer");
54 | const TroveProducer = require("./bull/trove-queue/producer");
55 | const { exec } = require("child_process");
56 | const config = require("./utils/bullconfig");
57 | const _ = require("lodash");
58 |
59 | app
60 | .prepare()
61 | .then(() => {
62 | const server = express();
63 |
64 | //Parse application/x-www-form-urlencoded
65 | server.use(
66 | bodyParser.urlencoded({
67 | extended: true,
68 | })
69 | );
70 |
71 | //Parse application/json
72 | server.use(bodyParser.json());
73 |
74 | //Enable and use CORS
75 | server.use(
76 | cors({
77 | credentials: true,
78 | origin: true,
79 | })
80 | );
81 |
82 | server.use(compression());
83 |
84 | /**
85 | * Every custom route that we build needs to arrive before the * wildcard.
86 | * This is necessary because otherwise the server won't recognise the route.
87 | */
88 |
89 | server.get("/getstats", async (req, res) => {
90 | const pdl_queue = config.getNewQueue("pdl-queue");
91 | const google_books_queue = config.getNewQueue("google-books-queue");
92 | const trove_queue = config.getNewQueue("trove-queue");
93 | const pdl_queue_count = await pdl_queue.getJobCounts();
94 | const google_books_queue_count = await google_books_queue.getJobCounts();
95 | const trove_queue_count = await trove_queue.getJobCounts();
96 | const queueStats = {
97 | pdl: pdl_queue_count,
98 | gb: google_books_queue_count,
99 | trove: trove_queue_count,
100 | };
101 | const commonsRes = await customFetch(
102 | process.env.NEXT_PUBLIC_COMMONS_URL +
103 | "/w/api.php?action=query&prop=categoryinfo&titles=Category:Files_uploaded_with_BUB2&format=json",
104 | "GET"
105 | );
106 | customFetch(
107 | `https://archive.org/advancedsearch.php?q=${process.env.IA_EMAIL}+&rows=0&output=json`,
108 | "GET"
109 | ).then((resp) => {
110 | if (resp && resp.response && resp.response.numFound) {
111 | const pages = commonsRes?.query?.pages;
112 | const page_no = pages[`${_.keys(pages)}`];
113 | res.send({
114 | queueStats: queueStats,
115 | totalUploadedCount: resp.response.numFound,
116 | commonsUploadedCount: page_no?.categoryinfo?.files
117 | ? page_no.categoryinfo.files
118 | : "0",
119 | });
120 | }
121 | });
122 | });
123 |
    /**
     * GET /getJobInformation?queue_name=&job_id= — detailed view of a single
     * job: progress, preview link, upload link, Commons/Wikidata links.
     * Sends {} for missing/unknown jobs and on any internal error.
     */
    server.get("/getJobInformation", async (req, res) => {
      try {
        let queue, queueName;
        switch (req.query.queue_name) {
          case "gb":
            queue = config.getNewQueue("google-books-queue");
            queueName = "Google Books";
            break;

          case "pdl":
            queue = config.getNewQueue("pdl-queue");
            queueName = "Panjab Digital Library";
            break;

          case "trove":
            queue = config.getNewQueue("trove-queue");
            queueName = "Trove Digital Library";
            break;

          default:
            throw "Invalid queue";
        }
        if (req.query.job_id) {
          const job = await queue.getJob(req.query.job_id);
          if (job) {
            const queue_data = await queueData(job, queue);
            // progress() has two shapes: an object {step, value, wikiLinks}
            // written by the consumers, or a legacy plain percentage.
            const progress = job.progress().value
              ? `${job.progress().step}${job.progress().value}`
              : job.progress();
            const jobState = await job.getState();
            const book_id = job.data.details.id || job.data.details.bookID;
            const categoryID = job.data.details.categoryID;
            const trueURI = _.get(queue_data, "trueURI");
            if (req.query.queue_name === "trove") {
              // Trove jobs have no scraped cover image; use the Trove logo.
              _.set(
                queue_data,
                "coverImage",
                "https://assets.nla.gov.au/logos/trove/trove-colour.svg"
              );
            }
            // Only expose the IA link once the job has reached (or passed)
            // the upload stage.
            function getUploadLink(job, trueURI) {
              if (job.progress().step) {
                const link =
                  (job.progress().step.includes("Upload To IA") ||
                    job.progress().step.includes("Upload to Wikimedia")) &&
                  trueURI
                    ? trueURI
                    : "";
                return link;
              } else {
                // Legacy numeric progress: link only at 100%.
                const link = job.progress() === 100 ? trueURI : "";
                return link;
              }
            }
            const obj = {
              progress: progress,
              queueName: queueName,
              previewLink: getPreviewLink(
                req.query.queue_name,
                book_id,
                categoryID
              ),
              uploadStatus: {
                uploadLink: getUploadLink(job, trueURI),
                isUploaded: jobState === "completed" ? true : false,
              },
              // wikiLinks.wikidata can be 404 (upload attempted and failed).
              wikimedia_links: {
                commons: job.progress().wikiLinks?.commons
                  ? job.progress().wikiLinks.commons
                  : "Not Integrated",
                wikidata: job.progress().wikiLinks?.wikidata
                  ? job.progress().wikiLinks.wikidata !== 404
                    ? job.progress().wikiLinks.wikidata
                    : "Not Integrated"
                  : "Not Integrated",
              },
            };
            res.send(
              Object.assign(
                {},
                _.pick(queue_data, [
                  "title",
                  "description",
                  "imageLinks",
                  "coverImage",
                ]),
                obj
              )
            );
          } else {
            res.send({});
          }
        } else {
          res.send({});
        }
      } catch (err) {
        res.send({});
        logger.log({
          level: "error",
          message: `getJobInformation ${err}`,
        });
      }
    });
227 |
228 | server.get("/getJobProgress", async (req, res) => {
229 | let queue;
230 | switch (req.query.queue_name) {
231 | case "gb":
232 | queue = config.getNewQueue("google-books-queue");
233 | break;
234 |
235 | case "pdl":
236 | queue = config.getNewQueue("pdl-queue");
237 | queueName = "Panjab Digital Library";
238 | break;
239 |
240 | case "trove":
241 | queue = config.getNewQueue("trove-queue");
242 | queueName = "Trove Digital Library";
243 | break;
244 |
245 | default:
246 | throw "Invalid queue";
247 | }
248 | if (req.query.job_id) {
249 | const job = await queue.getJob(req.query.job_id);
250 | if (job) {
251 | return job.progress().value;
252 | }
253 | return null;
254 | }
255 | return null;
256 | });
257 |
    /**
     * GET /allJobs?queue_name= — list every job in the queue (all states),
     * newest first, with a display-ready timestamp, progress and status.
     * Sends [] on any error.
     */
    server.get("/allJobs", async (req, res) => {
      // NOTE(review): extends the global String.prototype on every request,
      // and `capitalize` is not used anywhere in this handler — candidate
      // for removal after confirming no other code depends on it.
      String.prototype.capitalize = function () {
        return this.charAt(0).toUpperCase() + this.slice(1);
      };

      // Derive a human status from Bull job timestamps; relies on the
      // key-collision trick documented on statusConfig in utils/helper.js.
      const returnJobStatus = (failedReason, finishedOn, processedOn) => {
        if (failedReason) return `Failed! (Reason: ${failedReason})`;
        if (!finishedOn) finishedOn = null;
        if (!processedOn) processedOn = null;
        const sum = processedOn + finishedOn;
        return statusConfig(processedOn, sum)[sum];
      };

      try {
        let queue;
        switch (req.query.queue_name) {
          case "gb":
            queue = config.getNewQueue("google-books-queue");
            break;

          case "pdl":
            queue = config.getNewQueue("pdl-queue");
            break;

          case "trove":
            queue = config.getNewQueue("trove-queue");
            break;

          default:
            throw "Invalid queue";
        }
        queue
          .getJobs([
            "active",
            "waiting",
            "completed",
            "failed",
            "delayed",
            "paused",
          ])
          .then((jobs) => {
            let filteredJobs = jobs.map((job) => {
              let date = new Date(job.timestamp);
              let userName = _.get(
                job.data,
                userNameLocation[req.query.queue_name]
              );
              return {
                id: Number(job.id),
                title: _.get(job.data.details, bookTitle[req.query.queue_name]),
                userName: userName ? userName : "-",
                // Manual "YYYY-MM-DD HH:MM (UTC)" formatting.
                // NOTE(review): parseInt on a number without radix, and
                // Number.toLocaleString for zero-padding, both work here but
                // String(...).padStart(2, "0") would be clearer.
                timestamp:
                  date.getUTCFullYear() +
                  "-" +
                  parseInt(date.getUTCMonth() + 1)
                    .toString()
                    .padStart(2, "0") +
                  "-" +
                  date.getUTCDate().toLocaleString(undefined, {
                    minimumIntegerDigits: 2,
                  }) +
                  " " +
                  date.getUTCHours() +
                  ":" +
                  date.getUTCMinutes().toLocaleString(undefined, {
                    minimumIntegerDigits: 2,
                  }) +
                  " (UTC)",
                // progress() is either {step, value, wikiLinks} or a legacy
                // plain percentage.
                upload_progress: job.progress().step
                  ? `${job.progress().step}:${job.progress().value}`
                  : `${job.progress()}%`,
                status: returnJobStatus(
                  job.failedReason,
                  job.finishedOn,
                  job.processedOn
                ),
                wikimedia_links: job.progress().wikiLinks?.commons
                  ? job.progress().wikiLinks.commons
                  : "Not Integrated",
              };
            });
            res.send(_.orderBy(filteredJobs, "id", "desc"));
          })
          .catch((err) => {
            res.send([]);
            logger.log({
              level: "error",
              message: `allJobs getJobs ${err}`,
            });
          });
      } catch (err) {
        res.send([]);
        logger.log({
          level: "error",
          message: `allJobs ${err}`,
        });
      }
    });
356 |
357 | server.get("/getqueue", async (req, res) => {
358 | const pdl_queue = await config.getNewQueue("pdl-queue");
359 | const google_books_queue = await config.getNewQueue("google-books-queue");
360 | const trove_queue = await config.getNewQueue("trove-queue");
361 | const commons_queue = await config.getNewQueue("commons-queue");
362 |
363 | const queryParams = {
364 | "gb-queue": {
365 | active: "",
366 | waiting: "",
367 | },
368 | "pdl-queue": {
369 | active: "",
370 | waiting: "",
371 | },
372 | "trove-queue": {
373 | active: "",
374 | waiting: "",
375 | },
376 | "commons-queue": {
377 | active: "",
378 | waiting: "",
379 | },
380 | };
381 | const pdlqueue_active_job = await pdl_queue.getActive(0, 0);
382 | const pdlqueue_waiting_job = await pdl_queue.getWaiting(0, 0);
383 |
384 | const gbqueue_active_job = await google_books_queue.getActive(0, 0);
385 | const gbqueue_waiting_job = await google_books_queue.getWaiting(0, 0);
386 |
387 | const trovequeue_active_job = await trove_queue.getActive(0, 0);
388 | const trovequeue_waiting_job = await trove_queue.getWaiting(0, 0);
389 |
390 | const commonsqueue_active_job = await commons_queue.getActive(0, 0);
391 | const commonsqueue_waiting_job = await commons_queue.getWaiting(0, 0);
392 |
393 | queryParams["pdl-queue"]["active"] = jobData(
394 | pdlqueue_active_job[0],
395 | "pdl"
396 | );
397 | queryParams["pdl-queue"]["waiting"] = jobData(
398 | pdlqueue_waiting_job[0],
399 | "pdl"
400 | );
401 |
402 | queryParams["gb-queue"]["active"] = jobData(gbqueue_active_job[0], "gb");
403 | queryParams["gb-queue"]["waiting"] = jobData(
404 | gbqueue_waiting_job[0],
405 | "gb"
406 | );
407 |
408 | queryParams["trove-queue"]["active"] = jobData(
409 | trovequeue_active_job[0],
410 | "trove"
411 | );
412 | queryParams["trove-queue"]["waiting"] = jobData(
413 | trovequeue_waiting_job[0],
414 | "trove"
415 | );
416 |
417 | queryParams["commons-queue"]["active"] = jobData(
418 | commonsqueue_active_job[0],
419 | "commons"
420 | );
421 | queryParams["commons-queue"]["waiting"] = jobData(
422 | commonsqueue_waiting_job[0],
423 | "commons"
424 | );
425 | res.send(queryParams);
426 | });
427 |
    // State captured by the Google Books branch of /check and consumed by a
    // later request. NOTE(review): this is mutable state shared across ALL
    // users — concurrent /check calls overwrite each other; consider keying
    // by session/request instead.
    let GBdetails = {};
    let GBreq;
    let GBcommonsMetaData;
    // IA identifiers accepted by this app: 1-50 alphanumeric characters.
    const isAlphanumericLess50 = /^[a-zA-Z0-9]{1,50}$/;
/**
 * POST /check
 *
 * Validates an upload request for one of the supported sources:
 *   "gb"    — Google Books (public-domain check, then waits for /download),
 *   "obp"   — acknowledged immediately,
 *   "pn"    — Panjab Digital Library (scrape title, enqueue PDL job),
 *   "trove" — Trove newspapers (fetch article, enqueue Trove job).
 * Each branch verifies the Internet Archive identifier (custom `IAtitle` or
 * one derived from the source title) for validity and duplicates before
 * responding / enqueueing.
 */
server.post("/check", async (req, res) => {
  const {
    bookid,
    option,
    email,
    userName,
    IAtitle,
    isEmailNotification,
    isUploadCommons,
    oauthToken,
  } = req.query;
  const commonsMetadata = req.body.commonsMetadata;
  // NOTE(review): module-level state shared with POST /download — concurrent
  // requests can overwrite each other's values (see declarations above).
  emailaddr = email;
  authUserName = userName;
  switch (option) {
    case "gb":
      customFetch(
        `https://www.googleapis.com/books/v1/volumes/${bookid}?key=${GB_KEY}`,
        "GET",
        new Headers({
          "Content-Type": "application/json",
        })
      ).then(async (data) => {
        // checkForPublicDomain writes the error response on `res` itself;
        // only continue when no error was reported.
        const { error } = checkForPublicDomain(data, res);
        if (!error) {
          const titleInIA =
            IAtitle.trim() !== ""
              ? replaceTitle(IAtitle.trim())
              : replaceTitle(data.volumeInfo.title);
          if (isAlphanumericLess50.test(titleInIA) === false) {
            res.send({
              isInValidIdentifier: true,
              titleInIA,
            });
          } else if ((await checkIfFileExistsAtIA(titleInIA)) === true) {
            res.send({
              isDuplicate: true,
              titleInIA,
            });
          } else {
            // Stash request context for the follow-up POST /download call.
            GBdetails = data;
            GBreq = req;
            GBcommonsMetaData = commonsMetadata;
            res.send({
              error: false,
              message: "In public domain.",
              url: data.accessInfo.pdf.downloadLink,
              title: data.volumeInfo.title,
              IAIdentifier: titleInIA,
            });
          }
        }
      });
      break;

    case "obp":
      res.send({
        error: false,
        message: "You will be mailed with the details soon!",
      });
      // BUG FIX: this case previously had no `break` and fell through into
      // "pn", scraping PDL with an undefined categoryID and attempting a
      // second res.send() ("Cannot set headers after they are sent").
      break;

    case "pn": {
      const { categoryID } = req.query;
      const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookid}&page=1&CategoryID=${categoryID}&Searched=W3GX`;
      const options = {
        uri,
        transform: function (body) {
          return cheerio.load(body);
        },
      };
      // Prefer the user-supplied identifier; otherwise derive one from the
      // title scraped off the PDL page.
      const titleInIA =
        IAtitle.trim() !== ""
          ? replaceTitle(IAtitle.trim())
          : replaceTitle(await getPDLTitle(options));
      if (titleInIA === "") {
        res.send({
          error: true,
          message: "Not able to fetch title.",
        });
      } else if (isAlphanumericLess50.test(titleInIA) === false) {
        res.send({
          isInValidIdentifier: true,
          titleInIA,
        });
      } else if ((await checkIfFileExistsAtIA(titleInIA)) === true) {
        res.send({
          isDuplicate: true,
          titleInIA,
        });
      } else {
        // Respond first, then enqueue; the user is notified by e-mail later.
        res.send({
          error: false,
          message: "You will be mailed with the details soon!",
        });
        PDLProducer(
          bookid,
          titleInIA,
          categoryID,
          email,
          authUserName,
          isEmailNotification,
          isUploadCommons,
          oauthToken,
          commonsMetadata
        );
      }
      break;
    }

    case "trove":
      customFetch(
        `https://api.trove.nla.gov.au/v2/newspaper/${bookid}?key=${trove_key}&encoding=json&reclevel=full`,
        "GET",
        new Headers({
          "Content-Type": "application/json",
        })
      ).then(async (data) => {
        if (data === 404) {
          res.send({
            error: true,
            message: "Invalid Newspaper/Gazette ID",
          });
        } else {
          const name = _.get(data, "article.title.value");
          const titleInIA =
            IAtitle.trim() !== ""
              ? replaceTitle(IAtitle.trim())
              : replaceTitle(name);
          if (isAlphanumericLess50.test(titleInIA) === false) {
            res.send({
              isInValidIdentifier: true,
              titleInIA,
            });
          } else if ((await checkIfFileExistsAtIA(titleInIA)) === true) {
            res.send({
              isDuplicate: true,
              titleInIA,
            });
          } else {
            // NOTE(review): `troveUrl` is assigned without a visible
            // declaration (implicit or module-scope global); verify nothing
            // else reads it before converting it to a local `const`.
            troveUrl = `https://trove.nla.gov.au/ndp/del/title/${data.article.title.id}`;
            const id = _.get(data, "article.title.id");
            const date = _.get(data, "article.date");
            const troveData = {
              id,
              name,
              troveUrl,
              date,
            };
            // Respond first, then enqueue; details arrive by e-mail.
            res.send({
              error: false,
              message: "You will be mailed with the details soon!",
            });
            TroveProducer(
              bookid,
              titleInIA,
              troveData,
              email,
              userName,
              isEmailNotification,
              isUploadCommons,
              oauthToken,
              commonsMetadata
            );
          }
        }
      });
      break;
  }
});
/**
 * GET /checkPublicDomain — ask Google Books whether `bookid` is downloadable.
 * `checkForPublicDomain` writes the error response on `res` itself when the
 * volume is not in the public domain; on success we reply `{ error: false }`.
 */
server.get("/checkPublicDomain", async (req, res) => {
  const { bookid } = req.query;
  const headers = new Headers({
    "Content-Type": "application/json",
  });
  const data = await customFetch(
    `https://www.googleapis.com/books/v1/volumes/${bookid}?key=${GB_KEY}`,
    "GET",
    headers
  );
  const { error } = checkForPublicDomain(data, res);
  if (error === false) {
    res.send({ error: false });
  }
});
639 |
/**
 * GET /checkEmailableStatus — report whether a Wikimedia user can receive
 * e-mail. The MediaWiki users query exposes an `emailable` property on the
 * user record only when the user is emailable, so presence of the key is the
 * signal. Responds with a bare JSON boolean.
 */
server.get("/checkEmailableStatus", async (req, res) => {
  const { username } = req.query;
  const usersQuery = await customFetch(
    process.env.NEXT_PUBLIC_WIKIMEDIA_URL +
      `/w/api.php?action=query&list=users&ususers=${username}&usprop=emailable&format=json`,
    "GET"
  );
  // BUG FIX: `users[0]` was not optional-chained, so a malformed API reply
  // (missing `users`) threw a TypeError instead of returning false.
  const emailableStatus =
    usersQuery?.query?.users?.[0]?.emailable !== undefined;
  res.send(emailableStatus);
});
651 |
/**
 * GET /getMetadata
 *
 * Returns source metadata used by the UI to prefill the Wikimedia Commons
 * upload form. `option` selects the provider ("gb", "trove" or "pdl");
 * `bookID` (plus `categoryID` for PDL) identifies the work, and an optional
 * `IAIdentifier` overrides the derived Internet Archive identifier (PDL only).
 */
server.get("/getMetadata", async (req, res) => {
  const { option, bookID, categoryID, IAIdentifier } = req.query;
  switch (option) {
    case "gb": {
      const gbRes = await customFetch(
        `https://www.googleapis.com/books/v1/volumes/${bookID}?key=${GB_KEY}`,
        "GET"
      );
      res.send(gbRes);
      break;
    }
    case "trove": {
      const troveRes = await customFetch(
        `https://api.trove.nla.gov.au/v2/newspaper/${bookID}?key=${trove_key}&encoding=json&reclevel=full`,
        "GET"
      );
      res.send(troveRes);
      break;
    }
    case "pdl": {
      const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookID}&page=1&CategoryID=${categoryID}&Searched=W3GX`;
      const options = {
        uri,
        transform: function (body) {
          return cheerio.load(body);
        },
      };
      const pdlRes = await getPDLMetaData(options, bookID, categoryID);
      // BUG FIX: the old check `IAIdentifier?.trim() !== ""` was true when
      // the parameter was absent (undefined !== ""), so replaceTitle(undefined)
      // was used instead of falling back to the scraped PDL title.
      const customIdentifier = IAIdentifier?.trim();
      pdlRes.IAIdentifier = customIdentifier
        ? replaceTitle(customIdentifier)
        : replaceTitle(await getPDLTitle(options));
      res.send(pdlRes);
      break;
    }
  }
});
687 |
// POST /webhook — deployment hook: pull the latest code, reinstall
// dependencies, and restart the Toolforge webservice. The HTTP response is
// sent immediately; the shell command runs in the background and its outcome
// is only logged.
// NOTE(review): nothing visible here authenticates the caller — anyone who
// can reach this endpoint can trigger a pull + restart; confirm it is
// protected upstream (secret header, network ACL, or platform auth).
server.post("/webhook", async (req, res) => {
  exec(
    "cd www/js; git pull origin master; yes | npm install; webservice --backend kubernetes node16 restart",
    (err, stdout, stderr) => {
      if (err) {
        // exec-level failure: command could not run or exited non-zero.
        logger.log({
          level: "error",
          message: `webhook err ${err}`,
        });
      } else if (stderr) {
        // Command ran but wrote to stderr; treated as an error.
        logger.log({
          level: "error",
          message: `webhook stderr ${stderr}`,
        });
      } else {
        logger.log({
          level: "info",
          message: `webhook ${stdout}`,
        });
      }
    }
  );
  // Acknowledge the webhook right away, before the deploy finishes.
  res.send();
});
712 |
/**
 * POST /download
 *
 * Second step of the Google Books flow: the user solves Google's captcha in
 * the browser and posts back the direct `books.googleusercontent.com`
 * content URL, which is enqueued together with the state captured by
 * POST /check (GBdetails / GBreq / GBcommonsMetaData).
 */
server.post("/download", async (req, res) => {
  // BUG FIX: the old pattern used `req=*` (zero-or-more "=" characters, a
  // typo for `req=.*`) and was unanchored, so any string merely containing
  // the host matched. Anchor it and require the `req=` query parameter.
  const gbContentUrl = /^https:\/\/books\.googleusercontent\.com\/books\/content\?req=/;
  if (!gbContentUrl.test(req.body.url)) {
    res.send({
      error: true,
      message: "Invalid URL.",
    });
    return;
  }
  if (!GBreq) {
    // /check was never called for this book (or the server restarted), so
    // there is no pending Google Books request to attach this URL to.
    res.send({
      error: true,
      message: "No pending request. Please submit the book again.",
    });
    return;
  }
  res.send({
    error: false,
    message: "You will be mailed with the details soon!",
  });
  GoogleBooksProducer(
    req.body.url,
    req.body.titleInIA,
    GBdetails,
    emailaddr,
    authUserName,
    GBreq.query.isEmailNotification,
    GBreq.query.isUploadCommons,
    GBreq.query.oauthToken,
    GBcommonsMetaData
  );
});
738 |
/**
 * Fallback GET handler: delegate every unmatched route to Next.js.
 */
server.get("*", (req, res) => handle(req, res));
745 |
/**
 * Fallback POST handler: delegate unmatched POST routes (used by next-auth)
 * to Next.js.
 */
server.post("*", (req, res) => handle(req, res));
752 |
// Start the HTTP server; in development, also pop the app open in the
// default browser (fire-and-forget, same as the original async IIFE).
server.listen(PORT, (err) => {
  if (err) throw err;
  if (dev) {
    void open(`http://localhost:${PORT}/`);
  }
});
761 | })
762 | .catch((ex) => {
763 | console.error(ex.stack);
764 | process.exit(1);
765 | });
766 |
--------------------------------------------------------------------------------
/components/Books.js:
--------------------------------------------------------------------------------
1 | import React, { useEffect, useState } from "react";
2 | import Swal from "sweetalert2";
3 | import { host } from "../utils/constants";
4 | import { useSession, signIn } from "next-auth/react";
5 | import ChangeIdentifier from "./ChangeIdentifier";
6 | import useMetadataForUI from "../hooks/useMetadataForUI";
7 | import BooksWrapper from "./BooksWrapper";
8 | import { Box, Tooltip } from "@mui/material";
9 |
const Books = () => {
  // next-auth session; `session.user.name` is the Wikimedia username and
  // `session.accessToken` the OAuth token forwarded to the backend.
  const { data: session } = useSession();
  // Selected source: "gb" (Google Books), "pn" (Panjab Digital Library) or
  // "trove" (Trove newspaper/gazette articles).
  const [option, setOption] = useState("gb");
  const [bookid, setBookId] = useState("");
  const [email, setEmail] = useState("");
  // Spinner flag while a request is in flight.
  const [loader, setLoader] = useState(false);
  // Server-side validation results for the chosen IA identifier.
  const [isDuplicate, setIsDuplicate] = useState(false);
  const [isInValidIdentifier, setIsInValidIdentifier] = useState(false);
  const [isEmailNotification, setIsEmailNotification] = useState(false);
  const [isUploadCommons, setIsUploadCommons] = useState(false);
  const [IATitle, setIATitle] = useState("");
  const [IAIdentifier, setIAIdentifier] = useState("");
  const [inputDisabled, setInputDisabled] = useState(false);
  const [isUserEmailable, setIsUserEmailable] = useState(false);
  // Two-phase Commons flow: metadata is fetched first ("ready"), shown for
  // editing, then marked "updated" before the actual submit happens.
  const [isCommonsMetadataReady, setIsCommonsMetadataReady] = useState(false);
  const [hasCommonsMetadataUpdated, setHasCommonsMetadataUpdated] = useState(
    false
  );
  const [commonsMetadata, setCommonsMetadata] = useState();
  const { getMetadataForUI } = useMetadataForUI();
31 | const handleChange = (event) => {
32 | setOption(event.target.value);
33 | setBookId("");
34 | setIsDuplicate(false);
35 | setIsEmailNotification(false);
36 | setIsInValidIdentifier(false);
37 | setIATitle("");
38 | setIAIdentifier("");
39 | setInputDisabled(false);
40 | setIsUploadCommons(false);
41 | };
42 |
43 | const onResetButtonClicked = () => {
44 | setIsDuplicate(false);
45 | setIsEmailNotification(false);
46 | setIsInValidIdentifier(false);
47 | setInputDisabled(false);
48 | setIATitle("");
49 | setIAIdentifier("");
50 | setIsUploadCommons(false);
51 | setIsCommonsMetadataReady(false);
52 | setHasCommonsMetadataUpdated(false);
53 | };
54 |
55 | const onSwalClosed = () => {
56 | setInputDisabled(false);
57 | setIAIdentifier("");
58 | setIATitle("");
59 | };
  // Renders the source-specific input field for the selected option.
  // NOTE(review): the JSX below is incomplete in this extract (several source
  // lines are missing between the visible fragments); it is reproduced
  // verbatim — do not treat it as compilable as-is.
  const renderContent = (option) => {
    switch (option) {
      case "gb":
        return (
          <>
            2. Enter Google Books ID


            https://books.google.co.in/books?id=

            setBookId(event.target.value)}
            aria-describedby="bid"
            />

          >
        );
      case "pn":
        return (
          <>
            2. Enter URI

            setBookId(event.target.value)}
            required
            placeholder="http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=9073&page=1&CategoryID=1&Searched="
            />

          >
        );
      case "trove":
        return (
          <>
            2. Enter Newspaper/Gazette Article ID


            https://trove.nla.gov.au/newspaper/article/

            setBookId(event.target.value)}
            required
            aria-describedby="bid"
            />

          >
        );
    }
  };
126 |
127 | const isPDLValidUrl = (urlString) => {
128 | var urlPattren = new RegExp(
129 | "((http|https)\\:\\/\\/)(www.)?(panjabdigilib\\.org\\/webuser\\/searches\\/displayPage\\.jsp\\?ID\\=)([0-9]*)(\\&page\\=)([0-9]*)(\\&CategoryID\\=)([0-9]*)(\\&Searched\\=)([a-zA-Z0-9@:%._+~#?&//=]*)"
130 | );
131 | return urlPattren.test(urlString);
132 | };
133 |
134 | const checkEmailableStatus = async (username) => {
135 | const response = await fetch(
136 | `${host}/checkEmailableStatus?username=${username}`
137 | );
138 | const isEmailable = await response.json();
139 | return isEmailable;
140 | };
141 |
  // Form submission. Each source branch runs a two-phase flow when Commons
  // upload is on: first fetch metadata for the user to review
  // (isCommonsMetadataReady), then — once hasCommonsMetadataUpdated is set —
  // re-enter this function to actually POST to /check. The loader flag is
  // managed cooperatively with the render condition further down, so the
  // exact order of state updates here matters.
  const onSubmit = async (event) => {
    // Called both from the form (event) and programmatically with null from
    // the metadata-updated effect below.
    event?.preventDefault();

    if (!session.user.name || session.user.name === "") {
      Swal("Error!", "Log in with Wikimedia to continue", "error");
      return;
    }

    setLoader(true);
    setIsDuplicate(false);
    setIsInValidIdentifier(false);

    let url = "";
    switch (option) {
      case "gb":
        if (isUploadCommons && !hasCommonsMetadataUpdated) {
          // Phase 1: verify public-domain status, then fetch Commons
          // metadata for the user to review before submitting.
          const checkPublicDomainURL = `${host}/checkPublicDomain?bookid=${bookid}`;
          const checkPublicDomainRes = await fetch(checkPublicDomainURL);
          const checkPublicDomainStatus = await checkPublicDomainRes.json();
          if (checkPublicDomainStatus.error === false) {
            const commonsMetadata = await getMetadataForUI("gb", bookid);
            setCommonsMetadata(commonsMetadata);
            setIsCommonsMetadataReady(true);
          } else {
            Swal("Error!", checkPublicDomainStatus.message, "error");
            setLoader(false);
          }
        } else {
          // Phase 2: real submission to the backend /check endpoint.
          url = `${host}/check?bookid=${bookid}&option=${
            option + (email ? "&email=" + email : "")
          }&userName=${
            session?.user?.name
          }&IAtitle=${IAIdentifier}&isEmailNotification=${isEmailNotification}&isUploadCommons=${isUploadCommons}&oauthToken=${
            session?.accessToken
          }`;
          fetch(url, {
            method: "POST",
            headers: {
              "Content-Type": "application/json",
            },
            body: JSON.stringify({
              commonsMetadata: commonsMetadata,
            }),
          })
            .then((response) => response.json())
            .then(async (response) => {
              setLoader(false);
              if (response.isDuplicate) {
                // Identifier already exists on Internet Archive.
                setIsDuplicate(true);
                setIATitle(response.titleInIA);
                setInputDisabled(true);
              } else if (response.isInValidIdentifier) {
                setIsInValidIdentifier(true);
                setIATitle(response.titleInIA);
                setInputDisabled(true);
              } else {
                if (response.error) {
                  Swal("Error!", response.message, "error");
                } else {
                  setIsCommonsMetadataReady(false);
                  // Google blocks automated downloads behind a captcha: ask
                  // the user to solve it and paste the resulting content URL.
                  const { value: url } = await Swal({
                    input: "url",
                    backdrop: true,
                    width: "50%",
                    allowEscapeKey: false,
                    allowOutsideClick: false,
                    showCloseButton: true,
                    onClose: onSwalClosed,
                    title: 'Just a few more steps... ',
                    html:
                      `` +
                      `Go to this link: ${response.title} ` +
                      `Enter the captcha. ` +
                      `Enter the URL below (https://books.googleusercontent.com/books/content?req=xxx ) `,
                  });

                  // Swal resolves with an object when dismissed; only a real
                  // string URL continues to the /download step.
                  if (url && typeof url !== "object") {
                    setLoader(true);
                    fetch(`${host}/download`, {
                      body: JSON.stringify({
                        url: url,
                        titleInIA: response.IAIdentifier,
                      }),
                      headers: {
                        "Content-Type": "application/json",
                        "Access-Control-Allow-Origin": "*",
                      },
                      method: "POST",
                    })
                      .then((response) => response.json())
                      .then((response) => {
                        setLoader(false);
                        if (response.error)
                          Swal("Error!", response.message, "error");
                        else Swal("Voila!", response.message, "success");
                      });
                  }
                }
              }
            });
        }

        break;
      case "pn":
        if (isPDLValidUrl(bookid)) {
          // The PDL "book id" field actually holds a full URL; extract the
          // ID and CategoryID query parameters from it.
          const searchParams = new URL(bookid).searchParams;
          const ID = searchParams.get("ID");
          const categoryID = searchParams.get("CategoryID");
          if (isUploadCommons && !hasCommonsMetadataUpdated) {
            // Phase 1: fetch Commons metadata for review.
            const pdlMetadata = await getMetadataForUI(
              "pdl",
              ID,
              categoryID,
              IAIdentifier
            );
            setCommonsMetadata(pdlMetadata);
            setIsCommonsMetadataReady(true);
          } else {
            // Phase 2: submit to /check with the extracted parameters.
            url = `${host}/check?bookid=${ID}&option=${
              option + (email ? "&email=" + email : "")
            }&categoryID=${categoryID}&userName=${
              session.user.name
            }&IAtitle=${IAIdentifier}&isEmailNotification=${isEmailNotification}&isUploadCommons=${isUploadCommons}&oauthToken=${
              session?.accessToken
            }`;
            fetch(url, {
              method: "POST",
              headers: {
                "Content-Type": "application/json",
              },
              body: JSON.stringify({
                commonsMetadata: commonsMetadata,
              }),
            })
              .then((res) => res.json())
              .then((response) => {
                setLoader(false);
                if (response.isDuplicate) {
                  setIsDuplicate(true);
                  setIATitle(response.titleInIA);
                  setInputDisabled(true);
                } else if (response.isInValidIdentifier) {
                  setIsInValidIdentifier(true);
                  setIATitle(response.titleInIA);
                  setInputDisabled(true);
                } else {
                  if (response.error) {
                    Swal("Error!", response.message, "error");
                  } else {
                    setIsCommonsMetadataReady(false);
                    Swal("Voila!", response.message, "success");
                  }
                }
              });
          }
        } else {
          setLoader(false);
          Swal("Opps...", "Enter a valid URL", "error");
        }
        break;
      case "trove":
        if (isUploadCommons && !hasCommonsMetadataUpdated) {
          // Phase 1: fetch Commons metadata for review.
          const commonsMetadata = await getMetadataForUI("trove", bookid);
          setCommonsMetadata(commonsMetadata);
          setIsCommonsMetadataReady(true);
        } else {
          // Phase 2: submit the article id to /check.
          url = `${host}/check?bookid=${bookid}&option=${
            option + (email ? "&email=" + email : "")
          }&userName=${
            session.user.name
          }&IAtitle=${IAIdentifier}&isUploadCommons=${isUploadCommons}&oauthToken=${
            session?.accessToken
          }&isEmailNotification=${isEmailNotification}`;
          fetch(url, {
            method: "POST",
            headers: {
              "Content-Type": "application/json",
            },
            body: JSON.stringify({
              commonsMetadata: commonsMetadata,
            }),
          })
            .then((res) => res.json())
            .then((response) => {
              setLoader(false);
              if (response.isDuplicate) {
                setIsDuplicate(true);
                setIATitle(response.titleInIA);
                setInputDisabled(true);
              } else if (response.isInValidIdentifier) {
                setIsInValidIdentifier(true);
                setIATitle(response.titleInIA);
                setInputDisabled(true);
              } else {
                if (response.error) {
                  Swal("Error!", response.message, "error");
                } else {
                  setIsCommonsMetadataReady(false);
                  Swal("Voila!", response.message, "success");
                }
              }
            });
        }

        break;
    }
  };
349 |
350 | useEffect(async () => {
351 | const isEmailable = await checkEmailableStatus(session?.user?.name);
352 | setIsUserEmailable(isEmailable);
353 | }, [session]);
354 |
  // After the Commons-metadata dialog resolves: if the user confirmed the
  // metadata, re-run onSubmit to perform the real submission; if the user
  // switched the Commons upload off mid-flow, reset the form instead.
  useEffect(() => {
    window.scrollTo({ top: 0, behavior: "smooth" });
    if (
      hasCommonsMetadataUpdated &&
      isUploadCommons &&
      isCommonsMetadataReady
    ) {
      // NOTE(review): onSubmit declares a single parameter; the second
      // argument is ignored — confirm before removing it.
      onSubmit(null, session.user.name);
    }
    if (isUploadCommons === false && isCommonsMetadataReady) {
      onResetButtonClicked();
      setLoader(false);
    }
  }, [hasCommonsMetadataUpdated, isUploadCommons]);
369 |
  // Component markup: header, the upload form (not visible in this extract),
  // a loading indicator, and the Commons-metadata review section.
  // NOTE(review): large spans of the JSX are missing from this extract; the
  // fragments below are reproduced verbatim — do not treat them as
  // compilable as-is.
  return (



            Book Uploader Bot


          Upload books, newspapers, magazines etc. from public libraries to
          Internet Archive and Wikimedia Commons.



        {loader && (!isCommonsMetadataReady || hasCommonsMetadataUpdated) ? (

            Fetching information. Please wait..




        ) : null}


        {isCommonsMetadataReady && (



        )}


  );
};
692 |
693 | export default Books;
694 |
--------------------------------------------------------------------------------