├── public └── assets │ ├── bub2.png │ ├── web.png │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── mstile-150x150.png │ ├── apple-touch-icon.png │ ├── android-chrome-192x192.png │ ├── android-chrome-384x384.png │ └── browserconfig.xml ├── .gitignore ├── .gitpod.yml ├── next.config.js ├── docker-compose.yml ├── utils ├── bullconfig.js ├── constants.js ├── scraper.js └── helper.js ├── bull ├── email-queue │ ├── producer.js │ └── consumer.js ├── commons-queue │ ├── producer.js │ └── consumer.js ├── google-books-queue │ ├── producer.js │ └── consumer.js ├── pdl-queue │ ├── producer.js │ └── consumer.js └── trove-queue │ ├── producer.js │ └── consumer.js ├── pages ├── index.js ├── faqs.js ├── googleauth.js ├── stats.js ├── api │ └── auth │ │ └── [...nextauth].js ├── queue.js └── _app.js ├── components ├── BooksWrapper.js ├── Answer.js ├── Question.js ├── FaqsSection.js ├── ChangeIdentifier.js ├── QueueSection.js ├── ShowQueue.js ├── Footer.js ├── Header.js ├── ShowJobInformation.js ├── QueueTable.js └── Books.js ├── .gitpod.Dockerfile ├── PR_TEMPLATE.md ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ ├── bug_report.md │ └── issue_template.md ├── .env.example ├── LICENSE.md ├── FAQ.md ├── styles └── global.less ├── GSSOC_INSTRUCTIONS.md ├── HELP_WANTED.md ├── package.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── hooks └── useMetadataForUI.js ├── README.md └── server.js /public/assets/bub2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/bub2.png -------------------------------------------------------------------------------- /public/assets/web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/web.png -------------------------------------------------------------------------------- /public/assets/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon.ico -------------------------------------------------------------------------------- /public/assets/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon-16x16.png -------------------------------------------------------------------------------- /public/assets/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/favicon-32x32.png -------------------------------------------------------------------------------- /public/assets/mstile-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/mstile-150x150.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .next 2 | node_modules 3 | .vscode 4 | .env 5 | package-lock.json 6 | out/ 7 | *.log 8 | .env.stage 9 | .env.local -------------------------------------------------------------------------------- /public/assets/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/apple-touch-icon.png -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | tasks: 2 | - init: npm install && npm run build 3 | command: npm run start 4 | image: 5 | file: .gitpod.Dockerfile 6 | 
-------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | const withLess = require("next-with-less"); 2 | 3 | module.exports = withLess({ 4 | lessLoaderOptions: {}, 5 | }); 6 | -------------------------------------------------------------------------------- /public/assets/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/android-chrome-192x192.png -------------------------------------------------------------------------------- /public/assets/android-chrome-384x384.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderwassananmol/BUB2/HEAD/public/assets/android-chrome-384x384.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | redis: 4 | image: redis 5 | ports: 6 | - "6379:6379" 7 | restart: always -------------------------------------------------------------------------------- /public/assets/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | #da532c 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /utils/bullconfig.js: -------------------------------------------------------------------------------- 1 | const Queue = require('bull'); 2 | require("dotenv").config(); 3 | module.exports = { 4 | getNewQueue: (name) => { 5 | return new Queue(name,{ 6 | redis: { 7 | port: process.env.redisport, 8 | host: process.env.redishost 9 | } 10 | }) 11 | } 12 | } -------------------------------------------------------------------------------- 
/bull/email-queue/producer.js: -------------------------------------------------------------------------------- 1 | const config = require("../../utils/bullconfig"); 2 | require("./consumer"); 3 | 4 | const EmailQueue = config.getNewQueue("email-queue"); 5 | 6 | module.exports = async (userName, title, trueURI, status) => { 7 | EmailQueue.add({ 8 | userName, 9 | title, 10 | trueURI, 11 | status, 12 | }); 13 | }; 14 | -------------------------------------------------------------------------------- /pages/index.js: -------------------------------------------------------------------------------- 1 | import Header from "../components/Header"; 2 | import Books from "../components/Books"; 3 | const init = () => ( 4 |
5 |
6 |
7 |
8 | 9 |
10 |
11 |
12 | ); 13 | 14 | export default init; 15 | -------------------------------------------------------------------------------- /components/BooksWrapper.js: -------------------------------------------------------------------------------- 1 | import { Box } from "@mui/material"; 2 | 3 | export default function BooksWrapper({ isCommonsMetadataReady, children }) { 4 | return ( 5 | 15 | {children} 16 | 17 | ); 18 | } 19 | -------------------------------------------------------------------------------- /.gitpod.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gitpod/workspace-full 2 | USER gitpod 3 | RUN sudo apt-get update -q && \ 4 | sudo apt-get install -yq redis-server 5 | 6 | # Install custom tools, runtime, etc. using apt-get 7 | # For example, the command below would install "bastet" - a command line tetris clone: 8 | # 9 | # RUN sudo apt-get -q update && # sudo apt-get install -yq bastet && # sudo rm -rf /var/lib/apt/lists/* 10 | # 11 | # More information: https://www.gitpod.io/docs/config-docker/ 12 | -------------------------------------------------------------------------------- /PR_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes: [Link to the Issue] 2 | 3 | ## Proposed Changes 4 | - 5 | - 6 | - 7 | 8 | ## Files Added 9 | - 10 | 2 | 3 | 4 | 5 | ## Expected Behavior 6 | 7 | 8 | ## Current Behavior 9 | 10 | 11 | ## Possible Solution 12 | 13 | 14 | ## Steps to Reproduce 15 | 16 | 17 | 1. 18 | 2. 19 | 3. 20 | 4. 
21 | 22 | ## Context (Environment) 23 | 24 | 25 | 26 | 27 | 28 | ## Detailed Description 29 | 30 | 31 | ## Possible Implementation 32 | 33 | -------------------------------------------------------------------------------- /bull/pdl-queue/producer.js: -------------------------------------------------------------------------------- 1 | const config = require("../../utils/bullconfig"); 2 | const PDLQueue = config.getNewQueue("pdl-queue"); 3 | const cheerio = require("cheerio"); // Basically jQuery for node.js 4 | require("./consumer"); 5 | const { getPDLMetaData } = require("../../utils/helper.js"); 6 | 7 | module.exports = async ( 8 | bookid, 9 | IAIdentifier, 10 | categoryID, 11 | email, 12 | userName, 13 | isEmailNotification, 14 | isUploadCommons, 15 | oauthToken, 16 | commonsMetadata 17 | ) => { 18 | const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookid}&page=1&CategoryID=${categoryID}&Searched=W3GX`; 19 | var options = { 20 | uri, 21 | transform: function (body) { 22 | return cheerio.load(body); 23 | }, 24 | }; 25 | 26 | const metaData = await getPDLMetaData(options, bookid, categoryID); 27 | 28 | metaData["email"] = email; 29 | metaData["userName"] = userName; 30 | metaData["IAIdentifier"] = IAIdentifier; 31 | metaData["isEmailNotification"] = isEmailNotification; 32 | metaData["isUploadCommons"] = isUploadCommons; 33 | metaData["oauthToken"] = oauthToken; 34 | metaData["commonsMetadata"] = commonsMetadata; 35 | const details = { 36 | details: metaData, 37 | }; 38 | PDLQueue.add(details); 39 | }; 40 | -------------------------------------------------------------------------------- /bull/trove-queue/producer.js: -------------------------------------------------------------------------------- 1 | const config = require("../../utils/bullconfig"); 2 | const TroveQueue = config.getNewQueue("trove-queue"); 3 | const { getTroveMetaData } = require("../../utils/helper.js"); 4 | const cheerio = require("cheerio"); // Basically jQuery for 
node.js 5 | 6 | require("./consumer"); 7 | module.exports = async ( 8 | bookid, 9 | 10 | IAIdentifier, 11 | 12 | metaData, 13 | 14 | email, 15 | 16 | userName, 17 | isEmailNotification, 18 | isUploadCommons, 19 | oauthToken, 20 | commonsMetadata 21 | ) => { 22 | const uri = `https://trove.nla.gov.au/newspaper/article/${bookid}`; 23 | var options = { 24 | uri, 25 | transform: function (body) { 26 | return cheerio.load(body); 27 | }, 28 | }; 29 | 30 | const issueRenditionId = await getTroveMetaData(options); 31 | 32 | metaData["email"] = email; 33 | metaData["issueRenditionId"] = issueRenditionId; 34 | metaData["userName"] = userName; 35 | metaData["IAIdentifier"] = IAIdentifier; 36 | metaData["isEmailNotification"] = isEmailNotification; 37 | metaData["isUploadCommons"] = isUploadCommons; 38 | metaData["oauthToken"] = oauthToken; 39 | metaData["commonsMetadata"] = commonsMetadata; 40 | 41 | const details = { 42 | details: metaData, 43 | }; 44 | TroveQueue.add(details); 45 | }; 46 | -------------------------------------------------------------------------------- /components/ShowQueue.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import Link from "next/link"; 3 | 4 | const ShowQueue = ({ data, library }) => ( 5 |
6 | 20 | 21 | 22 | {library} 23 | 24 | Waiting: {data?.waiting} 25 | 26 | 27 | Active: {data?.active} 28 | 29 | 30 | Completed: {data?.completed} 31 | 32 | 33 | Failed: {data?.failed} 34 | 35 | 36 | Delayed: {data?.delayed} 37 | 38 | 39 | 40 |
41 | ); 42 | 43 | export default ShowQueue; 44 | -------------------------------------------------------------------------------- /GSSOC_INSTRUCTIONS.md: -------------------------------------------------------------------------------- 1 | ## Instructions of GSSOC 2 | 1. This program is for beginners to get started with open source. The only criterion for contribution are your dedication and enthusiasm to contribute to open source. 3 | 2. You can check your contribution scores from the website: 4 | https://www.gssoc.tech/profile.html 5 | 3. Scores will be given as follows as per the issue label on the projects: 6 | * "beginner" : 2 points 7 | * "easy" : 4 points 8 | * "medium" : 7 points 9 | * "hard" : 10 points 10 | 11 | 4. The issues labelled 'taken' are already assigned to other contributors, so do not try to fix those issues until assigned to you. 12 | 5. Please refrain from commenting on the issues that are already assigned to any contributor! 13 | 6. Refer to the issue that a PR solves, so that a PR can be related to an issue. 14 | 7. More issues will be created by the Project admins in the coming weeks so keep on checking the Projects that you would like to contribute! 15 | 8. We request everyone in the Slack community to reply in thread only! 16 | 9. Do not share any personal informations like Contact details, Personal Address, Email IDs etc in public channels! 17 | 10. We request everyone in the community to be kind & respectful as much as you can! Any abuse, hurtful words or wrong language will not be tolerated and the member will be removed immediately without prior warning. 18 | -------------------------------------------------------------------------------- /HELP_WANTED.md: -------------------------------------------------------------------------------- 1 | Following are the list of other sources that can be used to add books to Internet Archive, along with a sample book link and web APIs available on respective websites: 2 | 3 | ### 1. 
Project Gutenberg 4 | - Website: https://www.gutenberg.org/ 5 | - Sample Link: http://www.gutenberg.org/files/155/155-h/155-h.htm 6 | - Web API for Project Gutenberg ebook metadata: https://gutendex.com (For more info: [click here](https://github.com/garethbjohnson/gutendex)) 7 | 8 | ### 2. Open Book Publishers 9 | - Website: https://www.openbookpublishers.com/ 10 | - Sample Link: https://www.openbookpublishers.com/product/106#0 11 | - API: https://github.com/OpenBookPublishers/obp_institution_api 12 | 13 | ### 3. Library of Congress 14 | - Website: http://www.read.gov/books/ 15 | - Sample Link: http://www.read.gov/books/pageturner/aesops_fables/#page/8/mode/2up 16 | - API: The loc.gov JSON API provides structured data about Library of Congress collections.All URLs start with https://www.loc.gov/ and need to include fo=json as a parameter to get JSON.No API key or authentication is required.(For more info: [click here](https://libraryofcongress.github.io/data-exploration/)) 17 | 18 | ### 4.Bloomsbury Collections 19 | - Website: https://www.bloomsburycollections.com/ 20 | - Sample Link: https://www.bloomsburycollections.com/book/government-communication-cases-and-challenges/#0 21 | 22 | ### 5.Directory of Open Access Journals 23 | - Website: https://doaj.org/ 24 | - Sample Link: https://doaj.org/article/5b3b17834afa41848e52db56c0c9ea78#0 25 | - API: https://doaj.org/api/v1/ (For more info: [click here](https://doaj.org/api/v1/docs)) -------------------------------------------------------------------------------- /components/Footer.js: -------------------------------------------------------------------------------- 1 | export default () => ( 2 |
3 | 15 | 45 | 46 | 51 | 56 |
57 | ); 58 | -------------------------------------------------------------------------------- /utils/constants.js: -------------------------------------------------------------------------------- 1 | export const host = 2 | process.env.NODE_ENV === "production" 3 | ? "https://bub2.wmcloud.org" 4 | : "http://localhost:5000"; //If you have port set in env file, replace 5000 with "process.env.PORT" 5 | export const stats_data_endpoint = `${host}/getstats`; 6 | export const queue_data_endpoint = `${host}/getqueue`; 7 | export const queuelist_data_endpoint = `${host}/getqueuelist`; 8 | export const library = { 9 | gb: "Google Books", 10 | pdl: "Panjab Digital Library", 11 | trove: "Trove Digital Library", 12 | }; 13 | export const permission = `CCO No Rights Reserved https://creativecommons.org/publicdomain/mark/1.0/`; 14 | export const faq_data = [ 15 | { 16 | que: "What is Book Uploader Bot?", 17 | ans: 18 | "A Book Uploader Bot transfers documents from public libraries such as Google Books, and Punjab Digital Library etc to Internet Archive.", 19 | }, 20 | { 21 | que: "What does this tool do?", 22 | ans: 23 | "The tool is built to help the community with free books that are available in the online public libraries. 
It makes the integration of books easier.", 24 | }, 25 | { 26 | que: "Who can benefit from this tool?", 27 | ans: 28 | "The tool, under the hood, compiles all the book images/pdf and its meta data at one place where it can be accessed by anyone in the world.", 29 | }, 30 | { 31 | que: "What are the future enhancements?", 32 | ans: 33 | "Bulk upload feature, direct upload to Wikimedia Commons and addition of newer libraries are some features being actively worked upon.", 34 | }, 35 | { 36 | que: "Can I upload my own book?", 37 | ans: 38 | "If an appropriate license is attached to the book which allows it to be archived (which doesn't allow copyright infringement suits, etc.), then book can be uploaded.", 39 | }, 40 | { 41 | que: "Can I delete or undo an upload?", 42 | ans: 43 | "Archives are supposed to be read-only and deleting is a 'write' operation. So it is not supported.", 44 | }, 45 | ]; 46 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "BUB2", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "dev": "node server.js", 8 | "build": "next build;", 9 | "start": "npm run build; NODE_ENV=production node server.js" 10 | }, 11 | "husky": { 12 | "hooks": { 13 | "pre-commit": "lint-staged" 14 | } 15 | }, 16 | "lint-staged": { 17 | "*.js": [ 18 | "prettier --write", 19 | "git add" 20 | ] 21 | }, 22 | "keywords": [], 23 | "author": "", 24 | "license": "ISC", 25 | "dependencies": { 26 | "@emotion/react": "^11.11.1", 27 | "@emotion/styled": "^11.11.0", 28 | "@mui/material": "^5.14.12", 29 | "@mui/styles": "^5.14.12", 30 | "@primer/octicons-react": "^9.5.0", 31 | "@wikimedia/codex-icons": "^0.15.0", 32 | "async": "^2.6.3", 33 | "blob-stream": "^0.1.3", 34 | "body-parser": "^1.18.3", 35 | "bull": "^4.11.3", 36 | "cheerio": "^1.0.0-rc.3", 37 | "cli-progress": "^2.1.1", 38 | 
"compression": "^1.7.4", 39 | "cors": "^2.8.5", 40 | "dotenv": "^6.2.0", 41 | "eslint": "^5.16.0", 42 | "express": "^4.16.4", 43 | "express-fileupload": "^1.1.4", 44 | "fs": "0.0.1-security", 45 | "http": "0.0.0", 46 | "https": "^1.0.0", 47 | "ioredis": "^5.4.1", 48 | "isomorphic-fetch": "^3.0.0", 49 | "isomorphic-unfetch": "^3.0.0", 50 | "jsdom": "^20.0.0", 51 | "jspdf": "^2.5.1", 52 | "jszip": "^3.2.2", 53 | "less": "^4.1.3", 54 | "less-loader": "^11.1.3", 55 | "loaders.css": "^0.1.2", 56 | "lodash": "^4.17.20", 57 | "mwn": "^2.0.1", 58 | "next": "^12.2.5", 59 | "next-auth": "^4.15.1", 60 | "next-with-less": "^3.0.1", 61 | "node-fetch": "^2.2.1", 62 | "nodemailer": "^6.7.8", 63 | "nprogress": "^0.2.0", 64 | "open": "^7.0.3", 65 | "pdf-lib": "^1.17.1", 66 | "pdfkit": "^0.9.1", 67 | "react": "^17.0.2", 68 | "react-dom": "^17.0.2", 69 | "react-loaders": "^3.0.1", 70 | "react-table": "^7.0.0-rc.16", 71 | "request": "^2.88.0", 72 | "request-promise": "^4.2.4", 73 | "scissors": "^0.2.5", 74 | "stream-length": "^1.0.2", 75 | "styled-jsx": "^3.2.1", 76 | "sweetalert2": "^7.33.1", 77 | "url": "^0.11.0", 78 | "winston": "^3.9.0" 79 | }, 80 | "devDependencies": { 81 | "@wikimedia/codex": "^0.13.0", 82 | "@wikimedia/codex-design-tokens": "^0.13.0", 83 | "husky": "^4.2.3", 84 | "lint-staged": "^10.0.9", 85 | "prettier": "2.0.2" 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /pages/googleauth.js: -------------------------------------------------------------------------------- 1 | import Header from "../components/Header"; 2 | import React, { useState } from "react"; 3 | const GoogleAuth = () => { 4 | const CLIENT_ID = 5 | "267327767504-fbtmbl5kkf8m9bjahlv3umu4q74as560.apps.googleusercontent.com"; // Replace with your client ID 6 | const SCOPES = "https://www.googleapis.com/auth/adwords"; 7 | 8 | const [tokens, setTokens] = useState({ 9 | accessToken: null, 10 | refreshToken: null, 11 | }); 12 | 13 | const handleGoogleLogin = 
() => { 14 | // Create the authorization URL 15 | const authUrl = `https://accounts.google.com/o/oauth2/v2/auth?client_id=${CLIENT_ID}&redirect_uri=${encodeURIComponent( 16 | window.location.origin 17 | )}&response_type=token&scope=${encodeURIComponent( 18 | SCOPES 19 | )}&include_granted_scopes=true`; 20 | 21 | // Open a popup for Google Login 22 | const authWindow = window.open( 23 | authUrl, 24 | "google-auth", 25 | "width=500,height=600" 26 | ); 27 | 28 | // Listen for the authentication response 29 | const pollTimer = setInterval(() => { 30 | try { 31 | if (authWindow.closed) { 32 | clearInterval(pollTimer); 33 | } 34 | 35 | // Check for URL containing tokens 36 | const urlParams = new URLSearchParams( 37 | authWindow.location.hash.replace("#", "?") 38 | ); 39 | console.log(urlParams, "::urlParams"); 40 | if (urlParams.has("access_token")) { 41 | const accessToken = urlParams.get("access_token"); 42 | // Refresh tokens aren't returned in implicit grant flow; use the code flow for a backend server 43 | setTokens({ accessToken, refreshToken: null }); 44 | authWindow.close(); 45 | } 46 | } catch (e) { 47 | // Security constraints: cannot access cross-origin data until redirected to same origin 48 | } 49 | }, 500); 50 | }; 51 | 52 | return ( 53 |
54 |

Google OAuth2 Authentication

55 | 61 | 62 | {tokens.accessToken && ( 63 |
64 |

Tokens Received

65 |

66 | Access Token: {tokens.accessToken} 67 |

68 |

69 | Refresh Token: (Not available in client-side flow) 70 |

71 |
72 | )} 73 |
74 | ); 75 | }; 76 | 77 | export default GoogleAuth; 78 | -------------------------------------------------------------------------------- /utils/scraper.js: -------------------------------------------------------------------------------- 1 | const request = require("request-promise"); 2 | const cheerio = require("cheerio"); 3 | 4 | export const PdlDetails = async (id, categoryId) => { 5 | const pdlURI = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${id}&page=1&CategoryID=${categoryId}&Searched=W3GX`; 6 | const response = await request({ 7 | uri: pdlURI, 8 | headers: { 9 | Accept: 10 | "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 11 | "Accept-Encoding": "gzip, deflate", 12 | "Accept-Language": "en-US,en;q=0.9,hi;q=0.8", 13 | Host: "www.panjabdigilib.org", 14 | "Upgrade-Insecure-Requests": 1, 15 | }, 16 | }); 17 | 18 | let $ = cheerio.load(response); 19 | let title = $( 20 | "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(1)>td>a" 21 | ) 22 | .text() 23 | .trim(); 24 | let authorLabel = $( 25 | "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(3)>td>table>tbody>tr>td:nth-child(1)" 26 | ) 27 | .text() 28 | .trim(); 29 | let author = $( 30 | "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(3)>td>table>tbody>tr>td:nth-child(2)>a" 31 | ) 32 | .text() 33 | .trim(); 34 | let description = $( 35 | 
"#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>div>table>tbody>tr:nth-child(2)>td>table>tbody>tr:nth-child(1)>td:nth-child(2)" 36 | ) 37 | .text() 38 | .trim(); 39 | let preview = $( 40 | "#Nanakshahi>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(2)>table:nth-child(22)>tbody>tr:nth-child(3)>td>table>tbody>tr:nth-child(2)>td>table>tbody>tr>td:nth-child(1)>table>tbody>tr:nth-child(1)>td>table>tbody>tr>td>a>img" 41 | ) 42 | .attr("src") 43 | .slice(8); 44 | 45 | const details = { 46 | title: title, 47 | author: authorLabel != "Author" ? "" : author, 48 | description: 49 | description == "Click here to add description" 50 | ? "No description available" 51 | : description, 52 | preview: "http://www.panjabdigilib.org" + preview, 53 | }; 54 | 55 | return details; 56 | }; 57 | -------------------------------------------------------------------------------- /bull/email-queue/consumer.js: -------------------------------------------------------------------------------- 1 | const config = require("../../utils/bullconfig"); 2 | const EmailQueue = config.getNewQueue("email-queue"); 3 | const winston = require("winston"); 4 | const logger = winston.loggers.get("defaultLogger"); 5 | require("dotenv").config(); 6 | const { Mwn } = require("mwn"); 7 | 8 | function generateMessage(status, title, trueURI) { 9 | let message; 10 | if (status.archive && status.commons) { 11 | message = `Your file "${title}" has been uploaded to Internet Archive and Wikimedia Commons successfully. Take a look at, Internet Archive - ${trueURI.archiveLink}, Wikimedia Commons - ${trueURI.commonsLink}`; 12 | } else if (status.archive && !status.commons) { 13 | message = `Your file "${title}" has been uploaded to Internet Archive successfully! 
Take a look at ${trueURI}`; 14 | } else if (!status.archive && !status.commons) { 15 | message = `Your file "${title}" was not uploaded to Internet Archive! Please try again later.`; 16 | } 17 | return message; 18 | } 19 | 20 | async function mediawikiEmail(username, title, trueURI, status) { 21 | try { 22 | const bot = await Mwn.init({ 23 | apiUrl: process.env.NEXT_PUBLIC_WIKIMEDIA_URL + "/w/api.php", 24 | username: process.env.EMAIL_BOT_USERNAME, 25 | password: process.env.EMAIL_BOT_PASSWORD, 26 | // Set your user agent (required for WMF wikis, see https://meta.wikimedia.org/wiki/User-Agent_policy): 27 | userAgent: "BUB2/1.0 (https://bub2.wmcloud.org)", 28 | // Set default parameters to be sent to be included in every API request 29 | defaultParams: { 30 | assert: "user", // ensure we're logged in 31 | }, 32 | }); 33 | 34 | const csrf_token = await bot.getCsrfToken(); 35 | 36 | bot 37 | .request({ 38 | action: "emailuser", 39 | target: username, 40 | subject: "BUB2 upload status", 41 | text: generateMessage(status, title, trueURI), 42 | token: csrf_token, 43 | format: "json", 44 | }) 45 | .then((data) => { 46 | logger.log({ 47 | level: "info", 48 | message: `Email Sent Successfully! 
Result : ${data}`, 49 | }); 50 | return 200; 51 | }) 52 | .catch((error) => { 53 | logger.log({ 54 | level: "error", 55 | message: `Failed to send email with error: ${error}`, 56 | }); 57 | return error; 58 | }); 59 | } catch (error) { 60 | logger.log({ 61 | level: "error", 62 | message: `mediawikiEmail: ${JSON.stringify(error)}`, 63 | }); 64 | return error; 65 | } 66 | } 67 | 68 | EmailQueue.process(async (job, done) => { 69 | const emailResponse = await mediawikiEmail( 70 | job.data.userName, 71 | job.data.title, 72 | job.data.trueURI, 73 | job.data.status 74 | ); 75 | if (emailResponse !== 200) { 76 | logger.log({ 77 | level: "error", 78 | message: `EmailQueue: ${JSON.stringify(emailResponse)}`, 79 | }); 80 | done(new Error(`EmailQueue: ${emailResponse}`)); 81 | } 82 | done(null, true); 83 | }); 84 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Book Uploader Bot Code of Conduct 2 | In order to promote an open and welcoming atmosphere, we as contributors and maintainers commit ourselves to make involvement in our project and in our group a harassment-free experience for all irrespective of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 3 | 4 | ## Our Standards 5 | Examples of behaviour which contributes to the development of a positive environment include: 6 | * There should be clear communication. 7 | * Listen to everyone's ideas. 8 | * Learn all the aspects before coming to conclusion. 9 | * Using inoffensive language 10 | * Attaining positive work environment should be one of everybody's goals 11 | 12 | Examples of unacceptable participant actions include: 13 | * Use of unethical means to attain an end. 14 | * Putting self interest above group interest. 
15 | * Disrespecting an individual on the basis of gender, religion or culture. 16 | * Publishing private details of people, such as physical or email addresses, without their prior consent 17 | * Undertaking the action of insider trading 18 | 19 | ## Our Responsibilities 20 | Authorities are responsible for creating an environment where everyone is treated equally and strict actions should be taken in case of unacceptable behaviour. 21 | Authorities have the duty and responsibility to delete, modify, or reject comments, code, wiki updates, problems and other contributions that are not consistent with this Code of Conduct, or temporarily or permanently ban any user for other actions that they find unacceptable, disruptive, offensive or harmful. 22 | 23 | ## Scope 24 | This code of conduct is applicable to all the content on Book Uploader website and all other websites which are linked to it whether offline or online. 25 | The code of conduct is also linked within project spaces and in public spaces where an individual is representing Book Uploader or its community. 26 | 27 | ## Conflict Resolution 28 | Conflict in most cases arises when there is a difference of opinion and people involved are not in a state to listen/understand the point of view of the other person. The severity of conflicts could vary from a mere disagreement to disrespectful exchange of words to more intense situations like physical violence. If you are experiencing any issue, we encourage you to use following strategies: 29 | * Fix the alleged dispute directly with the parties concerned, preferably in a real-time medium. 30 | * If this fails, get a third party (e.g. a mutual friend, and/or someone with background on the issue, but not involved in the conflict) to intervene. 31 | * If you are still unable to resolve the conflict, and you believe it rises to harassment or another code of conduct violation, report it. 
32 | 33 | 34 | ## Reporting Violations 35 | Violations of the Code of Conduct can be reported to Book Uploader's concerned team. Then the team will investigate whether the Code of Conduct was violated and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report. 36 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Finding issues and new tasks 2 | - All the issues and tasks are maintained on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R). 3 | - Make sure to assign an issue to yourself from [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R) before working on it. 4 | 5 | ## Fork 6 | You are required to fork this repository and make changes on the forked repository on a new branch other than *develop* and after making all the changes, make a Pull Request to develop branch. 7 | - After you have the project working on your local machine (refer [README.md](https://github.com/coderwassananmol/BUB2/blob/develop/README.md)), make sure you keep your fork up to date by tracking the original "upstream" repo that you forked. 
To do this, you'll need to add a remote: 8 | ``` 9 | # Add 'upstream' repo to list of remotes 10 | $ git remote add upstream https://github.com/coderwassananmol/BUB2.git 11 | 12 | # Verify the new remote named 'upstream' 13 | $ git remote -v 14 | ``` 15 | - Whenever you want to update your fork with the latest upstream changes, take pull from the upstream repo to your fork in order to keep it at par with the main project by: 16 | ``` 17 | $ git pull upstream develop 18 | ``` 19 | - Before making any contribution. Create seperate branch using command: 20 | ``` 21 | # It will create a new branch with name Branch_Name and switch to that branch 22 | $ git checkout -b Branch_Name 23 | ``` 24 | - After you've made changes or made your contribution to the project add changes to the branch you've just created by: 25 | ``` 26 | # To add all new files to branch Branch_Name 27 | $ git add . 28 | ``` 29 | - Commit messages should follow a [certain guideline](https://udacity.github.io/git-styleguide/). To commit, give a descriptive message for the convenience of reveiwer by: 30 | ``` 31 | # This message get associated with all files you have changed 32 | $ git commit -m 'message' 33 | ``` 34 | **NOTE**: A PR should have only one commit. Multiple commits should be squashed. 35 | - Now you are ready to push your work to the remote repository: 36 | ``` 37 | # To push your work to your remote repository 38 | $ git push -u origin Branch_Name 39 | ``` 40 | ## How to raise a pull request 41 | - Create a Pull Request to merge your branch with the **develop branch** and mention the link to the **Phabricator ticket** you worked on in the description of the Pull Request. 42 | - Once the Pull Request is open, provide the link to it within the comments section of the respective **Phabricator task**, as illustrated in [this sample](https://phabricator.wikimedia.org/T344119). 
43 | 44 | ## Code Reviews 45 | - All submissions should come in the form of a PR and it must be reviewed by at least one reviewer before it gets merged. 46 | 47 | ## Did you find a bug? 48 | 49 | * **Ensure the bug was not already reported** by searching on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R). 50 | 51 | * If the bug is not already reported, create a **New Bug Report**. Make sure to include **title, clear description, tags**, and as much relevant information as possible, and a **code sample** or an **executable test case** demonstrating the expected behavior that is not occurring. 52 | 53 | 54 | -------------------------------------------------------------------------------- /pages/stats.js: -------------------------------------------------------------------------------- 1 | import Header from "../components/Header"; 2 | import ShowQueue from "../components/ShowQueue"; 3 | import fetch from "isomorphic-fetch"; 4 | import { stats_data_endpoint, library } from "../utils/constants"; 5 | import { useState } from "react"; 6 | import Link from "next/link"; 7 | 8 | const emptyObject = { 9 | waiting: 0, 10 | active: 0, 11 | failed: 0, 12 | completed: 0, 13 | delayed: 0, 14 | }; 15 | const Stats = (props) => { 16 | const [queueName, setQueueName] = useState("gb"); 17 | const onChange = (event) => { 18 | setQueueName(event.target.value); 19 | }; 20 | return ( 21 |
22 | 36 |
37 |
38 |
39 |
40 |

Select a Queue

41 | 48 | 56 | 92 |
93 |
94 |
95 |
96 | ); 97 | }; 98 | 99 | export async function getServerSideProps() { 100 | const resp = await fetch(stats_data_endpoint); 101 | if (resp.status !== 200) { 102 | return {}; 103 | } 104 | const data = await resp.json(); 105 | return { props: { data } }; 106 | // Pass data to the page via props 107 | } 108 | 109 | export default Stats; 110 | -------------------------------------------------------------------------------- /bull/commons-queue/consumer.js: -------------------------------------------------------------------------------- 1 | const config = require("../../utils/bullconfig"); 2 | const CommonsQueue = config.getNewQueue("commons-queue"); 3 | const winston = require("winston"); 4 | const { 5 | downloadFile, 6 | uploadToCommons, 7 | uploadToWikiData, 8 | convertZipToPdf, 9 | } = require("../../utils/helper"); 10 | const JSZip = require("jszip"); 11 | const logger = winston.loggers.get("defaultLogger"); 12 | 13 | CommonsQueue.process(async (job, done) => { 14 | try { 15 | if (job.data.type === "pdlZip") { 16 | const zipBuffer = Buffer.from(job.data.downloadFileURL, "base64"); 17 | const zip = await JSZip.loadAsync(zipBuffer); 18 | const convertZipToPdfRes = await convertZipToPdf( 19 | zip, 20 | "commonsFilePayload.pdf" 21 | ); 22 | if (convertZipToPdfRes.status !== 200) { 23 | logger.log({ 24 | level: "error", 25 | message: `convertZipToPdfRes: ${JSON.stringify(convertZipToPdfRes)}`, 26 | }); 27 | process.emit("commonsJobComplete", { 28 | status: false, 29 | value: null, 30 | }); 31 | return done(null, true); 32 | } 33 | const commonsResponse = await uploadToCommons(job.data.metadata); 34 | 35 | if (commonsResponse.fileUploadStatus !== 200) { 36 | logger.log({ 37 | level: "error", 38 | message: `uploadToCommons: ${commonsResponse}`, 39 | }); 40 | process.emit(`commonsJobComplete:${job.id}`, { 41 | status: false, 42 | value: null, 43 | }); 44 | return done(null, true); 45 | } 46 | process.emit(`commonsJobComplete:${job.id}`, { 47 | status: true, 48 | value: 
commonsResponse, 49 | }); 50 | return done(null, true); 51 | } else { 52 | const url = 53 | job.data?.metadata?.uri || 54 | job.data?.downloadFileURL?.uri || 55 | job.data?.metadata?.pdfUrl; 56 | const downloadFileRes = await downloadFile(url, "commonsFilePayload.pdf"); 57 | 58 | if (downloadFileRes.writeFileStatus !== 200) { 59 | logger.log({ 60 | level: "error", 61 | message: `downloadFile: ${downloadFileRes}`, 62 | }); 63 | process.emit(`commonsJobComplete:${job.id}`, { 64 | status: false, 65 | value: null, 66 | }); 67 | return done(null, true); 68 | } 69 | const commonsResponse = await uploadToCommons(job.data.metadata); 70 | 71 | if (commonsResponse.fileUploadStatus !== 200) { 72 | logger.log({ 73 | level: "error", 74 | message: `uploadToCommons: ${commonsResponse}`, 75 | }); 76 | process.emit(`commonsJobComplete:${job.id}`, { 77 | status: false, 78 | value: null, 79 | }); 80 | return done(new Error(commonsResponse)); 81 | } 82 | const wikiDataResponse = await uploadToWikiData( 83 | job.data.metadata, 84 | commonsResponse.filename, 85 | job.data.libraryName 86 | ); 87 | if (wikiDataResponse !== 404) { 88 | process.emit(`commonsJobComplete:${job.id}`, { 89 | status: true, 90 | value: { 91 | commons: commonsResponse, 92 | wikidata: wikiDataResponse, 93 | }, 94 | }); 95 | } else { 96 | process.emit(`commonsJobComplete:${job.id}`, { 97 | status: true, 98 | value: { 99 | commons: commonsResponse, 100 | wikidata: 404, 101 | }, 102 | }); 103 | } 104 | return done(null, true); 105 | } 106 | } catch (error) { 107 | logger.log({ 108 | level: "error", 109 | message: err, 110 | }); 111 | console.log(error, "::errorCommonsQueue"); 112 | } 113 | }); 114 | -------------------------------------------------------------------------------- /pages/api/auth/[...nextauth].js: -------------------------------------------------------------------------------- 1 | import NextAuth from "next-auth"; 2 | import WikimediaProvider from "next-auth/providers/wikimedia"; 3 | import winston from 
"winston"; 4 | const logger = winston.loggers.get("defaultLogger"); 5 | 6 | async function refetchAccessToken(refreshToken) { 7 | try { 8 | const response = await fetch( 9 | process.env.NEXT_PUBLIC_WIKIMEDIA_URL + "/w/rest.php/oauth2/access_token", 10 | { 11 | method: "POST", 12 | headers: { 13 | "Content-Type": "application/x-www-form-urlencoded", 14 | }, 15 | body: new URLSearchParams({ 16 | grant_type: "refresh_token", 17 | refresh_token: refreshToken, 18 | client_id: process.env.WIKIMEDIA_CLIENT_ID, 19 | client_secret: process.env.WIKIMEDIA_CLIENT_SECRET, 20 | }), 21 | } 22 | ); 23 | return await response.json(); 24 | } catch (error) { 25 | logger.log({ 26 | level: "error", 27 | message: `refetchAccessToken: ${error}`, 28 | }); 29 | throw error; 30 | } 31 | } 32 | 33 | export const authOptions = { 34 | providers: [ 35 | WikimediaProvider({ 36 | clientId: process.env.WIKIMEDIA_CLIENT_ID, 37 | clientSecret: process.env.WIKIMEDIA_CLIENT_SECRET, 38 | token: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/access_token`, 39 | userinfo: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/resource/profile`, 40 | authorization: { 41 | url: `${process.env.NEXT_PUBLIC_WIKIMEDIA_URL}/w/rest.php/oauth2/authorize`, 42 | }, 43 | }), 44 | ], 45 | session: { 46 | jwt: true, 47 | }, 48 | debug: process.env.NODE_ENV !== "production", 49 | logger: { 50 | debug(code, metadata) { 51 | // store logs for every user logging in using OAuth 52 | logger.log({ 53 | level: "info", 54 | message: metadata, 55 | }); 56 | if (code === "OAUTH_CALLBACK_RESPONSE" && metadata.account.access_token) { 57 | logger.log({ 58 | level: "info", 59 | message: `User ${metadata.profile.name} logged in using ${ 60 | metadata.account.provider.charAt(0).toUpperCase() + 61 | metadata.account.provider.slice(1) 62 | } OAuth`, 63 | }); 64 | } 65 | }, 66 | error(code, metadata) { 67 | // store logs of aborted logins by users using OAuth 68 | if (code === "OAUTH_CALLBACK_HANDLER_ERROR") { 69 | 
logger.log({ 70 | level: "error", 71 | message: `[${code}] ${metadata.error_description}`, 72 | }); 73 | } 74 | }, 75 | }, 76 | 77 | callbacks: { 78 | async jwt({ token, account }) { 79 | const threeHoursThirtyMinutesInMilliseconds = 12600000; 80 | if (account) { 81 | token.accessToken = account.access_token; 82 | token.refreshToken = account.refresh_token; 83 | token.expiresIn = Date.now() + threeHoursThirtyMinutesInMilliseconds; 84 | } 85 | // Refresh the token if it's expired 86 | if (token.expiresIn && Date.now() > token.expiresIn) { 87 | try { 88 | const new_session = await refetchAccessToken(token.refreshToken); 89 | if (new_session.access_token) { 90 | token.accessToken = new_session.access_token; 91 | token.refreshToken = new_session.refresh_token; 92 | token.expiresIn = 93 | Date.now() + threeHoursThirtyMinutesInMilliseconds; 94 | } else { 95 | token.expired = true; 96 | } 97 | return token; 98 | } catch (error) { 99 | logger.log({ 100 | level: "error", 101 | message: `jwt callback: ${error}`, 102 | }); 103 | } 104 | } 105 | return token; 106 | }, 107 | async session({ session, token, user }) { 108 | // Add the access token to the session object 109 | if (token.expired === true) { 110 | session.expired = true; 111 | } else { 112 | session.accessToken = token.accessToken; 113 | session.expiresIn = token.expiresIn; 114 | } 115 | return session; 116 | }, 117 | }, 118 | }; 119 | 120 | export default (req, res) => NextAuth(req, res, authOptions); 121 | -------------------------------------------------------------------------------- /components/Header.js: -------------------------------------------------------------------------------- 1 | import React, { useState } from "react"; 2 | import Link from "next/link"; 3 | import { signOut } from "next-auth/react"; 4 | import { useSession } from "next-auth/react"; 5 | 6 | function Header(props) { 7 | const [isDropDownOpen, setIsDropDownOpen] = useState(false); 8 | const { data: session } = useSession(); 9 | 10 | const 
toggleDropDown = () => { 11 | setIsDropDownOpen(!isDropDownOpen); 12 | }; 13 | 14 | return ( 15 |
16 | 49 |
50 |
51 |
    56 | 57 | 70 | 71 | 72 | 85 | 86 | 87 | 100 | 101 | 102 | 115 | 116 |
117 | {session && ( 118 |
119 | 126 | {isDropDownOpen && ( 127 |
128 |
signOut()} 131 | > 132 |

Logout

133 | 134 |
135 |
136 | )} 137 |
138 | )} 139 |
140 |
141 |
142 | ); 143 | } 144 | export default Header; 145 | -------------------------------------------------------------------------------- /components/ShowJobInformation.js: -------------------------------------------------------------------------------- 1 | import { useRouter } from "next/router"; 2 | import { useEffect, useState, useRef } from "react"; 3 | import _ from "lodash"; 4 | import { 5 | Card, 6 | CardActionArea, 7 | CardActions, 8 | CardContent, 9 | CardMedia, 10 | Button, 11 | Typography, 12 | } from "@mui/material"; 13 | import Link from "next/link"; 14 | import { CircularProgress } from "@mui/material"; 15 | import { host } from "../utils/constants"; 16 | 17 | const ShowJobInformation = (props) => { 18 | const styles = { 19 | root: { 20 | maxWidth: 365, 21 | height: "fit-content", 22 | }, 23 | cardContentContainer: { 24 | height: "200px", 25 | overflow: "auto", 26 | }, 27 | cardContainer: { 28 | display: "flex", 29 | justifyContent: "center", 30 | alignContent: "center", 31 | marginTop: "0px", 32 | }, 33 | uploadProgress: { 34 | marginLeft: "16.5px", 35 | }, 36 | button: { 37 | fontSize: "11px", 38 | }, 39 | cardImage: { 40 | maxHeight: "400px", 41 | }, 42 | }; 43 | 44 | const router = useRouter(); 45 | 46 | const [data, setData] = useState({ 47 | title: "", 48 | description: "", 49 | previewLink: "https://bub2.wmcloud.org", 50 | imageLinks: {}, 51 | uploadStatus: { 52 | isUploaded: false, 53 | uploadLink: "", 54 | }, 55 | queueName: props.queue_name, 56 | wikimedia_links: "", 57 | }); 58 | 59 | const [progress, setProgress] = useState(0); 60 | 61 | const [loading, setLoading] = useState(false); 62 | 63 | useEffect(() => { 64 | try { 65 | if (props.queue_name && props.job_id) { 66 | setLoading(true); 67 | fetch( 68 | `${host}/getJobInformation?queue_name=${props.queue_name}&job_id=${props.job_id}` 69 | ) 70 | .then((resp) => resp.json()) 71 | .then((resp) => { 72 | setData(resp); 73 | setProgress(resp.progress); 74 | }) 75 | .catch((err) => 
console.error(err)); 76 | } 77 | } catch (err) { 78 | console.log(err, "::err"); 79 | } finally { 80 | setLoading(false); 81 | } 82 | }, [props.queue_name, props.job_id]); 83 | 84 | if (loading) { 85 | return ; 86 | } else { 87 | return ( 88 |
89 | 90 | 91 | 98 | 0 101 | ? styles.cardContentContainer 102 | : null 103 | } 104 | > 105 | 106 | {data.title} 107 | 108 | 109 | {data.description} 110 | 111 | 112 | 117 | Upload Progress: {progress} 118 | 119 | 120 | 121 | 122 | {data.uploadStatus.isUploaded ? ( 123 | 124 | 132 | 133 | ) : null} 134 | 135 | 143 | 144 | 145 | 146 | 147 | {data.wikimedia_links.commons !== "Not Integrated" ? ( 148 | 152 | 160 | 161 | ) : null} 162 | 163 | 164 | {data.wikimedia_links.wikidata !== "Not Integrated" ? ( 165 | 169 | 177 | 178 | ) : null} 179 | 180 | 181 |
182 | ); 183 | } 184 | }; 185 | 186 | export default ShowJobInformation; 187 | -------------------------------------------------------------------------------- /pages/queue.js: -------------------------------------------------------------------------------- 1 | import { useRouter } from "next/router"; 2 | import Header from "../components/Header"; 3 | import QueueSection from "../components/QueueSection"; 4 | import QueueTable from "../components/QueueTable"; 5 | import { host, queue_data_endpoint } from "../utils/constants"; 6 | import { useEffect, useState } from "react"; 7 | 8 | const Queue = ({ data }) => { 9 | const router = useRouter(); 10 | const [queueName, setQueueName] = useState("gb"); 11 | const [tableDataArchive, setTableDataArchive] = useState([]); 12 | const [searchResult, setSearchResult] = useState([]); 13 | const [isSearch, setIsSearch] = useState(false); 14 | // initially, the page itself is refreshed every 15 seconds (according to GB queue) 15 | const [refreshSSPropsInterval, setSSPropsInterval] = useState(15000); 16 | const onChange = (event) => { 17 | setQueueName(event.target.value); 18 | // refresh server side props on queue change 19 | router.replace(router.asPath); 20 | // This time interval has been chosed based on speed of upload 21 | // For GB Queue, refresh server side props every 15 seconds 22 | // For PDL, refresh server side props every 50 seconds 23 | // For Trove, refresh server side props every 30 seconds 24 | if (event.target.value === "gb") { 25 | setSSPropsInterval(15000); 26 | } else if (event.target.value === "pdl") { 27 | setSSPropsInterval(50000); 28 | } else if (event.target.value === "trove") { 29 | setSSPropsInterval(30000); 30 | } 31 | }; 32 | 33 | /** 34 | * The `onSearch` function filters the table data based on a search parameter(Book-title, username or status) and updates the 35 | * searchResult state which then gets passed to the QueueTable Component. 
If the search parameter is empty, all the table data is set to the searchResult state and returned to the QueueTable Component without filtering. 36 | * The unfiltered tableData is stored in the tableDataArchive state and is used to reset the search if the search parameter is empty. 37 | */ 38 | const onSearch = (e) => { 39 | const searchParam = e.target.value.toLowerCase(); 40 | 41 | if (searchParam === "") { 42 | setIsSearch(false); 43 | setSearchResult(tableDataArchive); 44 | return; 45 | } 46 | 47 | setIsSearch(true); 48 | const filteredData = tableDataArchive.filter((item) => { 49 | return ( 50 | item.title.toLowerCase().includes(searchParam) || 51 | item.userName.toLowerCase().includes(searchParam) || 52 | item.status.toLowerCase().includes(searchParam) || 53 | item.id.toString().includes(searchParam) 54 | ); 55 | }); 56 | setSearchResult(filteredData); 57 | }; 58 | 59 | const fetchQueueData = () => { 60 | if (queueName) 61 | fetch(`${host}/allJobs?queue_name=${queueName}`) 62 | .then((resp) => resp.json()) 63 | .then((resp) => { 64 | setTableDataArchive(resp); 65 | setSearchResult(resp); 66 | }); 67 | }; 68 | 69 | // This useEffect runs on page load and 70 | // is responsible for fetching the initial 71 | // queue data 72 | useEffect(() => { 73 | if (queueName) { 74 | fetchQueueData(); 75 | } 76 | }, [queueName]); 77 | 78 | // This useEffect runs every `refreshSSPropsInterval` milliseconds 79 | // to refresh the server side props which contain 80 | // details about books in the active and waiting queue 81 | // Condition: only when queueName changes 82 | useEffect(() => { 83 | const intervalId = setInterval(() => { 84 | router.replace(router.asPath); 85 | }, refreshSSPropsInterval); 86 | // clear the setInterval 87 | return () => clearInterval(intervalId); 88 | }, [queueName]); 89 | 90 | // This useEffect runs every 5000 milliseconds 91 | // to refresh the queue itself, thereby providing 92 | // user with near real time upload progress 93 | // Condition: only 
when queue is active 94 | useEffect(() => { 95 | if ( 96 | data[`${queueName}-queue`]["active"] === null && 97 | data["commons-queue"]["active"] === null 98 | ) { 99 | return; 100 | } 101 | const intervalId = setInterval(() => { 102 | if (data[`${queueName}-queue`]["active"] !== null) { 103 | fetchQueueData(); 104 | } 105 | }, 5000); 106 | // clear the setInterval 107 | return () => clearInterval(intervalId); 108 | }, [data[`${queueName}-queue`]["active"]]); 109 | 110 | return ( 111 |
116 |
117 |
118 |
119 |

Select a Queue

120 | 127 | 131 | 132 |
133 |
139 |
140 |
144 | onSearch(e)} 146 | className="cdx-text-input__input" 147 | type="search" 148 | placeholder="Search by Job ID, Title, Username or Status" 149 | style={{ 150 | height: "48px", 151 | width: "100%", 152 | }} 153 | /> 154 | 155 |
156 |
157 |
158 |
159 | 164 |
165 |
166 |
167 | ); 168 | }; 169 | 170 | export async function getServerSideProps() { 171 | const resp = await fetch(queue_data_endpoint); 172 | const data = await resp.json(); 173 | return { props: { data } }; 174 | // Pass data to the page via props 175 | } 176 | 177 | export default Queue; 178 | -------------------------------------------------------------------------------- /pages/_app.js: -------------------------------------------------------------------------------- 1 | import Head from "next/head"; 2 | // import global styles 3 | import "./../styles/global.less"; 4 | import Footer from "../components/Footer"; 5 | import { SessionProvider, useSession, signOut } from "next-auth/react"; 6 | import { useEffect } from "react"; 7 | 8 | function SessionWrapper({ children }) { 9 | const { data: session } = useSession(); 10 | useEffect(() => { 11 | console.log(session, "::session"); 12 | if (session?.expired === true) { 13 | signOut({ redirect: false }); 14 | } 15 | }, [session?.accessToken]); 16 | return <>{children}; 17 | } 18 | 19 | function MyApp({ Component, pageProps }) { 20 | return ( 21 |
22 | 176 | 177 | 181 | {/* include bootstrap stylesheet */} 182 | 188 | Book Uploader Bot 189 | {/* Favicons */} 190 | 194 | 195 | 196 | 201 | 205 | 206 | 207 | 208 | 209 | 210 | 211 |
212 |
213 | ); 214 | } 215 | 216 | export default MyApp; 217 | -------------------------------------------------------------------------------- /hooks/useMetadataForUI.js: -------------------------------------------------------------------------------- 1 | import { host, permission } from "../utils/constants"; 2 | 3 | export default function useMetadataForUI() { 4 | const getMetadataForUI = async ( 5 | library, 6 | bookID, 7 | categoryID = null, 8 | IAIdentifier = "" 9 | ) => { 10 | try { 11 | switch (library) { 12 | case "gb": 13 | const gbRes = await fetch( 14 | `${host}/getMetadata?option=${"gb"}&bookID=${bookID}` 15 | ); 16 | const gbMetadata = await gbRes.json(); 17 | let { 18 | title: gb_title, 19 | subtitle: gb_subtitle, 20 | authors: gb_authors, 21 | publisher: gb_publisher, 22 | publishedDate: gb_publishedDate, 23 | language: gb_language, 24 | pageCount: gb_pageCount, 25 | infoLink: gb_infoLink, 26 | } = gbMetadata.volumeInfo; 27 | 28 | const gb_authorsFormatted = gb_authors 29 | ? gb_authors.join().trim() 30 | : ""; 31 | const gb_commonsMetadata = `== {{int:filedesc}} == 32 | 33 | {{Book 34 | | Author = ${gb_authorsFormatted}\n 35 | | Translator =\n 36 | | Editor =\n 37 | | Illustrator =\n 38 | | Title = ${gb_title || ""}\n 39 | | Series title =\n 40 | | Volume =\n 41 | | Edition =\n 42 | | Publisher = ${gb_publisher || ""}\n 43 | | Printer =\n 44 | | Publication date = ${gb_publishedDate || ""}\n 45 | | City =\n 46 | | Language = ${gb_language || ""}\n 47 | | Description = ${gb_subtitle || ""}\n 48 | | Source = ${gb_infoLink || ""}\n 49 | | Permission = ${permission}\n 50 | | Image =\n 51 | | Image page =\n 52 | | Pageoverview =\n 53 | | Wikisource =\n 54 | | Homecat =\n 55 | | Other_versions =\n 56 | | ISBN =\n 57 | | LCCN =\n 58 | | OCLC =\n 59 | | References =\n 60 | | Linkback =\n 61 | | Wikidata =\n 62 | | noimage =\n 63 | | Other_fields_1 = {{Information field|name=Rights|value=${ 64 | gbMetadata.accessInfo.accessViewStatus || "" 65 | 
}|name=Pages|value=${gb_pageCount || ""}}} 66 | }} 67 | 68 | == {{int:license-header}} == 69 | 70 | {{PD-scan}} 71 | 72 | [[Category:Files uploaded with BUB2]] 73 | `; 74 | return gb_commonsMetadata.replace(/&/g, "_"); 75 | case "trove": 76 | const troveRes = await fetch( 77 | `${host}/getMetadata?option=${"trove"}&bookID=${bookID}` 78 | ); 79 | const troveJson = await troveRes.json(); 80 | const troveMetadata = troveJson.article; 81 | let { 82 | title: trove_title, 83 | date: trove_date, 84 | troveUrl: trove_url, 85 | page: trove_page, 86 | identifier: trove_identifier, 87 | heading: trove_heading, 88 | category: trove_category, 89 | } = troveMetadata; 90 | 91 | const trove_commonsMetadata = `== {{int:filedesc}} == 92 | 93 | {{Book 94 | | Author =\n 95 | | Translator =\n 96 | | Editor =\n 97 | | Illustrator =\n 98 | | Title = ${trove_heading || ""}\n 99 | | Series title =\n 100 | | Volume =\n 101 | | Edition =\n 102 | | Publisher =\n 103 | | Printer =\n 104 | | Publication date = ${trove_date || ""}\n 105 | | City =\n 106 | | Language =\n 107 | | Description = ${trove_title.value || ""}\n 108 | | Source = ${trove_url || ""}\n 109 | | Permission = ${permission}\n 110 | | Image =\n 111 | | Image page =\n 112 | | Pageoverview =\n 113 | | Wikisource =\n 114 | | Homecat =\n 115 | | Other_versions =\n 116 | | ISBN =\n 117 | | LCCN =\n 118 | | OCLC =\n 119 | | References =\n 120 | | Linkback =\n 121 | | Wikidata =\n 122 | | noimage =\n 123 | | Other_fields_1 = {{Information field|name=Identifier|value=${ 124 | trove_identifier || "" 125 | }|name=Pages|value=${trove_page || ""}|name=Category|value=${ 126 | trove_category || "" 127 | }}} 128 | }} 129 | 130 | == {{int:license-header}} == 131 | 132 | {{PD-scan}} 133 | 134 | [[Category:Files uploaded with BUB2]] 135 | `; 136 | return trove_commonsMetadata; 137 | case "pdl": 138 | const pdlRes = await fetch( 139 | `${host}/getMetadata?option=${"pdl"}&bookID=${bookID}&categoryID=${categoryID}&IAIdentifier=${IAIdentifier}` 140 | 
); 141 | const pdlMetadata = await pdlRes.json(); 142 | let { 143 | Script: pdl_script, 144 | Langauge: pdl_language, 145 | Publisher: pdl_publisher, 146 | Pages: pdl_pages, 147 | description: pdl_description, 148 | title: pdl_title, 149 | coverImage: pdl_coverImage, 150 | pdfUrl: pdl_pdfUrl, 151 | IAIdentifier: pdl_identifier, 152 | } = pdlMetadata; 153 | const pdl_commonsMetadata = `== {{int:filedesc}} == 154 | 155 | {{Book 156 | | Author =\n 157 | | Translator =\n 158 | | Editor =\n 159 | | Illustrator =\n 160 | | Title = ${pdl_title || ""}\n 161 | | Series title =\n 162 | | Volume =\n 163 | | Edition =\n 164 | | Publisher = ${pdl_publisher || ""}\n 165 | | Printer =\n 166 | | Publication date =\n 167 | | City =\n 168 | | Language = ${pdl_language || ""}\n 169 | | Description = ${pdl_description || ""}\n 170 | | Source = ${pdl_pdfUrl || ""}\n 171 | | Permission = ${permission}\n 172 | | Image = ${pdl_coverImage || ""}\n 173 | | Image page =\n 174 | | Pageoverview =\n 175 | | Wikisource =\n 176 | | Homecat =\n 177 | | Other_versions =\n 178 | | ISBN =\n 179 | | LCCN =\n 180 | | OCLC =\n 181 | | References =\n 182 | | Linkback =\n 183 | | Wikidata =\n 184 | | noimage =\n 185 | | Other_fields_1 = {{Information field|name=Identifier|value=${ 186 | pdl_identifier || "" 187 | }|name=Pages|value=${pdl_pages || ""}|name=Script|value=${ 188 | pdl_script || "" 189 | }}} 190 | }} 191 | 192 | == {{int:license-header}} == 193 | 194 | {{PD-scan}} 195 | 196 | [[Category:Files uploaded with BUB2]] 197 | `; 198 | return pdl_commonsMetadata; 199 | } 200 | } catch (error) { 201 | return error; 202 | } 203 | }; 204 | 205 | return { getMetadataForUI }; 206 | } 207 | -------------------------------------------------------------------------------- /bull/google-books-queue/consumer.js: -------------------------------------------------------------------------------- 1 | const request = require("request"); 2 | const EmailProducer = require("../email-queue/producer"); 3 | const 
CommonsProducer = require("../commons-queue/producer"); 4 | const config = require("../../utils/bullconfig"); 5 | const GoogleBooksQueue = config.getNewQueue("google-books-queue"); 6 | const winston = require("winston"); 7 | const logger = winston.loggers.get("defaultLogger"); 8 | const { 9 | logUserData, 10 | uploadToCommons, 11 | downloadFile, 12 | } = require("./../../utils/helper"); 13 | 14 | let responseSize, 15 | dataSize = 0; 16 | 17 | GoogleBooksQueue.on("active", (job, jobPromise) => { 18 | logger.log({ 19 | level: "info", 20 | message: `Consumer(next): Job ${job.id} is active!`, 21 | }); 22 | }); 23 | 24 | GoogleBooksQueue.on("completed", (job, result) => { 25 | logger.log({ 26 | level: "info", 27 | message: `Consumer(next): Job ${job.id} completed! Result: ${result}`, 28 | }); 29 | }); 30 | 31 | GoogleBooksQueue.process((job, done) => { 32 | const requestURI = request(job.data.uri); 33 | const { id, volumeInfo, accessInfo } = job.data.details; 34 | const jobLogs = volumeInfo; 35 | let { 36 | authors, 37 | publisher, 38 | publishedDate, 39 | imageLinks, 40 | previewLink, 41 | title, 42 | language, 43 | pageCount, 44 | infoLink, 45 | } = volumeInfo; 46 | const { accessViewStatus } = accessInfo; 47 | const bucketTitle = job.data.IAIdentifier; 48 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`; 49 | const trueURI = `http://archive.org/details/${bucketTitle}`; 50 | jobLogs["trueURI"] = trueURI; 51 | jobLogs["userName"] = job.data.userName; 52 | job.log(JSON.stringify(jobLogs)); 53 | logUserData(jobLogs["userName"], "Google Books"); 54 | requestURI.pipe( 55 | request( 56 | { 57 | method: "PUT", 58 | preambleCRLF: true, 59 | postambleCRLF: true, 60 | uri: IAuri, 61 | headers: { 62 | Authorization: `LOW ${process.env.access_key}:${process.env.secret_key}`, 63 | "Content-type": "application/pdf; charset=utf-8", 64 | "Accept-Charset": "utf-8", 65 | "X-Amz-Auto-Make-Bucket": "1", 66 | "X-Archive-Meta-Collection": "opensource", 67 | 
"X-Archive-Ignore-Preexisting-Bucket": 1, 68 | "X-archive-meta-title": `uri(${encodeURI(title.trim())})`, 69 | "X-archive-meta-date": publishedDate ? publishedDate.trim() : "", 70 | "X-archive-meta-language": language.trim(), 71 | "X-archive-meta-mediatype": "texts", 72 | "X-archive-meta-licenseurl": 73 | "https://creativecommons.org/publicdomain/mark/1.0/", 74 | "X-archive-meta-publisher": publisher.trim(), 75 | "X-archive-meta-Author": authors 76 | ? `uri(${encodeURI(authors.join().trim())})` 77 | : "", 78 | "X-archive-meta-rights": accessViewStatus.trim(), 79 | "X-archive-meta-Google-id": id, 80 | "X-archive-meta-Source": infoLink.trim(), 81 | }, 82 | }, 83 | async (error, response, body) => { 84 | if (error || response.statusCode != 200) { 85 | const errorMessage = !body ? error : body; 86 | logger.log({ 87 | level: "error", 88 | message: `IA Failure GB ${errorMessage}`, 89 | }); 90 | if (job.data.isEmailNotification === "true") { 91 | EmailProducer(job.data.userName, title, trueURI, { 92 | archive: false, 93 | commons: false, 94 | }); 95 | } 96 | done(new Error(errorMessage)); 97 | } else { 98 | job.progress({ 99 | step: "Upload To IA", 100 | value: `(${100}%)`, 101 | }); 102 | if ( 103 | job.data.isUploadCommons !== "true" && 104 | job.data.isEmailNotification !== "true" 105 | ) { 106 | done(null, true); 107 | } 108 | if ( 109 | job.data.isEmailNotification === "true" && 110 | job.data.isUploadCommons !== "true" 111 | ) { 112 | EmailProducer(job.data.userName, title, trueURI, { 113 | archive: true, 114 | commons: false, 115 | }); 116 | done(null, true); 117 | } 118 | if (job.data.isUploadCommons === "true") { 119 | job.progress({ 120 | step: "Uploading to Wikimedia Commons", 121 | value: `(${50}%)`, 122 | }); 123 | CommonsProducer( 124 | null, 125 | null, 126 | job.data, 127 | "gb", 128 | async (commonsResponse) => { 129 | if (commonsResponse.status === true) { 130 | job.progress({ 131 | step: "Upload to Wikimedia Commons", 132 | value: `(${100}%)`, 133 | 
wikiLinks: { 134 | commons: 135 | (await commonsResponse.value.filename) || 136 | commonsResponse.filename || 137 | commonsResponse.value.commons.filename, 138 | wikidata: 139 | (await commonsResponse.value.wikidata) !== 404 140 | ? await commonsResponse.value.wikidata 141 | : 404, 142 | }, 143 | }); 144 | if (job.data.isEmailNotification === "true") { 145 | const commonsLink = 146 | process.env.NEXT_PUBLIC_COMMONS_URL + 147 | `/wiki/File:${ 148 | commonsResponse.value.filename || 149 | commonsResponse.filename || 150 | commonsResponse.value.commons.filename 151 | }`; 152 | EmailProducer( 153 | job.data.userName, 154 | title, 155 | { archiveLink: trueURI, commonsLink: commonsLink }, 156 | { archive: true, commons: true } 157 | ); 158 | } 159 | return done(null, true); 160 | } else { 161 | job.progress({ 162 | step: "Upload To IA (100%), Upload To Commons", 163 | value: `(Failed)`, 164 | }); 165 | if (job.data.isEmailNotification === "true") { 166 | EmailProducer(job.data.userName, title, trueURI, { 167 | archive: true, 168 | commons: false, 169 | }); 170 | } 171 | return done(null, true); 172 | } 173 | } 174 | ); 175 | } 176 | } 177 | } 178 | ) 179 | ); 180 | 181 | requestURI.on("response", function (data) { 182 | responseSize = Number(data.headers["content-length"]); 183 | dataSize = 0; 184 | }); 185 | 186 | requestURI.on("data", function (chunk) { 187 | dataSize += Number(chunk.length); 188 | job.progress({ 189 | step: "Uploading to Internet Archive", 190 | value: `(${Math.round((dataSize / responseSize) * 100)}%)`, 191 | }); 192 | }); 193 | }); 194 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # BUB2 ![](https://img.shields.io/github/forks/coderwassananmol/BUB2?style=social) ![](https://img.shields.io/github/stars/coderwassananmol/BUB2?style=social) 
![](https://img.shields.io/github/watchers/coderwassananmol/BUB2?style=social)
4 | 5 | ![](https://img.shields.io/github/repo-size/coderwassananmol/BUB2) ![](https://img.shields.io/github/license/coderwassananmol/BUB2?color=red)
6 | ![](https://img.shields.io/github/issues/coderwassananmol/BUB2?color=green) ![](https://img.shields.io/github/issues-pr/coderwassananmol/BUB2?color=green) ![](https://img.shields.io/github/downloads/coderwassananmol/BUB2/total) ![](https://img.shields.io/github/last-commit/coderwassananmol/BUB2) ![](https://img.shields.io/github/contributors/coderwassananmol/BUB2)
7 | A book uploader bot that transfers documents from public libraries such as Google Books, Punjab Digital Library and Trove Digital Library to [Internet Archive](https://archive.org). Built for Wikimedia Tool Labs. Check out [BUB2 on Wikimedia Cloud](https://bub2.wmcloud.org)! 8 | 9 | # Table of Contents 10 | 11 | - [Getting Started](#getting-started) 12 | - [Prerequisites](#prerequisites) 13 | - [Local setup](#local-setup) 14 | - [Clone the repo](#clone-the-repo) 15 | - [Set environment variables](#set-environment-variables) 16 | - [Run Redis server](#run-redis-server) 17 | - [Start the server](#start-the-server) 18 | - [Example](#example) 19 | - [Contributing](#contributing) 20 | - [Request to Contribute](#request-to-contribute) 21 | - [Keep Supporting](#keep-supporting) 22 | - [License](#license) 23 | 24 | 25 | ## Getting Started 26 | 27 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. See deployment for notes on how to deploy the project on a live system. 28 | 29 | 30 | ### Prerequisites 31 | 32 | - [Node.JS](https://nodejs.org/en/download/) ( check for installation with `npm -v` and `node -v` on terminal ) 33 | - [Redis](https://redis.io/) 34 | 35 | 36 | ### Local setup 37 | 38 | 39 | #### Clone the repo 40 | 41 | - Clone the repository `git clone https://github.com/coderwassananmol/BUB2` 42 | - Navigate to the project directory on the terminal: `cd BUB2`. 43 | - For developers, checkout to develop branch: `git checkout develop` 44 | - Run `npm install` 45 | 46 | 47 | #### Set environment variables 48 | 49 | Rename `.env.example` to `.env`. Then, to fill the credentials, 50 | 51 | - Make an account on [archive.org](https://archive.org). 52 | - Go to https://archive.org/account/s3.php . Generate the **access** and **secret** keys and fill 53 | them in the `.env` file in the corresponding fields. 
54 | - Go to [Google Developers console](https://console.developers.google.com/getting-started). Make a new project to run the app. In that Google Developers project, search for 'Books API' in the Google API console, then **enable** the API for the project, then generate the **API keys**, and then copy and paste the API key in the `GB_Key` fields. 55 | - Enter the `redishost` field. If it's hosted locally, enter **127.0.0.1**, which is the default localhost. 56 | - Enter the `redisport` field with **6379**, which is the default port number for redis. 57 | 58 | - You need Beta wiki [OAuth 2.0](https://oauth.net/2/) credentials to login with MediaWiki during development. Steps to obtain the OAuth credentials for the `WIKIMEDIA_CLIENT_ID` and `WIKIMEDIA_CLIENT_SECRET` fields : 59 | - Go to [Beta-wiki](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:OAuthConsumerRegistration) and click **Request a token for a new OAuth 2.0 client**. 60 | - Enter the details (Application name, details, callback url, and applicable grants). Refer [this](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:OAuthListConsumers/view/e70de440468d7140914e4a57e3660cf2) as a sample. 61 | - After submitting, note the client application key and client application secret and wait for the proposed consumer to get **approved**. 62 | - Go to [this file](pages/api/auth/[...nextauth].js). 63 | - Add the following code after the clientId and clientSecret : 64 | ``` 65 | token: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/access_token", 66 | userinfo: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/resource/profile", 67 | authorization: { 68 | url: "https://meta.wikimedia.beta.wmflabs.org/w/rest.php/oauth2/authorize", 69 | params: { scope: "" }, 70 | } 71 | ``` 72 | - Enter the client application key and client application secret in the `WIKIMEDIA_CLIENT_ID` and `WIKIMEDIA_CLIENT_SECRET` respectively 73 | - Enter the `NEXTAUTH_URL` with http://localhost:5000. 
74 | - Go to [wikisource Bot](https://meta.wikimedia.beta.wmflabs.org/wiki/Special:BotPasswords) to generate your `EMAIL_BOT_USERNAME` and `EMAIL_BOT_PASSWORD`. When creating your bot, tick 'send email to users' under the Applicable Grants section. Go ahead and enter the generated credentials in the `.env` file. 75 | - Enter the `NEXT_PUBLIC_WIKIMEDIA_URL` which is used to authenticate with Wikimedia environment for login, send emails etc. For example - https://meta.wikimedia.beta.wmflabs.org 76 | - Enter the `NEXT_PUBLIC_COMMONS_URL` which is used to upload the files to Commons. For example - https://meta.commons.beta.wmflabs.org for local setup. 77 | - Enter the `NEXT_PUBLIC_IA_USER` which is the username of Internet Archive account. 78 | - Enter the `IA_EMAIL` which is the email of the Internet Archive account. 79 | - Go to [Trove API](https://trove.nla.gov.au/about/create-something/using-api) and follow the instructions on how to get a trove key. Fill in `trove_key` with the trove key you generated. 80 | 81 | ### Run Redis server 82 | 83 | - Refer to [Redis](https://redis.io/download) for download and setup documentation 84 | 85 | 86 | #### Start the server 87 | 88 | - Run `npm run dev` for development and `npm run start` for production. 89 | - Open your browser and navigate to http://localhost:5000 90 | 91 | 92 | ## Contributing 93 | 94 | Please read [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute to BUB2. 95 | 96 | 97 | ## Request to Contribute 98 | 1. Fork the repository. 99 | 2. Clone the repository to your local system by using the command: `git clone "https://github.com/<your-username>/BUB2"`. 100 | 3. The issues are maintained on [Phabricator](https://phabricator.wikimedia.org/maniphest/?project=PHID-PROJ-wnkiea2sihld2xlpq527&statuses=open()&group=none&order=newest#R). 101 | 4. Assign an issue to yourself from Phabricator. 102 | 5. Create a new branch and start working on the issue locally. 103 | 6. 
Create a PULL REQUEST to merge your branch with the main branch and mention the Phab task in the description. 104 | 7. The issue will be considered closed and resolved once the PR is accepted. 105 | Please read [CONTRIBUTING.md](CONTRIBUTING.md) for information on how to contribute to BUB2. 106 | 107 | 108 | ## Contribute 109 | We actively welcome pull requests. Learn how to [contribute.](CONTRIBUTING.md) 110 | ## Keep Supporting 111 | 112 | There was no Node.js wrapper available for Internet Archive, so I decided to write the Node implementation to upload books to Internet Archive. If you like this repository, show your support by starring the project. Cheers! 113 | 114 | 115 | ## License 116 | [![license](https://img.shields.io/github/license/DAVFoundation/captain-n3m0.svg?style=flat-square)](https://github.com/coderwassananmol/BUB2/blob/develop/LICENSE.md) 117 | 118 | Please read [license](https://github.com/coderwassananmol/BUB2/blob/develop/LICENSE.md) for more information. 119 | -------------------------------------------------------------------------------- /components/QueueTable.js: -------------------------------------------------------------------------------- 1 | import { useRouter } from "next/router"; 2 | import { useEffect, useState } from "react"; 3 | import { 4 | Paper, 5 | Table, 6 | TableBody, 7 | TableCell, 8 | TableContainer, 9 | TableHead, 10 | TablePagination, 11 | TableRow, 12 | Backdrop, 13 | } from "@mui/material"; 14 | import { host } from "../utils/constants"; 15 | import ShowJobInformation from "../components/ShowJobInformation"; 16 | 17 | const ShowUploadQueue = (props) => { 18 | const styles = { 19 | backdrop: { 20 | zIndex: 5, 21 | color: "#fff", 22 | }, 23 | head: { 24 | backgroundColor: "#f8f9fa", 25 | color: "#202122", 26 | fontSize: "14px", 27 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif", 28 | }, 29 | body: { 30 | fontSize: "14px", 31 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif", 32 | color: 
"#54595d", 33 | }, 34 | root: { 35 | marginTop: "20px", 36 | width: "100%", 37 | zIndex: 0, 38 | }, 39 | row: { 40 | "&:nth-of-type(odd)": { 41 | backgroundColor: "#fff", 42 | }, 43 | "&:nth-of-type(even)": { 44 | backgroundColor: "#f8f9fa", 45 | }, 46 | }, 47 | container: { 48 | maxHeight: 330, 49 | }, 50 | toolbar: { 51 | marginTop: "8px", 52 | fontSize: "12px", 53 | fontFamily: "Helvetica Neue, Helvetica, Arial, sans-serif", 54 | color: "#54595d", 55 | }, 56 | selectIcon: { 57 | fontSize: "12px", 58 | top: "calc(50% - 8px)", 59 | }, 60 | }; 61 | 62 | const router = useRouter(); 63 | // const classes = useStyles(); 64 | const [jobId, setJobId] = useState(""); 65 | 66 | const onClick = (id) => { 67 | setJobId(id); 68 | setOpen(true); 69 | }; 70 | 71 | const columns = [ 72 | { 73 | id: "id", 74 | label: "Job ID", 75 | minWidth: 50, 76 | align: "left", 77 | format: (value) => ( 78 | onClick(value)}> 79 | {value} 80 | 81 | ), 82 | }, 83 | { 84 | id: "title", 85 | label: "Title", 86 | minWidth: 300, 87 | align: "left", 88 | format: (value, label) => ( 89 | onClick(value)}> 90 | {label} 91 | 92 | ), 93 | }, 94 | { 95 | id: "userName", 96 | label: "Wiki Username", 97 | minWidth: 150, 98 | align: "left", 99 | format: (value) => 100 | value !== "-" ? ( 101 | 105 | {value} 106 | 107 | ) : ( 108 | value 109 | ), 110 | }, 111 | { 112 | id: "upload_progress", 113 | label: "Upload Progress", 114 | minWidth: 50, 115 | align: "left", 116 | format: (value) => value, 117 | }, 118 | { 119 | id: "status", 120 | label: "Status", 121 | minWidth: 30, 122 | align: "left", 123 | format: (value) => { 124 | const isPDLMissingPage = /]*>([^<]+)<\/a>/; 125 | const missingPageLink = isPDLMissingPage.exec(value); 126 | return missingPageLink ? ( 127 | 128 | Failed! (Reason: Upload to Internet Archive failed because {""} 129 | 130 | {missingPageLink[1]} 131 | {" "} 132 | is not reachable. Please try again or contact Panjab Digital Library 133 | for more details. 
) 134 | 135 | ) : ( 136 | value 137 | ); 138 | }, 139 | }, 140 | { 141 | id: "timestamp", 142 | label: "Timestamp", 143 | minWidth: 150, 144 | align: "left", 145 | format: (value) => value, 146 | }, 147 | ]; 148 | 149 | const [open, setOpen] = useState(false); 150 | const [page, setPage] = useState(0); 151 | const [rowsPerPage, setRowsPerPage] = useState(10); 152 | const rows = props.tableData ? props.tableData : []; 153 | 154 | const handleClose = (e) => { 155 | setOpen(false); 156 | }; 157 | 158 | const handleChangeRowsPerPage = (event) => { 159 | setRowsPerPage(+event.target.value); 160 | setPage(0); 161 | }; 162 | 163 | const handleChangePage = (event, newPage) => { 164 | setPage(newPage); 165 | }; 166 | 167 | const conditionalRender = (column, value, row) => { 168 | if (column.id === "id" || column.id === "upload_progress") { 169 | return column.format(value); 170 | } else if (column.id === "title") { 171 | return column.format(row["id"], value); 172 | } else if (column.id === "userName") { 173 | return column.format((value === "-" ? "" : "User:") + value); 174 | } else if (column.id === "date") { 175 | return column.format(value); 176 | } else if (column.id === "status") { 177 | return column.format(value); 178 | } else if (column.id === "wikimedia_links") { 179 | return column.format(value); 180 | } else { 181 | return value; 182 | } 183 | }; 184 | 185 | useEffect(() => { 186 | setPage(0); 187 | }, [props.isSearch]); 188 | 189 | return ( 190 |
191 | 192 | 193 | 194 | 195 | 196 | {columns.map((column) => ( 197 | 202 | {column.label} 203 | 204 | ))} 205 | 206 | 207 | 208 | {rows 209 | .slice(page * rowsPerPage, page * rowsPerPage + rowsPerPage) 210 | .map((row) => { 211 | return ( 212 | 213 | {columns.map((column) => { 214 | const value = row[column.id]; 215 | return ( 216 | 221 | {conditionalRender(column, value, row)} 222 | 223 | ); 224 | })} 225 | 226 | ); 227 | })} 228 | 229 |
230 |
231 | Rows per page
} 240 | labelDisplayedRows={({ from, to, count }) => ( 241 |
242 | {`${from}–${to} of ${count !== -1 ? count : `more than ${to}`}`} 243 |
244 | )} 245 | sx={{ display: "flex", justifyContent: "end" }} 246 | /> 247 | 248 | 249 | {open ? ( 250 | 251 | ) : null} 252 | 253 | 254 | ); 255 | }; 256 | 257 | export default ShowUploadQueue; 258 | -------------------------------------------------------------------------------- /bull/trove-queue/consumer.js: -------------------------------------------------------------------------------- 1 | const EmailProducer = require("../email-queue/producer"); 2 | const CommonsProducer = require("../commons-queue/producer"); 3 | const config = require("../../utils/bullconfig"); 4 | const TroveQueue = config.getNewQueue("trove-queue"); 5 | const rp = require("request-promise"); 6 | const request = require("request"); 7 | const _ = require("lodash"); 8 | const winston = require("winston"); 9 | const logger = winston.loggers.get("defaultLogger"); 10 | const { 11 | logUserData, 12 | downloadFile, 13 | uploadToCommons, 14 | } = require("./../../utils/helper"); 15 | 16 | let responseSize, 17 | dataSize = 0; 18 | 19 | TroveQueue.on("active", (job, jobPromise) => { 20 | logger.log({ 21 | level: "info", 22 | message: `Consumer(next): Job ${job.id} is active!`, 23 | }); 24 | }); 25 | 26 | TroveQueue.on("completed", (job, result) => { 27 | logger.log({ 28 | level: "info", 29 | message: `Consumer(next): Job ${job.id} completed! 
Result: ${result}`, 30 | }); 31 | }); 32 | 33 | TroveQueue.process((job, done) => { 34 | const currentTimestamp = Date.now(); 35 | request( 36 | `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}/prep?_=${currentTimestamp}`, 37 | {}, 38 | async (error, response, body) => { 39 | if (error || response.statusCode != 200) { 40 | logger.log({ 41 | level: "error", 42 | message: `trove API ${body}`, 43 | }); 44 | } else { 45 | const requestURI = request( 46 | `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}.pdf?followup=${body}` 47 | ); 48 | const downloadFileUrl = `https://trove.nla.gov.au/newspaper/rendition/nla.news-issue${job.data.details.issueRenditionId}.pdf?followup=${body}`; 49 | const jobLogs = job.data.details; 50 | let { 51 | name, 52 | date, 53 | id, 54 | troveUrl, 55 | IAIdentifier, 56 | userName, 57 | isEmailNotification, 58 | } = job.data.details; 59 | const bucketTitle = IAIdentifier; 60 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`; 61 | const trueURI = `http://archive.org/details/${bucketTitle}`; 62 | jobLogs["trueURI"] = trueURI; 63 | jobLogs["userName"] = job.data.details.userName; 64 | job.log(JSON.stringify(jobLogs)); 65 | logUserData(jobLogs["userName"], "Trove"); 66 | requestURI.pipe( 67 | request( 68 | { 69 | method: "PUT", 70 | preambleCRLF: true, 71 | postambleCRLF: true, 72 | uri: IAuri, 73 | headers: { 74 | Authorization: `LOW ${process.env.access_key}:${process.env.secret_key}`, 75 | "Content-type": "application/pdf; charset=utf-8", 76 | "Content-Length": responseSize, 77 | "Accept-Charset": "utf-8", 78 | "X-Amz-Auto-Make-Bucket": "1", 79 | "X-Archive-Meta-Collection": "opensource", 80 | "X-Archive-Ignore-Preexisting-Bucket": 1, 81 | "X-archive-meta-title": name.trim(), 82 | "X-archive-meta-date": date.trim(), 83 | "X-archive-meta-mediatype": "texts", 84 | "X-archive-meta-licenseurl": 85 | 
"https://creativecommons.org/publicdomain/mark/1.0/", 86 | "X-archive-meta-Trove-issueid": id, 87 | "X-archive-meta-Identifier": `bub_trove_${id}`, 88 | "X-archive-meta-TroveURL": troveUrl, 89 | }, 90 | }, 91 | async (error, response, body) => { 92 | if (error || response.statusCode != 200) { 93 | const errorMessage = !body ? error : body; 94 | logger.log({ 95 | level: "error", 96 | message: `IA Failure Trove ${errorMessage}`, 97 | }); 98 | if (isEmailNotification === "true") { 99 | EmailProducer(userName, name, trueURI, { 100 | archive: false, 101 | commons: false, 102 | }); 103 | } 104 | done(new Error(errorMessage)); 105 | } else { 106 | job.progress({ 107 | step: "Upload To IA", 108 | value: `(${100}%)`, 109 | }); 110 | if ( 111 | isEmailNotification !== "true" && 112 | job.data.details.isUploadCommons !== "true" 113 | ) { 114 | done(null, true); 115 | } 116 | if ( 117 | isEmailNotification === "true" && 118 | job.data.details.isUploadCommons !== "true" 119 | ) { 120 | EmailProducer(userName, name, trueURI, { 121 | archive: true, 122 | commons: false, 123 | }); 124 | } 125 | if (job.data.details.isUploadCommons === "true") { 126 | job.progress({ 127 | step: "Uploading to Wikimedia Commons", 128 | value: `(50%)`, 129 | }); 130 | CommonsProducer( 131 | null, 132 | downloadFileUrl, 133 | job.data.details, 134 | "trove", 135 | async (commonsResponse) => { 136 | if (commonsResponse.status === true) { 137 | job.progress({ 138 | step: "Upload to Wikimedia Commons", 139 | value: `(${100}%)`, 140 | wikiLinks: { 141 | commons: await commonsResponse.value.filename, 142 | wikidata: 143 | (await commonsResponse.value.wikidata) !== 404 144 | ? 
await commonsResponse.value.wikidata 145 | : 404, 146 | }, 147 | }); 148 | if (job.data.isEmailNotification === "true") { 149 | const commonsLink = 150 | process.env.NEXT_PUBLIC_COMMONS_URL + 151 | `/wiki/File:${commonsResponse.value.filename}`; 152 | EmailProducer( 153 | userName, 154 | name, 155 | { 156 | archiveLink: trueURI, 157 | commonsLink: commonsLink, 158 | }, 159 | { archive: true, commons: true } 160 | ); 161 | } 162 | } else { 163 | job.progress({ 164 | step: "Upload To IA (100%), Upload To Commons", 165 | value: `(Failed)`, 166 | }); 167 | EmailProducer(userName, name, trueURI, { 168 | archive: true, 169 | commons: false, 170 | }); 171 | } 172 | } 173 | ); 174 | } 175 | done(null, true); 176 | } 177 | } 178 | ) 179 | ); 180 | requestURI.on("response", function (data) { 181 | responseSize = Number(data.headers["content-length"]); 182 | dataSize = 0; 183 | }); 184 | 185 | requestURI.on("data", function (chunk) { 186 | dataSize += Number(chunk.length); 187 | const progress = Math.round((dataSize / responseSize) * 100); 188 | if (progress !== null) 189 | job.progress({ 190 | step: "Uploading to Internet Archive", 191 | value: `(${progress || 0}%)`, 192 | }); 193 | }); 194 | } 195 | } 196 | ); 197 | }); 198 | -------------------------------------------------------------------------------- /bull/pdl-queue/consumer.js: -------------------------------------------------------------------------------- 1 | const EmailProducer = require("../email-queue/producer"); 2 | const CommonsProducer = require("../commons-queue/producer"); 3 | const config = require("../../utils/bullconfig"); 4 | const PDLQueue = config.getNewQueue("pdl-queue"); 5 | const rp = require("request-promise"); 6 | const request = require("request"); 7 | const _ = require("lodash"); 8 | const winston = require("winston"); 9 | const logger = winston.loggers.get("defaultLogger"); 10 | const { logUserData } = require("./../../utils/helper"); 11 | const { customFetch } = require("../../utils/helper"); 
12 | const stream = require("stream"); 13 | 14 | var JSZip = require("jszip"); 15 | PDLQueue.on("active", (job, jobPromise) => { 16 | logger.log({ 17 | level: "info", 18 | message: `Consumer(next): Job ${job.id} is active!`, 19 | }); 20 | }); 21 | 22 | PDLQueue.on("completed", (job, result) => { 23 | logger.log({ 24 | level: "info", 25 | message: `Consumer(next): Job ${job.id} completed! Result: ${result}`, 26 | }); 27 | }); 28 | 29 | async function getZipAndBytelength(no_of_pages, id, title, job) { 30 | var zip = new JSZip(); 31 | title = title.replace(/ /g, "_"); 32 | var img = zip.folder(`${title}_images`); 33 | let temp_pages = no_of_pages; 34 | let downloadImageStatus; 35 | let errorFlag = { status: false, page: "" }; 36 | var download_image = async function (uri, filename) { 37 | try { 38 | const body = await rp({ 39 | method: "GET", 40 | uri, 41 | encoding: null, 42 | transform: function (body, response) { 43 | return { headers: response.headers, data: body }; 44 | }, 45 | }); 46 | if (/image/.test(body.headers["content-type"])) { 47 | var data = Buffer.from(body.data); 48 | img.file(filename, data.toString("base64"), { base64: true }); 49 | } 50 | return 200; 51 | } catch (err) { 52 | --no_of_pages; 53 | return err.statusCode; 54 | } 55 | }; 56 | for (let i = 1; i <= temp_pages; ++i) { 57 | const str = `http://www.panjabdigilib.org/images?ID=${id}&page=${i}&pagetype=null&Searched=W3GX`; 58 | downloadImageStatus = await download_image(str, `${title}_${i}.jpeg`); 59 | job.progress({ 60 | step: "Uploading to Internet Archive", 61 | value: `(${Math.round((i / temp_pages) * 82)}%)`, 62 | }); 63 | if (downloadImageStatus >= 200 && downloadImageStatus < 300) { 64 | continue; 65 | } else { 66 | errorFlag = { status: true, page: str }; 67 | break; 68 | } 69 | } 70 | let { byteLength } = await zip.generateAsync({ type: "nodebuffer" }); 71 | byteLength = Number(byteLength + no_of_pages * 16); //No. 
of pages * 16 72 | return [zip, byteLength, errorFlag]; 73 | } 74 | 75 | function setHeaders(metadata, contentLength, title, contentType) { 76 | let headers = {}; 77 | const restrictedHeaders = [ 78 | "trueuri", 79 | "isemailnotification", 80 | "iaidentifier", 81 | "contenttype", 82 | "pdfurl", 83 | ]; 84 | headers[ 85 | "Authorization" 86 | ] = `LOW ${process.env.access_key}:${process.env.secret_key}`; 87 | if (contentType === "pdf") { 88 | headers["Content-type"] = `application/${contentType}; charset=utf-8`; 89 | headers["Accept-Charset"] = "utf-8"; 90 | } else { 91 | headers["Content-type"] = `application/${contentType}`; 92 | } 93 | headers["Content-length"] = contentLength; 94 | headers["X-Amz-Auto-Make-Bucket"] = 1; 95 | headers["X-Archive-meta-collection"] = "opensource"; 96 | headers["X-Archive-Ignore-Preexisting-Bucket"] = 1; 97 | headers["X-archive-meta-identifier"] = title; 98 | headers["X-archive-meta-mediatype"] = "texts"; 99 | headers["X-archive-meta-uploader"] = process.env.IA_EMAIL; //To be added 100 | headers["X-archive-meta-contributor"] = "Panjab Digital Library"; //To be added 101 | headers["X-archive-meta-betterpdf"] = true; //To be added 102 | headers[ 103 | "X-archive-meta-external-identifier" 104 | ] = `urn:pdl:${metadata["bookID"]}:${metadata["categoryID"]}`; //To be added 105 | for (var key in metadata) { 106 | let meta_key = key.trim().replace(/ /g, "-").toLowerCase(); 107 | if (!_.includes(restrictedHeaders, meta_key)) 108 | headers[`X-archive-meta-${meta_key}`] = metadata[key]; 109 | } 110 | headers["X-archive-meta-title"] = metadata["title"]; 111 | headers[`X-archive-meta-description`] = metadata.description 112 | ? 
`uri(${encodeURI(metadata.description?.trim())})` 113 | : ""; 114 | return headers; 115 | } 116 | 117 | async function uploadZipToIA( 118 | zip, 119 | metadata, 120 | byteLength, 121 | email, 122 | job, 123 | trueURI, 124 | onError 125 | ) { 126 | const bucketTitle = metadata.IAIdentifier; 127 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}_images.zip`; 128 | metadata = _.omit(metadata, [ 129 | "coverImage", 130 | "commonsMetadata", 131 | "isUploadCommons", 132 | "oauthToken", 133 | "userName", 134 | ]); 135 | let headers = setHeaders( 136 | metadata, 137 | byteLength, 138 | metadata.title, 139 | job.data.details.contentType 140 | ); 141 | await zip.generateNodeStream({ type: "nodebuffer", streamFiles: true }).pipe( 142 | request( 143 | { 144 | method: "PUT", 145 | preambleCRLF: true, 146 | postambleCRLF: true, 147 | uri: IAuri, 148 | headers: headers, 149 | }, 150 | (error, response, body) => { 151 | if (response.statusCode === 200) { 152 | onError(false, null); 153 | } else { 154 | const errorMessage = !body ? error : body; 155 | logger.log({ 156 | level: "error", 157 | message: `IA Failure PDL ${errorMessage}`, 158 | }); 159 | onError(true, errorMessage); 160 | } 161 | } 162 | ) 163 | ); 164 | } 165 | 166 | function uploadPdfToIA(pdfUrl, job, metadata, trueURI, done) { 167 | const getPdf = request(pdfUrl); 168 | let bufferLength = 0; 169 | const chunks = []; 170 | const bucketTitle = metadata.IAIdentifier; 171 | const IAuri = `http://s3.us.archive.org/${bucketTitle}/${bucketTitle}.pdf`; 172 | getPdf.on("response", function (data) { 173 | if (data.statusCode !== 200) { 174 | logger.log({ 175 | level: "error", 176 | message: `Failure PDL: Failed to download PDF. 
Status Code: ${data.statusCode}`, 177 | }); 178 | done(new Error("Failed to download PDF.")); 179 | } else { 180 | job.progress({ 181 | step: "Uploading to Internet Archive", 182 | value: `(${20}%)`, 183 | }); 184 | } 185 | }); 186 | 187 | getPdf.on("end", function () { 188 | const IAMetadata = { ...metadata }; 189 | delete IAMetadata["commonsMetadata"]; 190 | delete IAMetadata["isUploadCommons"]; 191 | delete IAMetadata["oauthToken"]; 192 | delete IAMetadata["userName"]; 193 | const newBuffer = Buffer.concat(chunks); 194 | var bufferStream = new stream.PassThrough(); 195 | bufferStream.end(newBuffer); 196 | job.progress({ 197 | step: "Uploading to Internet Archive", 198 | value: `(${80}%)`, 199 | }); 200 | let headers = setHeaders( 201 | IAMetadata, 202 | bufferLength, 203 | metadata.title, 204 | job.data.details.contentType 205 | ); 206 | bufferStream.pipe( 207 | request( 208 | { 209 | method: "PUT", 210 | preambleCRLF: true, 211 | postambleCRLF: true, 212 | uri: IAuri, 213 | headers, 214 | }, 215 | async (error, response, body) => { 216 | if (error || response.statusCode != 200) { 217 | const errorMessage = !body ? 
error : body; 218 | logger.log({ 219 | level: "error", 220 | message: `IA Failure PDL ${errorMessage}`, 221 | }); 222 | if (metadata.isEmailNotification === "true") { 223 | EmailProducer( 224 | job.data.details.userName, 225 | metadata.title, 226 | trueURI, 227 | { 228 | archive: false, 229 | commons: false, 230 | } 231 | ); 232 | } 233 | done(new Error(errorMessage)); 234 | } else { 235 | job.progress({ 236 | step: "Upload To IA", 237 | value: `(${100}%)`, 238 | }); 239 | if ( 240 | job.data.details.isUploadCommons !== "true" && 241 | metadata.isEmailNotification !== "true" 242 | ) { 243 | done(null, true); 244 | } 245 | if ( 246 | job.data.details.isUploadCommons !== "true" && 247 | metadata.isEmailNotification === "true" 248 | ) { 249 | EmailProducer( 250 | job.data.details.userName, 251 | metadata.title, 252 | trueURI, 253 | { 254 | archive: true, 255 | commons: false, 256 | } 257 | ); 258 | done(null, true); 259 | } 260 | if (job.data.details.isUploadCommons === "true") { 261 | job.progress({ 262 | step: "Uploading to Wikimedia Commons", 263 | value: `(50%)`, 264 | }); 265 | CommonsProducer( 266 | null, 267 | null, 268 | job.data.details, 269 | "pdl", 270 | async (commonsResponse) => { 271 | if (commonsResponse.status === true) { 272 | job.progress({ 273 | step: "Upload to Wikimedia Commons", 274 | value: `(100%)`, 275 | wikiLinks: { 276 | commons: await commonsResponse.value.filename, 277 | }, 278 | }); 279 | if (metadata.isEmailNotification === "true") { 280 | const commonsLink = `https://commons.wikimedia.org/wiki/File:${commonsResponse.value.filename}`; 281 | EmailProducer( 282 | job.data.details.userName, 283 | metadata.title, 284 | { archiveLink: trueURI, commonsLink: commonsLink }, 285 | { 286 | archive: true, 287 | commons: true, 288 | } 289 | ); 290 | } 291 | } else { 292 | job.progress({ 293 | step: "Upload To IA (100%), Upload To Commons", 294 | value: `(Failed)`, 295 | }); 296 | if (metadata.isEmailNotification === "true") { 297 | EmailProducer( 
298 | job.data.details.userName, 299 | metadata.title, 300 | trueURI, 301 | { 302 | archive: true, 303 | commons: false, 304 | } 305 | ); 306 | } 307 | } 308 | return done(null, true); 309 | } 310 | ); 311 | } 312 | } 313 | } 314 | ) 315 | ); 316 | }); 317 | 318 | getPdf.on("data", function (chunk) { 319 | bufferLength += chunk.length; 320 | chunks.push(chunk); 321 | }); 322 | } 323 | 324 | PDLQueue.process(async (job, done) => { 325 | try { 326 | const jobLogs = job.data.details; 327 | const trueURI = `http://archive.org/details/${job.data.details.IAIdentifier}`; 328 | jobLogs["trueURI"] = trueURI; 329 | jobLogs["userName"] = job.data.details.userName; 330 | job.log(JSON.stringify(jobLogs)); 331 | logUserData(jobLogs["userName"], "Panjab Digital Library"); 332 | 333 | if (job.data.details.pdfUrl) { 334 | uploadPdfToIA( 335 | job.data.details.pdfUrl, 336 | job, 337 | job.data.details, 338 | trueURI, 339 | done 340 | ); 341 | } else { 342 | const [zip, byteLength, errorFlag] = await getZipAndBytelength( 343 | job.data.details.Pages, 344 | job.data.details.bookID, 345 | job.data.details.title, 346 | job 347 | ); 348 | if (errorFlag.status) { 349 | logger.log({ 350 | level: "error", 351 | message: `Failure PDL: Failed to download ${errorFlag.page}`, 352 | }); 353 | done(new Error(`Failure PDL: Failed to download ${errorFlag.page}`)); 354 | } 355 | job.progress({ 356 | step: "Uploading to Internet Archive", 357 | value: `(${90}%)`, 358 | }); 359 | await uploadZipToIA( 360 | zip, 361 | job.data.details, 362 | byteLength, 363 | job.data.details.email, 364 | job, 365 | trueURI, 366 | async (isError, error) => { 367 | if (isError) { 368 | logger.log({ 369 | level: "error", 370 | message: `IA Failure PDL: ${error}`, 371 | }); 372 | if (job.data.details.isEmailNotification === "true") { 373 | EmailProducer( 374 | job.data.details.userName, 375 | job.data.details.title, 376 | trueURI, 377 | { 378 | archive: false, 379 | commons: false, 380 | } 381 | ); 382 | } 383 | done(new 
Error(error)); 384 | } else { 385 | job.progress({ 386 | step: "Upload To IA", 387 | value: `(${100}%)`, 388 | }); 389 | if ( 390 | job.data.details.isUploadCommons !== "true" && 391 | job.data.details.isEmailNotification !== "true" 392 | ) { 393 | done(null, true); 394 | } 395 | if ( 396 | job.data.details.isUploadCommons !== "true" && 397 | job.data.details.isEmailNotification === "true" 398 | ) { 399 | EmailProducer( 400 | job.data.details.userName, 401 | job.data.details.title, 402 | trueURI, 403 | { 404 | archive: true, 405 | commons: false, 406 | } 407 | ); 408 | done(null, true); 409 | } 410 | if (job.data.details.isUploadCommons === "true") { 411 | job.progress({ 412 | step: "Uploading to Wikimedia Commons", 413 | value: `(50%)`, 414 | }); 415 | const base64Zip = await zip.generateAsync({ type: "base64" }); 416 | CommonsProducer( 417 | "pdlZip", 418 | base64Zip, 419 | job.data.details, 420 | async (commonsResponse) => { 421 | if (commonsResponse.status === true) { 422 | job.progress({ 423 | step: "Upload to Wikimedia Commons", 424 | value: `(100%)`, 425 | wikiLinks: { 426 | commons: await commonsResponse.value.filename, 427 | }, 428 | }); 429 | if (job.data.details.isEmailNotification === "true") { 430 | const commonsLink = `https://commons.wikimedia.org/wiki/File:${commonsResponse.value.filename}`; 431 | EmailProducer( 432 | job.data.details.userName, 433 | job.data.details.title, 434 | { archiveLink: trueURI, commonsLink: commonsLink }, 435 | { 436 | archive: true, 437 | commons: true, 438 | } 439 | ); 440 | } 441 | } else { 442 | job.progress({ 443 | step: "Upload To IA (100%), Upload To Commons", 444 | value: `(Failed)`, 445 | }); 446 | if (job.data.details.isEmailNotification === "true") { 447 | EmailProducer( 448 | job.data.details.userName, 449 | job.data.details.title, 450 | trueURI, 451 | { 452 | archive: true, 453 | commons: false, 454 | } 455 | ); 456 | } 457 | } 458 | } 459 | ); 460 | } 461 | return done(null, true); 462 | } 463 | } 464 | ); 465 
| } 466 | } catch (error) { 467 | logger.log({ 468 | level: "error", 469 | message: `Failure PDL Queue: ${error}`, 470 | }); 471 | done(new Error(error)); 472 | } 473 | }); 474 | -------------------------------------------------------------------------------- /utils/helper.js: -------------------------------------------------------------------------------- 1 | /* Helper functions to modularize the code */ 2 | const fetch = require("isomorphic-fetch"); 3 | const rp = require("request-promise"); 4 | const _ = require("lodash"); 5 | const winston = require("winston"); 6 | const { truncate } = require("fs"); 7 | const logger = winston.loggers.get("defaultLogger"); 8 | const fs = require("fs"); 9 | const { Mwn } = require("mwn"); 10 | const JSZip = require("jszip"); 11 | const PDFDocument = require("pdfkit"); 12 | const path = require("path"); 13 | const { PDFDocument: PDFLibDocument } = require("pdf-lib"); 14 | 15 | module.exports = { 16 | checkIfFileExistsAtIA: async (ID) => { 17 | const fetchCall = await fetch(`https://archive.org/metadata/${ID}`); 18 | const resp = await fetchCall.json(); 19 | if (!_.isEmpty(resp)) { 20 | if (_.has(resp, "metadata.uploader") === true) { 21 | return resp.metadata.uploader !== process.env.IA_EMAIL; 22 | } else { 23 | return true; 24 | } 25 | } else { 26 | return false; 27 | } 28 | }, 29 | 30 | replaceTitle: (title) => { 31 | return title.replace(/[ \(\)\[\],:]/g, ""); 32 | }, 33 | 34 | customFetch: async ( 35 | URI, 36 | method = "GET", 37 | headers = new Headers(), 38 | contentType = "other" 39 | ) => { 40 | return fetch(URI, { 41 | method: method, 42 | headers: headers, 43 | }) 44 | .then( 45 | (res) => { 46 | if (res.status === 404) { 47 | return 404; 48 | } else { 49 | const result = contentType === "file" ? 
res : res.json(); 50 | return result; 51 | } 52 | }, 53 | (err) => { 54 | logger.log({ 55 | level: "error", 56 | message: `customFetch ${err}`, 57 | }); 58 | return 404; 59 | } 60 | ) 61 | .catch((err) => { 62 | logger.log({ 63 | level: "error", 64 | message: `customFetch catch ${err}`, 65 | }); 66 | return 404; 67 | }); 68 | }, 69 | 70 | queueData: async (job, queue) => { 71 | if (!job) return null; 72 | const jobid = job.id; 73 | const { logs } = await queue.getJobLogs(jobid, 0); 74 | if (logs[0]) return JSON.parse(logs[0]); 75 | else return []; 76 | }, 77 | 78 | bookTitle: { 79 | gb: "volumeInfo.title", 80 | pdl: "title", 81 | trove: "name", 82 | }, 83 | 84 | userNameLocation: { 85 | gb: "userName", 86 | pdl: "details.userName", 87 | trove: "details.userName", 88 | }, 89 | 90 | jobData: (job, queue) => { 91 | const bookTitlePath = { 92 | gb: "volumeInfo.title", 93 | pdl: "title", 94 | trove: "name", 95 | }; 96 | if (!job) return null; 97 | return _.get(job.data.details, bookTitlePath[`${queue}`]); 98 | }, 99 | 100 | statusConfig: (processedOn, sum) => { 101 | return { 102 | [sum]: "Completed", 103 | [processedOn]: "Active", 104 | 0: "In Queue", 105 | }; 106 | }, 107 | 108 | getPreviewLink: (queue_name, book_id, category_id = null) => { 109 | const previewLinks = { 110 | gb: `http://books.google.co.in/books?id=${book_id}&hl=&source=gbs_api`, 111 | pdl: `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${book_id}&page=1&CategoryID=${category_id}&Searched=W3GX`, 112 | trove: `https://trove.nla.gov.au/ndp/del/title/${book_id}`, 113 | }; 114 | return previewLinks[queue_name]; 115 | }, 116 | 117 | getPDLMetaData: async (cheerioOptions, bookid, categoryID) => { 118 | const $ = await rp(cheerioOptions); 119 | let PNdetails = {}; 120 | const keys = $(".ubhypers"); 121 | const values = $(".dhypers"); 122 | const downloadPdfLink = $("#downloadpdf a")[0]?.attribs.href; 123 | let pagesLabel = $(".ubhypers:contains('Pages')"); 124 | let pagesValue = 
pagesLabel.parent().next().find(".dhypers").text(); 125 | let contentType = "zip"; 126 | function addOtherMetaData(limit, keys, values, PNdetails) { 127 | let value; 128 | for (let i = 0; i < values.length; i++) { 129 | if ($(values[i]).attr("href")) { 130 | if (!$(values[i]).attr("href").includes("Keywords")) { 131 | value = i; 132 | break; 133 | } 134 | } 135 | } 136 | 137 | if (value <= limit) { 138 | const add = limit - value; 139 | for (let i = value; i < values.length; i++) { 140 | PNdetails[[$(keys[i + add]).text()]] = $(values[i]).text().trim(); 141 | } 142 | } else { 143 | const sub = value - limit; 144 | for (let i = value; i < values.length; i++) { 145 | PNdetails[[$(keys[i - sub]).text()]] = $(values[i]).text().trim(); 146 | } 147 | } 148 | } 149 | 150 | if ($(values[0]).text().trim() === "Click here to add description") { 151 | if ($(values[1]).text().trim() === "Click here to suggest keywords") { 152 | for (let i = 2; i < values.length; i++) { 153 | PNdetails[[$(keys[i + 1]).text()]] = $(values[i]).text().trim(); 154 | } 155 | } else { 156 | addOtherMetaData(4, keys, values, PNdetails); 157 | } 158 | } else if ( 159 | $(values[0]).text().trim() === "Click here to suggest keywords" 160 | ) { 161 | for (let i = 1; i < values.length; i++) { 162 | PNdetails[[$(keys[i + 2]).text()]] = $(values[i]).text().trim(); 163 | } 164 | PNdetails.description = $( 165 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td:nth-child(2)" 166 | ) 167 | .text() 168 | .trim(); 169 | } else { 170 | addOtherMetaData(5, keys, values, PNdetails); 171 | PNdetails.description = $( 172 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > 
td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td:nth-child(2)" 173 | ) 174 | .text() 175 | .trim(); 176 | PNdetails.description = PNdetails.description.replace(/\n/g, ""); 177 | PNdetails.description = PNdetails.description.replace(/\[edit]/g, ""); 178 | } 179 | 180 | PNdetails.title = $( 181 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td > a" 182 | ) 183 | .text() 184 | .trim(); 185 | PNdetails.bookID = bookid; 186 | PNdetails.categoryID = categoryID; 187 | let src = $( 188 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(1) > table > tbody > tr:nth-child(1) > td > table > tbody > tr > td > a > img" 189 | ).attr("src"); 190 | src = src.match(/pdl.*/gm); 191 | PNdetails.coverImage = `http://panjabdigilib.org/${src}`; 192 | 193 | if (downloadPdfLink?.length) { 194 | contentType = "pdf"; 195 | PNdetails.pdfUrl = `http://www.panjabdigilib.org/webuser/searches/${downloadPdfLink}`; 196 | } 197 | PNdetails.contentType = contentType; 198 | PNdetails.Pages = pagesValue; 199 | delete PNdetails[""]; 200 | return PNdetails; 201 | }, 202 | 203 | getPDLTitle: async (cheerioOptions) => { 204 | const $ = await rp(cheerioOptions); 205 | return $( 206 | "#Nanakshahi > tbody > tr:nth-child(3) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > table:nth-child(22) > tbody > tr:nth-child(3) > td > table > tbody > 
tr:nth-child(2) > td > table > tbody > tr > td:nth-child(2) > div > table > tbody > tr:nth-child(1) > td > a" 207 | ) 208 | .text() 209 | .trim(); 210 | }, 211 | 212 | getTroveMetaData: async (cheerioOptions) => { 213 | const $ = await rp(cheerioOptions); 214 | const issueRenditionId = $(".issueRendition") 215 | .attr("data-prepurl") 216 | .match(/\d+/); 217 | if (issueRenditionId && issueRenditionId !== null) 218 | return issueRenditionId[0]; 219 | else 220 | logger.log({ 221 | level: "error", 222 | message: `issueRenditionId not found ${issueRenditionId}`, 223 | }); 224 | }, 225 | checkForPublicDomain: (data, res) => { 226 | if (data === 404) { 227 | res.send({ error: true, message: "Invalid Book ID" }); 228 | return { 229 | error: true, 230 | }; 231 | } 232 | if (data.error) { 233 | if (data.error.code === 503) { 234 | //Google Books error 235 | res.send({ error: true, message: "Invalid Book ID" }); 236 | return { 237 | error: true, 238 | }; 239 | } 240 | } else { 241 | const { publicDomain } = data.accessInfo; //Response object destructuring 242 | if (publicDomain === false) { 243 | //Checking if the book belongs to publicDomain 244 | res.send({ error: true, message: "Not in public domain." 
}); 245 | return { 246 | error: true, 247 | }; 248 | } else { 249 | return { 250 | error: false, 251 | data, 252 | }; 253 | } 254 | } 255 | }, 256 | 257 | convertZipToPdf: async (targetZip, localFilePath) => { 258 | async function mergePdf(pdfDataArray) { 259 | try { 260 | const mergedPdf = await PDFLibDocument.create(); 261 | for (const pdfData of pdfDataArray) { 262 | const pdfDoc = await PDFLibDocument.load(pdfData); 263 | const pages = await mergedPdf.copyPages( 264 | pdfDoc, 265 | pdfDoc.getPageIndices() 266 | ); 267 | for (const page of pages) { 268 | mergedPdf.addPage(page); 269 | } 270 | } 271 | 272 | const mergedPdfFile = await mergedPdf.save(); 273 | await fs.promises.writeFile(localFilePath, mergedPdfFile); 274 | return { status: 200 }; 275 | } catch (error) { 276 | logger.log({ 277 | level: "error", 278 | message: `PDL - convertZipToPdf/mergePdf: ${error}`, 279 | }); 280 | return { status: 404, error: error }; 281 | } 282 | } 283 | 284 | async function zipToPdf() { 285 | try { 286 | const pdfInstances = []; 287 | await Promise.all( 288 | Object.values(targetZip.files).map(async (file, index) => { 289 | if (file.dir) return; 290 | if ([".jpg", ".jpeg", ".png"].includes(path.extname(file.name))) { 291 | const data = await file.async("nodebuffer"); 292 | const pdfDoc = new PDFDocument(); 293 | const buffers = []; 294 | const writeStream = new require("stream").Writable({ 295 | write(chunk, encoding, callback) { 296 | buffers.push(chunk); 297 | callback(); 298 | }, 299 | }); 300 | pdfDoc.pipe(writeStream); 301 | pdfDoc.image(data, 0, 0, { fit: [595.28, 841.89] }); // A4 size 302 | pdfDoc.end(); 303 | return new Promise((resolve) => { 304 | writeStream.on("finish", () => { 305 | pdfInstances.push({ 306 | index, 307 | pdfInstance: Buffer.concat(buffers), 308 | }); 309 | resolve(); 310 | }); 311 | }); 312 | } 313 | }) 314 | ); 315 | 316 | pdfInstances.sort((a, b) => a.index - b.index); 317 | 318 | const sortedPdfInstances = pdfInstances.map( 319 | ({ 
pdfInstance }) => pdfInstance 320 | ); 321 | return await mergePdf(sortedPdfInstances); 322 | } catch (error) { 323 | logger.log({ 324 | level: "error", 325 | message: `PDL - convertZipToPdf/zipToPdf: ${error}`, 326 | }); 327 | return { status: 404, error: error }; 328 | } 329 | } 330 | return await zipToPdf(); 331 | }, 332 | 333 | logUserData: (userName, libraryName) => { 334 | logger.log({ 335 | level: "info", 336 | message: `User ${userName} uploaded using ${libraryName}`, 337 | }); 338 | }, 339 | downloadFile: async (downloadUrl, localFilepath) => { 340 | try { 341 | const fileRes = await fetch(downloadUrl, { 342 | method: "GET", 343 | headers: new Headers({ 344 | "Content-Type": "application/pdf", 345 | }), 346 | }); 347 | const fileBuffer = await fileRes.buffer(); 348 | await fs.promises.writeFile(localFilepath, fileBuffer); 349 | return { 350 | writeFileStatus: 200, 351 | }; 352 | } catch (error) { 353 | logger.log({ 354 | level: "error", 355 | message: `downloadFile: ${error}`, 356 | }); 357 | return error; 358 | } 359 | }, 360 | uploadToCommons: async (metadata) => { 361 | try { 362 | const bot = await Mwn.init({ 363 | apiUrl: process.env.NEXT_PUBLIC_COMMONS_URL + "/w/api.php", 364 | OAuth2AccessToken: metadata.oauthToken, 365 | userAgent: "bub2.wmcloud ([[https://bub2.wmcloud.org]])", 366 | defaultParams: { 367 | assert: "user", 368 | }, 369 | }); 370 | 371 | bot.userinfo(); 372 | 373 | const commonsFilePayload = "commonsFilePayload.pdf"; 374 | let title = 375 | metadata.details?.volumeInfo?.title || metadata.name || metadata.title; 376 | title = title.replaceAll(".", ""); 377 | const response = await bot.upload( 378 | commonsFilePayload, 379 | title, 380 | metadata.commonsMetadata 381 | ); 382 | if (await response.filename) { 383 | await fs.promises.unlink(commonsFilePayload); 384 | } 385 | logger.log({ 386 | level: "info", 387 | message: `uploadToCommons: Upload of ${metadata.IAIdentifier} to commons successful`, 388 | }); 389 | return { 390 | 
fileUploadStatus: 200, 391 | filename: response.filename, 392 | }; 393 | } catch (error) { 394 | await fs.promises.unlink("commonsFilePayload.pdf"); 395 | logger.log({ 396 | level: "error", 397 | message: `uploadToCommons (catch): ${error}`, 398 | }); 399 | logger.log({ 400 | level: "error", 401 | message: `accessToken: ${metadata.oauthToken}`, 402 | }); 403 | return error; 404 | } 405 | }, 406 | 407 | uploadToWikiData: async (metadata, commonsItemFilename, libraryName) => { 408 | if (libraryName !== "gb") { 409 | //support only for Google Books for now 410 | return 404; 411 | } 412 | try { 413 | const title = metadata.details.volumeInfo.title || ""; 414 | const id = metadata.details.id || ""; 415 | const authorsArr = metadata.details.volumeInfo.authors 416 | ? metadata.details.volumeInfo.authors.join().trim() 417 | : null; 418 | 419 | const GBWikiDataPayload = { 420 | item: { 421 | labels: { 422 | en: title, 423 | }, 424 | descriptions: { 425 | en: "edition of a written work", 426 | }, 427 | statements: { 428 | P675: [ 429 | { 430 | rank: "normal", 431 | property: { 432 | id: "P675", 433 | }, 434 | value: { 435 | content: id, 436 | type: "value", 437 | }, 438 | qualifiers: [], 439 | references: [], 440 | }, 441 | ], 442 | P31: [ 443 | { 444 | rank: "normal", 445 | property: { 446 | id: "P31", 447 | "data-type": "wikibase-item", 448 | }, 449 | value: { 450 | type: "value", 451 | content: "Q47461344", //wikidata id for 'written work' 452 | }, 453 | qualifiers: [], 454 | references: [], 455 | }, 456 | ], 457 | P996: [ 458 | { 459 | rank: "normal", 460 | property: { 461 | id: "P996", 462 | "data-type": "commonsMedia", 463 | }, 464 | value: { 465 | content: commonsItemFilename, 466 | type: "value", 467 | }, 468 | qualifiers: [], 469 | references: [], 470 | }, 471 | ], 472 | P2093: [ 473 | { 474 | rank: "normal", 475 | property: { 476 | id: "P2093", 477 | }, 478 | value: { 479 | content: authorsArr, 480 | type: "value", 481 | }, 482 | qualifiers: [], 483 | references: 
[], 484 | }, 485 | ], 486 | P373: [ 487 | { 488 | rank: "normal", 489 | property: { 490 | id: "P373", 491 | }, 492 | value: { 493 | content: "Files_uploaded_with_BUB2", 494 | type: "value", 495 | }, 496 | qualifiers: [], 497 | references: [], 498 | }, 499 | ], 500 | P1476: [ 501 | { 502 | rank: "normal", 503 | property: { 504 | id: "P1476", 505 | "data-type": "monolingualtext", 506 | }, 507 | value: { 508 | type: "value", 509 | content: { 510 | text: title, 511 | language: "en", 512 | }, 513 | }, 514 | qualifiers: [], 515 | references: [], 516 | }, 517 | ], 518 | }, 519 | }, 520 | }; 521 | 522 | const TestPayload = { 523 | item: { 524 | labels: { 525 | en: "test12", 526 | }, 527 | descriptions: { 528 | en: "test1123", 529 | }, 530 | statements: { 531 | P97012: [ 532 | { 533 | rank: "normal", 534 | property: { 535 | id: "P97012", 536 | }, 537 | value: { 538 | content: "hello 1", 539 | type: "value", 540 | }, 541 | qualifiers: [], 542 | references: [], 543 | }, 544 | ], 545 | }, 546 | }, 547 | tags: [], 548 | bot: false, 549 | comment: "Metadata updated by BUB2", 550 | }; 551 | 552 | const wikiDataAPI = await fetch( 553 | `${process.env.NEXT_PUBLIC_WIKIDATA_URL}/w/rest.php/wikibase/v0/entities/items`, 554 | { 555 | method: "POST", 556 | headers: { 557 | Authorization: 558 | "Bearer 
eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiIzM2I4ODg5NTJhOGQ3ZWVjMTNiNmVkNjMxNmQ2YjdiMSIsImp0aSI6IjU1NjY1MTM4M2Y1NmIwZTQ4YWVlNWE4NGYxYmZlNjAzZWIyYzRlYTk5MWZhN2M4YzY1ZGZkMTMxMGRkMzYyMTNmNmM2N2FjNzQ0MDVmOGRhIiwiaWF0IjoxNzE3ODcwMjQ5LjI5Mzc0NywibmJmIjoxNzE3ODcwMjQ5LjI5Mzc1LCJleHAiOjE3MTc4ODQ2NDkuMjg0MTg3LCJzdWIiOiI0NDc1MDA3NiIsImlzcyI6Imh0dHBzOi8vbWV0YS53aWtpbWVkaWEub3JnIiwicmF0ZWxpbWl0Ijp7InJlcXVlc3RzX3Blcl91bml0Ijo1MDAwLCJ1bml0IjoiSE9VUiJ9LCJzY29wZXMiOlsiYmFzaWMiLCJlZGl0cGFnZSIsImNyZWF0ZWVkaXRtb3ZlcGFnZSIsInVwbG9hZGZpbGUiLCJ1cGxvYWRlZGl0bW92ZWZpbGUiLCJzZW5kZW1haWwiXX0.M3OHvdO37MjlcfqAVzLHuXNodO87BrgS5YjIZ5VJn9_Tp1oEvBBydnJH5wyJdxSfHCAay7c8NdBglbrNCTRdOnFAWw2LbfMK8D8W53x2ilFmgq7oXG3EMRICgztYgA0YUCHvbq2TlpnizfrMqVcSeiSidDUH9s1DiT2xce1110e5VfFTDh1l0YB3BGXPHNezEnXsaLm_90dobrZeSiW6T94CCwpQ7dy88SEOfYPjNLRUTTmeAlOgV1ogdoDkJVUumzCnBj-05l_GVbPCQ6VbV-m4aDurnBli2Fjj_Nl4CV8K14ce1HxSi8MuNgbZsSwpNm73PVqFF_0aqBquGURdw2ysep61_MaPxGY9suNTW3uZ8pVVAypbrLeI8aczIbepbc-Vf8k0gVJXaJzTOo_l-xRNAXOdTzMd-6dMnypk4u4o0SITPD1prO8_kzgKtSdAUrrEQZgoexg1RUWQvwdk2cSlwStnIUjY-5qY9g2Y-W2qQJXq4I1-UTF8NL5DPjTpfdl0Qm2BdUNWyvKtqxFBi_96g9lmO8-vFOcuCSiFPM2nY1dHcnGgh7pzqQYmEEJ1p1YXYsHB4_rHujNP0NrlkCOk_zCieL0pRhDS-qZLZnNwVBb1fZj6dlV260TSRwWHIgF1fjCC2uJVgMTpR2-IO2bJUJoUgbQE9tyMbrlqxOE", 559 | "Content-Type": "application/json", 560 | }, 561 | body: JSON.stringify(TestPayload), 562 | } 563 | ); 564 | console.log(metadata.oauthToken, "::oauthToken"); 565 | if (wikiDataAPI.status === 201) { 566 | console.log("success"); 567 | const data = await wikiDataAPI.json(); 568 | return data.id; 569 | } else { 570 | const errorData = await wikiDataAPI.json(); 571 | logger.log({ 572 | level: "error", 573 | message: `wikiDataAPIFailure (fetch):${JSON.stringify(errorData)}`, 574 | }); 575 | return 404; 576 | } 577 | } catch (error) { 578 | logger.log({ 579 | level: "error", 580 | message: `uploadToWikidata:${error}`, 581 | }); 582 | return 404; 583 | } 584 | }, 585 | }; 586 | 
-------------------------------------------------------------------------------- /server.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const express = require("express"); 3 | const next = require("next"); 4 | const bodyParser = require("body-parser"); 5 | const cors = require("cors"); 6 | const open = require("open"); 7 | const compression = require("compression"); 8 | require("dotenv").config(); 9 | const dev = process.env.NODE_ENV !== "production"; 10 | const PORT = process.env.PORT || 5000; 11 | const GB_KEY = process.env.GB_KEY; 12 | const trove_key = process.env.trove_key; 13 | const winston = require("winston"); 14 | const cheerio = require("cheerio"); // Basically jQuery for node.js 15 | const app = next({ 16 | dev, 17 | }); 18 | 19 | const logger = winston.loggers.add("defaultLogger", { 20 | level: "info", 21 | format: winston.format.combine( 22 | winston.format.timestamp(), 23 | winston.format.json() 24 | ), 25 | transports: [ 26 | // 27 | // - Write all logs with importance level of `error` or less to `error.log` 28 | // - Write all logs with importance level of `info` or less to `combined.log` 29 | // 30 | new winston.transports.File({ filename: "error.log", level: "error" }), 31 | new winston.transports.File({ filename: "combined.log" }), 32 | ], 33 | }); 34 | 35 | const handle = app.getRequestHandler(); 36 | var emailaddr = ""; 37 | var authUserName = ""; 38 | const { 39 | customFetch, 40 | queueData, 41 | statusConfig, 42 | bookTitle, 43 | userNameLocation, 44 | getPreviewLink, 45 | jobData, 46 | checkForPublicDomain, 47 | checkIfFileExistsAtIA, 48 | replaceTitle, 49 | getPDLTitle, 50 | getPDLMetaData, 51 | } = require("./utils/helper.js"); 52 | const GoogleBooksProducer = require("./bull/google-books-queue/producer"); 53 | const PDLProducer = require("./bull/pdl-queue/producer"); 54 | const TroveProducer = require("./bull/trove-queue/producer"); 55 | const { exec } = 
require("child_process"); 56 | const config = require("./utils/bullconfig"); 57 | const _ = require("lodash"); 58 | 59 | app 60 | .prepare() 61 | .then(() => { 62 | const server = express(); 63 | 64 | //Parse application/x-www-form-urlencoded 65 | server.use( 66 | bodyParser.urlencoded({ 67 | extended: true, 68 | }) 69 | ); 70 | 71 | //Parse application/json 72 | server.use(bodyParser.json()); 73 | 74 | //Enable and use CORS 75 | server.use( 76 | cors({ 77 | credentials: true, 78 | origin: true, 79 | }) 80 | ); 81 | 82 | server.use(compression()); 83 | 84 | /** 85 | * Every custom route that we build needs to arrive before the * wildcard. 86 | * This is necessary because otherwise the server won't recognise the route. 87 | */ 88 | 89 | server.get("/getstats", async (req, res) => { 90 | const pdl_queue = config.getNewQueue("pdl-queue"); 91 | const google_books_queue = config.getNewQueue("google-books-queue"); 92 | const trove_queue = config.getNewQueue("trove-queue"); 93 | const pdl_queue_count = await pdl_queue.getJobCounts(); 94 | const google_books_queue_count = await google_books_queue.getJobCounts(); 95 | const trove_queue_count = await trove_queue.getJobCounts(); 96 | const queueStats = { 97 | pdl: pdl_queue_count, 98 | gb: google_books_queue_count, 99 | trove: trove_queue_count, 100 | }; 101 | const commonsRes = await customFetch( 102 | process.env.NEXT_PUBLIC_COMMONS_URL + 103 | "/w/api.php?action=query&prop=categoryinfo&titles=Category:Files_uploaded_with_BUB2&format=json", 104 | "GET" 105 | ); 106 | customFetch( 107 | `https://archive.org/advancedsearch.php?q=${process.env.IA_EMAIL}+&rows=0&output=json`, 108 | "GET" 109 | ).then((resp) => { 110 | if (resp && resp.response && resp.response.numFound) { 111 | const pages = commonsRes?.query?.pages; 112 | const page_no = pages[`${_.keys(pages)}`]; 113 | res.send({ 114 | queueStats: queueStats, 115 | totalUploadedCount: resp.response.numFound, 116 | commonsUploadedCount: page_no?.categoryinfo?.files 117 | ? 
page_no.categoryinfo.files 118 | : "0", 119 | }); 120 | } 121 | }); 122 | }); 123 | 124 | server.get("/getJobInformation", async (req, res) => { 125 | try { 126 | let queue, queueName; 127 | switch (req.query.queue_name) { 128 | case "gb": 129 | queue = config.getNewQueue("google-books-queue"); 130 | queueName = "Google Books"; 131 | break; 132 | 133 | case "pdl": 134 | queue = config.getNewQueue("pdl-queue"); 135 | queueName = "Panjab Digital Library"; 136 | break; 137 | 138 | case "trove": 139 | queue = config.getNewQueue("trove-queue"); 140 | queueName = "Trove Digital Library"; 141 | break; 142 | 143 | default: 144 | throw "Invalid queue"; 145 | } 146 | if (req.query.job_id) { 147 | const job = await queue.getJob(req.query.job_id); 148 | if (job) { 149 | const queue_data = await queueData(job, queue); 150 | const progress = job.progress().value 151 | ? `${job.progress().step}${job.progress().value}` 152 | : job.progress(); 153 | const jobState = await job.getState(); 154 | const book_id = job.data.details.id || job.data.details.bookID; 155 | const categoryID = job.data.details.categoryID; 156 | const trueURI = _.get(queue_data, "trueURI"); 157 | if (req.query.queue_name === "trove") { 158 | _.set( 159 | queue_data, 160 | "coverImage", 161 | "https://assets.nla.gov.au/logos/trove/trove-colour.svg" 162 | ); 163 | } 164 | function getUploadLink(job, trueURI) { 165 | if (job.progress().step) { 166 | const link = 167 | (job.progress().step.includes("Upload To IA") || 168 | job.progress().step.includes("Upload to Wikimedia")) && 169 | trueURI 170 | ? trueURI 171 | : ""; 172 | return link; 173 | } else { 174 | const link = job.progress() === 100 ? 
trueURI : ""; 175 | return link; 176 | } 177 | } 178 | const obj = { 179 | progress: progress, 180 | queueName: queueName, 181 | previewLink: getPreviewLink( 182 | req.query.queue_name, 183 | book_id, 184 | categoryID 185 | ), 186 | uploadStatus: { 187 | uploadLink: getUploadLink(job, trueURI), 188 | isUploaded: jobState === "completed" ? true : false, 189 | }, 190 | wikimedia_links: { 191 | commons: job.progress().wikiLinks?.commons 192 | ? job.progress().wikiLinks.commons 193 | : "Not Integrated", 194 | wikidata: job.progress().wikiLinks?.wikidata 195 | ? job.progress().wikiLinks.wikidata !== 404 196 | ? job.progress().wikiLinks.wikidata 197 | : "Not Integrated" 198 | : "Not Integrated", 199 | }, 200 | }; 201 | res.send( 202 | Object.assign( 203 | {}, 204 | _.pick(queue_data, [ 205 | "title", 206 | "description", 207 | "imageLinks", 208 | "coverImage", 209 | ]), 210 | obj 211 | ) 212 | ); 213 | } else { 214 | res.send({}); 215 | } 216 | } else { 217 | res.send({}); 218 | } 219 | } catch (err) { 220 | res.send({}); 221 | logger.log({ 222 | level: "error", 223 | message: `getJobInformation ${err}`, 224 | }); 225 | } 226 | }); 227 | 228 | server.get("/getJobProgress", async (req, res) => { 229 | let queue; 230 | switch (req.query.queue_name) { 231 | case "gb": 232 | queue = config.getNewQueue("google-books-queue"); 233 | break; 234 | 235 | case "pdl": 236 | queue = config.getNewQueue("pdl-queue"); 237 | queueName = "Panjab Digital Library"; 238 | break; 239 | 240 | case "trove": 241 | queue = config.getNewQueue("trove-queue"); 242 | queueName = "Trove Digital Library"; 243 | break; 244 | 245 | default: 246 | throw "Invalid queue"; 247 | } 248 | if (req.query.job_id) { 249 | const job = await queue.getJob(req.query.job_id); 250 | if (job) { 251 | return job.progress().value; 252 | } 253 | return null; 254 | } 255 | return null; 256 | }); 257 | 258 | server.get("/allJobs", async (req, res) => { 259 | String.prototype.capitalize = function () { 260 | return 
this.charAt(0).toUpperCase() + this.slice(1); 261 | }; 262 | 263 | const returnJobStatus = (failedReason, finishedOn, processedOn) => { 264 | if (failedReason) return `Failed! (Reason: ${failedReason})`; 265 | if (!finishedOn) finishedOn = null; 266 | if (!processedOn) processedOn = null; 267 | const sum = processedOn + finishedOn; 268 | return statusConfig(processedOn, sum)[sum]; 269 | }; 270 | 271 | try { 272 | let queue; 273 | switch (req.query.queue_name) { 274 | case "gb": 275 | queue = config.getNewQueue("google-books-queue"); 276 | break; 277 | 278 | case "pdl": 279 | queue = config.getNewQueue("pdl-queue"); 280 | break; 281 | 282 | case "trove": 283 | queue = config.getNewQueue("trove-queue"); 284 | break; 285 | 286 | default: 287 | throw "Invalid queue"; 288 | } 289 | queue 290 | .getJobs([ 291 | "active", 292 | "waiting", 293 | "completed", 294 | "failed", 295 | "delayed", 296 | "paused", 297 | ]) 298 | .then((jobs) => { 299 | let filteredJobs = jobs.map((job) => { 300 | let date = new Date(job.timestamp); 301 | let userName = _.get( 302 | job.data, 303 | userNameLocation[req.query.queue_name] 304 | ); 305 | return { 306 | id: Number(job.id), 307 | title: _.get(job.data.details, bookTitle[req.query.queue_name]), 308 | userName: userName ? userName : "-", 309 | timestamp: 310 | date.getUTCFullYear() + 311 | "-" + 312 | parseInt(date.getUTCMonth() + 1) 313 | .toString() 314 | .padStart(2, "0") + 315 | "-" + 316 | date.getUTCDate().toLocaleString(undefined, { 317 | minimumIntegerDigits: 2, 318 | }) + 319 | " " + 320 | date.getUTCHours() + 321 | ":" + 322 | date.getUTCMinutes().toLocaleString(undefined, { 323 | minimumIntegerDigits: 2, 324 | }) + 325 | " (UTC)", 326 | upload_progress: job.progress().step 327 | ? `${job.progress().step}:${job.progress().value}` 328 | : `${job.progress()}%`, 329 | status: returnJobStatus( 330 | job.failedReason, 331 | job.finishedOn, 332 | job.processedOn 333 | ), 334 | wikimedia_links: job.progress().wikiLinks?.commons 335 | ? 
job.progress().wikiLinks.commons 336 | : "Not Integrated", 337 | }; 338 | }); 339 | res.send(_.orderBy(filteredJobs, "id", "desc")); 340 | }) 341 | .catch((err) => { 342 | res.send([]); 343 | logger.log({ 344 | level: "error", 345 | message: `allJobs getJobs ${err}`, 346 | }); 347 | }); 348 | } catch (err) { 349 | res.send([]); 350 | logger.log({ 351 | level: "error", 352 | message: `allJobs ${err}`, 353 | }); 354 | } 355 | }); 356 | 357 | server.get("/getqueue", async (req, res) => { 358 | const pdl_queue = await config.getNewQueue("pdl-queue"); 359 | const google_books_queue = await config.getNewQueue("google-books-queue"); 360 | const trove_queue = await config.getNewQueue("trove-queue"); 361 | const commons_queue = await config.getNewQueue("commons-queue"); 362 | 363 | const queryParams = { 364 | "gb-queue": { 365 | active: "", 366 | waiting: "", 367 | }, 368 | "pdl-queue": { 369 | active: "", 370 | waiting: "", 371 | }, 372 | "trove-queue": { 373 | active: "", 374 | waiting: "", 375 | }, 376 | "commons-queue": { 377 | active: "", 378 | waiting: "", 379 | }, 380 | }; 381 | const pdlqueue_active_job = await pdl_queue.getActive(0, 0); 382 | const pdlqueue_waiting_job = await pdl_queue.getWaiting(0, 0); 383 | 384 | const gbqueue_active_job = await google_books_queue.getActive(0, 0); 385 | const gbqueue_waiting_job = await google_books_queue.getWaiting(0, 0); 386 | 387 | const trovequeue_active_job = await trove_queue.getActive(0, 0); 388 | const trovequeue_waiting_job = await trove_queue.getWaiting(0, 0); 389 | 390 | const commonsqueue_active_job = await commons_queue.getActive(0, 0); 391 | const commonsqueue_waiting_job = await commons_queue.getWaiting(0, 0); 392 | 393 | queryParams["pdl-queue"]["active"] = jobData( 394 | pdlqueue_active_job[0], 395 | "pdl" 396 | ); 397 | queryParams["pdl-queue"]["waiting"] = jobData( 398 | pdlqueue_waiting_job[0], 399 | "pdl" 400 | ); 401 | 402 | queryParams["gb-queue"]["active"] = jobData(gbqueue_active_job[0], "gb"); 403 | 
queryParams["gb-queue"]["waiting"] = jobData( 404 | gbqueue_waiting_job[0], 405 | "gb" 406 | ); 407 | 408 | queryParams["trove-queue"]["active"] = jobData( 409 | trovequeue_active_job[0], 410 | "trove" 411 | ); 412 | queryParams["trove-queue"]["waiting"] = jobData( 413 | trovequeue_waiting_job[0], 414 | "trove" 415 | ); 416 | 417 | queryParams["commons-queue"]["active"] = jobData( 418 | commonsqueue_active_job[0], 419 | "commons" 420 | ); 421 | queryParams["commons-queue"]["waiting"] = jobData( 422 | commonsqueue_waiting_job[0], 423 | "commons" 424 | ); 425 | res.send(queryParams); 426 | }); 427 | 428 | let GBdetails = {}; 429 | let GBreq; 430 | let GBcommonsMetaData; 431 | const isAlphanumericLess50 = /^[a-zA-Z0-9]{1,50}$/; 432 | server.post("/check", async (req, res) => { 433 | const { 434 | bookid, 435 | 436 | option, 437 | 438 | email, 439 | 440 | userName, 441 | 442 | IAtitle, 443 | isEmailNotification, 444 | 445 | isUploadCommons, 446 | oauthToken, 447 | } = req.query; 448 | const commonsMetadata = req.body.commonsMetadata; 449 | emailaddr = email; 450 | authUserName = userName; 451 | switch (option) { 452 | case "gb": 453 | customFetch( 454 | `https://www.googleapis.com/books/v1/volumes/${bookid}?key=${GB_KEY}`, 455 | "GET", 456 | new Headers({ 457 | "Content-Type": "application/json", 458 | }) 459 | ).then(async (data) => { 460 | const { error } = checkForPublicDomain(data, res); 461 | if (!error) { 462 | const titleInIA = 463 | IAtitle.trim() !== "" 464 | ? 
replaceTitle(IAtitle.trim()) 465 | : replaceTitle(data.volumeInfo.title); 466 | if (isAlphanumericLess50.test(titleInIA) === false) { 467 | res.send({ 468 | isInValidIdentifier: true, 469 | titleInIA, 470 | }); 471 | } else if ((await checkIfFileExistsAtIA(titleInIA)) === true) { 472 | res.send({ 473 | isDuplicate: true, 474 | titleInIA, 475 | }); 476 | } else { 477 | GBdetails = data; 478 | GBreq = req; 479 | GBcommonsMetaData = commonsMetadata; 480 | res.send({ 481 | error: false, 482 | message: "In public domain.", 483 | url: data.accessInfo.pdf.downloadLink, 484 | title: data.volumeInfo.title, 485 | IAIdentifier: titleInIA, 486 | }); 487 | } 488 | } 489 | }); 490 | break; 491 | 492 | case "obp": 493 | res.send({ 494 | error: false, 495 | message: "You will be mailed with the details soon!", 496 | }); 497 | 498 | case "pn": 499 | //Check for duplicates 500 | const { categoryID } = req.query; 501 | const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookid}&page=1&CategoryID=${categoryID}&Searched=W3GX`; 502 | var options = { 503 | uri, 504 | transform: function (body) { 505 | return cheerio.load(body); 506 | }, 507 | }; 508 | const titleInIA = 509 | IAtitle.trim() !== "" 510 | ? 
replaceTitle(IAtitle.trim()) 511 | : replaceTitle(await getPDLTitle(options)); 512 | if (titleInIA === "") { 513 | res.send({ 514 | error: true, 515 | message: "Not able to fetch title.", 516 | }); 517 | } else if (isAlphanumericLess50.test(titleInIA) === false) { 518 | res.send({ 519 | isInValidIdentifier: true, 520 | titleInIA, 521 | }); 522 | } else { 523 | if ((await checkIfFileExistsAtIA(titleInIA)) === true) { 524 | res.send({ 525 | isDuplicate: true, 526 | titleInIA, 527 | }); 528 | } else { 529 | res.send({ 530 | error: false, 531 | message: "You will be mailed with the details soon!", 532 | }); 533 | PDLProducer( 534 | bookid, 535 | titleInIA, 536 | categoryID, 537 | email, 538 | authUserName, 539 | isEmailNotification, 540 | isUploadCommons, 541 | oauthToken, 542 | commonsMetadata 543 | ); 544 | } 545 | } 546 | // const isDuplicate = checkForDuplicatesFromIA(`bub_pn_${bookid}`); 547 | // isDuplicate.then(resp => { 548 | // if (resp.response.numFound != 0) { 549 | // res.send({ 550 | // error: true, 551 | // message: "The document already exists on Internet Archive." 552 | // }) 553 | // } 554 | // else { 555 | 556 | // } 557 | // }) 558 | break; 559 | 560 | case "trove": 561 | customFetch( 562 | `https://api.trove.nla.gov.au/v2/newspaper/${bookid}?key=${trove_key}&encoding=json&reclevel=full`, 563 | "GET", 564 | new Headers({ 565 | "Content-Type": "application/json", 566 | }) 567 | ).then(async (data) => { 568 | if (data === 404) { 569 | res.send({ 570 | error: true, 571 | message: "Invalid Newspaper/Gazette ID", 572 | }); 573 | } else { 574 | const name = _.get(data, "article.title.value"); 575 | const titleInIA = 576 | IAtitle.trim() !== "" 577 | ? 
replaceTitle(IAtitle.trim()) 578 | : replaceTitle(name); 579 | if (isAlphanumericLess50.test(titleInIA) === false) { 580 | res.send({ 581 | isInValidIdentifier: true, 582 | titleInIA, 583 | }); 584 | } else if ((await checkIfFileExistsAtIA(titleInIA)) === true) { 585 | res.send({ 586 | isDuplicate: true, 587 | titleInIA, 588 | }); 589 | } else { 590 | troveUrl = `https://trove.nla.gov.au/ndp/del/title/${data.article.title.id}`; 591 | const id = _.get(data, "article.title.id"); 592 | const date = _.get(data, "article.date"); 593 | const troveData = { 594 | id, 595 | name, 596 | troveUrl, 597 | date, 598 | }; 599 | res.send({ 600 | error: false, 601 | message: "You will be mailed with the details soon!", 602 | }); 603 | TroveProducer( 604 | bookid, 605 | 606 | titleInIA, 607 | 608 | troveData, 609 | 610 | email, 611 | 612 | userName, 613 | isEmailNotification, 614 | isUploadCommons, 615 | oauthToken, 616 | commonsMetadata 617 | ); 618 | } 619 | } 620 | }); 621 | break; 622 | } 623 | }); 624 | server.get("/checkPublicDomain", async (req, res) => { 625 | const { bookid } = req.query; 626 | customFetch( 627 | `https://www.googleapis.com/books/v1/volumes/${bookid}?key=${GB_KEY}`, 628 | "GET", 629 | new Headers({ 630 | "Content-Type": "application/json", 631 | }) 632 | ).then(async (data) => { 633 | const { error } = checkForPublicDomain(data, res); 634 | if (error === false) { 635 | res.send({ error: false }); 636 | } 637 | }); 638 | }); 639 | 640 | server.get("/checkEmailableStatus", async (req, res) => { 641 | const { username } = req.query; 642 | const usersQuery = await customFetch( 643 | process.env.NEXT_PUBLIC_WIKIMEDIA_URL + 644 | `/w/api.php?action=query&list=users&ususers=${username}&usprop=emailable&format=json`, 645 | "GET" 646 | ); 647 | const emailableStatus = 648 | usersQuery?.query?.users[0]?.emailable === undefined ? 
false : true; 649 | res.send(emailableStatus); 650 | }); 651 | 652 | server.get("/getMetadata", async (req, res) => { 653 | const { option, bookID, categoryID, IAIdentifier } = req.query; 654 | switch (option) { 655 | case "gb": 656 | const gbRes = await customFetch( 657 | `https://www.googleapis.com/books/v1/volumes/${bookID}?key=${GB_KEY}`, 658 | "GET" 659 | ); 660 | res.send(gbRes); 661 | break; 662 | case "trove": 663 | const troveRes = await customFetch( 664 | `https://api.trove.nla.gov.au/v2/newspaper/${bookID}?key=${trove_key}&encoding=json&reclevel=full`, 665 | "GET" 666 | ); 667 | res.send(troveRes); 668 | break; 669 | case "pdl": 670 | const uri = `http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=${bookID}&page=1&CategoryID=${categoryID}&Searched=W3GX`; 671 | var options = { 672 | uri, 673 | transform: function (body) { 674 | return cheerio.load(body); 675 | }, 676 | }; 677 | const pdlRes = await getPDLMetaData(options, bookID, categoryID); 678 | const titleInIA = 679 | IAIdentifier?.trim() !== "" 680 | ? 
replaceTitle(IAIdentifier?.trim()) 681 | : replaceTitle(await getPDLTitle(options)); 682 | pdlRes.IAIdentifier = titleInIA; 683 | res.send(pdlRes); 684 | break; 685 | } 686 | }); 687 | 688 | server.post("/webhook", async (req, res) => { 689 | exec( 690 | "cd www/js; git pull origin master; yes | npm install; webservice --backend kubernetes node16 restart", 691 | (err, stdout, stderr) => { 692 | if (err) { 693 | logger.log({ 694 | level: "error", 695 | message: `webhook err ${err}`, 696 | }); 697 | } else if (stderr) { 698 | logger.log({ 699 | level: "error", 700 | message: `webhook stderr ${stderr}`, 701 | }); 702 | } else { 703 | logger.log({ 704 | level: "info", 705 | message: `webhook ${stdout}`, 706 | }); 707 | } 708 | } 709 | ); 710 | res.send(); 711 | }); 712 | 713 | server.post("/download", async (req, res) => { 714 | const regex = /https:\/\/books\.googleusercontent\.com\/books\/content\?req=*/; 715 | if (regex.test(req.body.url)) { 716 | res.send({ 717 | error: false, 718 | message: "You will be mailed with the details soon!", 719 | }); 720 | GoogleBooksProducer( 721 | req.body.url, 722 | req.body.titleInIA, 723 | GBdetails, 724 | emailaddr, 725 | authUserName, 726 | GBreq.query.isEmailNotification, 727 | GBreq.query.isUploadCommons, 728 | GBreq.query.oauthToken, 729 | GBcommonsMetaData 730 | ); 731 | } else { 732 | res.send({ 733 | error: true, 734 | message: "Invalid URL.", 735 | }); 736 | } 737 | }); 738 | 739 | /** 740 | * The express handler for default routes. 
741 | */ 742 | server.get("*", (req, res) => { 743 | return handle(req, res); 744 | }); 745 | 746 | /** 747 | * The express handler for default POST routes (for next-auth) 748 | */ 749 | server.post("*", (req, res) => { 750 | return handle(req, res); 751 | }); 752 | 753 | server.listen(PORT, (err) => { 754 | if (err) throw err; 755 | if (dev) { 756 | (async () => { 757 | await open(`http://localhost:${PORT}/`); 758 | })(); 759 | } 760 | }); 761 | }) 762 | .catch((ex) => { 763 | console.error(ex.stack); 764 | process.exit(1); 765 | }); 766 | -------------------------------------------------------------------------------- /components/Books.js: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | import Swal from "sweetalert2"; 3 | import { host } from "../utils/constants"; 4 | import { useSession, signIn } from "next-auth/react"; 5 | import ChangeIdentifier from "./ChangeIdentifier"; 6 | import useMetadataForUI from "../hooks/useMetadataForUI"; 7 | import BooksWrapper from "./BooksWrapper"; 8 | import { Box, Tooltip } from "@mui/material"; 9 | 10 | const Books = () => { 11 | const { data: session } = useSession(); 12 | const [option, setOption] = useState("gb"); 13 | const [bookid, setBookId] = useState(""); 14 | const [email, setEmail] = useState(""); 15 | const [loader, setLoader] = useState(false); 16 | const [isDuplicate, setIsDuplicate] = useState(false); 17 | const [isInValidIdentifier, setIsInValidIdentifier] = useState(false); 18 | const [isEmailNotification, setIsEmailNotification] = useState(false); 19 | const [isUploadCommons, setIsUploadCommons] = useState(false); 20 | const [IATitle, setIATitle] = useState(""); 21 | const [IAIdentifier, setIAIdentifier] = useState(""); 22 | const [inputDisabled, setInputDisabled] = useState(false); 23 | const [isUserEmailable, setIsUserEmailable] = useState(false); 24 | const [isCommonsMetadataReady, setIsCommonsMetadataReady] = 
useState(false); 25 | const [hasCommonsMetadataUpdated, setHasCommonsMetadataUpdated] = useState( 26 | false 27 | ); 28 | const [commonsMetadata, setCommonsMetadata] = useState(); 29 | const { getMetadataForUI } = useMetadataForUI(); 30 | 31 | const handleChange = (event) => { 32 | setOption(event.target.value); 33 | setBookId(""); 34 | setIsDuplicate(false); 35 | setIsEmailNotification(false); 36 | setIsInValidIdentifier(false); 37 | setIATitle(""); 38 | setIAIdentifier(""); 39 | setInputDisabled(false); 40 | setIsUploadCommons(false); 41 | }; 42 | 43 | const onResetButtonClicked = () => { 44 | setIsDuplicate(false); 45 | setIsEmailNotification(false); 46 | setIsInValidIdentifier(false); 47 | setInputDisabled(false); 48 | setIATitle(""); 49 | setIAIdentifier(""); 50 | setIsUploadCommons(false); 51 | setIsCommonsMetadataReady(false); 52 | setHasCommonsMetadataUpdated(false); 53 | }; 54 | 55 | const onSwalClosed = () => { 56 | setInputDisabled(false); 57 | setIAIdentifier(""); 58 | setIATitle(""); 59 | }; 60 | const renderContent = (option) => { 61 | switch (option) { 62 | case "gb": 63 | return ( 64 | <> 65 |

2. Enter Google Books ID

66 |
67 | 68 | https://books.google.co.in/books?id= 69 | 70 | setBookId(event.target.value)} 79 | aria-describedby="bid" 80 | /> 81 |
82 | 83 | ); 84 | case "pn": 85 | return ( 86 | <> 87 |

2. Enter URI

88 |
89 | setBookId(event.target.value)} 96 | required 97 | placeholder="http://www.panjabdigilib.org/webuser/searches/displayPage.jsp?ID=9073&page=1&CategoryID=1&Searched=" 98 | /> 99 |
100 | 101 | ); 102 | case "trove": 103 | return ( 104 | <> 105 |

2. Enter Newspaper/Gazette Article ID

106 |
107 | 108 | https://trove.nla.gov.au/newspaper/article/ 109 | 110 | setBookId(event.target.value)} 118 | required 119 | aria-describedby="bid" 120 | /> 121 |
122 | 123 | ); 124 | } 125 | }; 126 | 127 | const isPDLValidUrl = (urlString) => { 128 | var urlPattren = new RegExp( 129 | "((http|https)\\:\\/\\/)(www.)?(panjabdigilib\\.org\\/webuser\\/searches\\/displayPage\\.jsp\\?ID\\=)([0-9]*)(\\&page\\=)([0-9]*)(\\&CategoryID\\=)([0-9]*)(\\&Searched\\=)([a-zA-Z0-9@:%._+~#?&//=]*)" 130 | ); 131 | return urlPattren.test(urlString); 132 | }; 133 | 134 | const checkEmailableStatus = async (username) => { 135 | const response = await fetch( 136 | `${host}/checkEmailableStatus?username=${username}` 137 | ); 138 | const isEmailable = await response.json(); 139 | return isEmailable; 140 | }; 141 | 142 | const onSubmit = async (event) => { 143 | event?.preventDefault(); 144 | 145 | if (!session.user.name || session.user.name === "") { 146 | Swal("Error!", "Log in with Wikimedia to continue", "error"); 147 | return; 148 | } 149 | 150 | setLoader(true); 151 | setIsDuplicate(false); 152 | setIsInValidIdentifier(false); 153 | 154 | let url = ""; 155 | switch (option) { 156 | case "gb": 157 | if (isUploadCommons && !hasCommonsMetadataUpdated) { 158 | const checkPublicDomainURL = `${host}/checkPublicDomain?bookid=${bookid}`; 159 | const checkPublicDomainRes = await fetch(checkPublicDomainURL); 160 | const checkPublicDomainStatus = await checkPublicDomainRes.json(); 161 | if (checkPublicDomainStatus.error === false) { 162 | const commonsMetadata = await getMetadataForUI("gb", bookid); 163 | setCommonsMetadata(commonsMetadata); 164 | setIsCommonsMetadataReady(true); 165 | } else { 166 | Swal("Error!", checkPublicDomainStatus.message, "error"); 167 | setLoader(false); 168 | } 169 | } else { 170 | url = `${host}/check?bookid=${bookid}&option=${ 171 | option + (email ? 
"&email=" + email : "") 172 | }&userName=${ 173 | session?.user?.name 174 | }&IAtitle=${IAIdentifier}&isEmailNotification=${isEmailNotification}&isUploadCommons=${isUploadCommons}&oauthToken=${ 175 | session?.accessToken 176 | }`; 177 | fetch(url, { 178 | method: "POST", 179 | headers: { 180 | "Content-Type": "application/json", 181 | }, 182 | body: JSON.stringify({ 183 | commonsMetadata: commonsMetadata, 184 | }), 185 | }) 186 | .then((response) => response.json()) 187 | .then(async (response) => { 188 | setLoader(false); 189 | if (response.isDuplicate) { 190 | setIsDuplicate(true); 191 | setIATitle(response.titleInIA); 192 | setInputDisabled(true); 193 | } else if (response.isInValidIdentifier) { 194 | setIsInValidIdentifier(true); 195 | setIATitle(response.titleInIA); 196 | setInputDisabled(true); 197 | } else { 198 | if (response.error) { 199 | Swal("Error!", response.message, "error"); 200 | } else { 201 | setIsCommonsMetadataReady(false); 202 | const { value: url } = await Swal({ 203 | input: "url", 204 | backdrop: true, 205 | width: "50%", 206 | allowEscapeKey: false, 207 | allowOutsideClick: false, 208 | showCloseButton: true, 209 | onClose: onSwalClosed, 210 | title: 'Just a few more steps...', 211 | html: 212 | `
    ` + 213 | `
  1. Go to this link: ${response.title}
  2. ` + 214 | `
  3. Enter the captcha.
  4. ` + 215 | `
  5. Enter the URL below (https://books.googleusercontent.com/books/content?req=xxx)
  6. `, 216 | }); 217 | 218 | if (url && typeof url !== "object") { 219 | setLoader(true); 220 | fetch(`${host}/download`, { 221 | body: JSON.stringify({ 222 | url: url, 223 | titleInIA: response.IAIdentifier, 224 | }), 225 | headers: { 226 | "Content-Type": "application/json", 227 | "Access-Control-Allow-Origin": "*", 228 | }, 229 | method: "POST", 230 | }) 231 | .then((response) => response.json()) 232 | .then((response) => { 233 | setLoader(false); 234 | if (response.error) 235 | Swal("Error!", response.message, "error"); 236 | else Swal("Voila!", response.message, "success"); 237 | }); 238 | } 239 | } 240 | } 241 | }); 242 | } 243 | 244 | break; 245 | case "pn": 246 | if (isPDLValidUrl(bookid)) { 247 | const searchParams = new URL(bookid).searchParams; 248 | const ID = searchParams.get("ID"); 249 | const categoryID = searchParams.get("CategoryID"); 250 | if (isUploadCommons && !hasCommonsMetadataUpdated) { 251 | const pdlMetadata = await getMetadataForUI( 252 | "pdl", 253 | ID, 254 | categoryID, 255 | IAIdentifier 256 | ); 257 | setCommonsMetadata(pdlMetadata); 258 | setIsCommonsMetadataReady(true); 259 | } else { 260 | url = `${host}/check?bookid=${ID}&option=${ 261 | option + (email ? 
"&email=" + email : "") 262 | }&categoryID=${categoryID}&userName=${ 263 | session.user.name 264 | }&IAtitle=${IAIdentifier}&isEmailNotification=${isEmailNotification}&isUploadCommons=${isUploadCommons}&oauthToken=${ 265 | session?.accessToken 266 | }`; 267 | fetch(url, { 268 | method: "POST", 269 | headers: { 270 | "Content-Type": "application/json", 271 | }, 272 | body: JSON.stringify({ 273 | commonsMetadata: commonsMetadata, 274 | }), 275 | }) 276 | .then((res) => res.json()) 277 | .then((response) => { 278 | setLoader(false); 279 | if (response.isDuplicate) { 280 | setIsDuplicate(true); 281 | setIATitle(response.titleInIA); 282 | setInputDisabled(true); 283 | } else if (response.isInValidIdentifier) { 284 | setIsInValidIdentifier(true); 285 | setIATitle(response.titleInIA); 286 | setInputDisabled(true); 287 | } else { 288 | if (response.error) { 289 | Swal("Error!", response.message, "error"); 290 | } else { 291 | setIsCommonsMetadataReady(false); 292 | Swal("Voila!", response.message, "success"); 293 | } 294 | } 295 | }); 296 | } 297 | } else { 298 | setLoader(false); 299 | Swal("Opps...", "Enter a valid URL", "error"); 300 | } 301 | break; 302 | case "trove": 303 | if (isUploadCommons && !hasCommonsMetadataUpdated) { 304 | const commonsMetadata = await getMetadataForUI("trove", bookid); 305 | setCommonsMetadata(commonsMetadata); 306 | setIsCommonsMetadataReady(true); 307 | } else { 308 | url = `${host}/check?bookid=${bookid}&option=${ 309 | option + (email ? 
"&email=" + email : "") 310 | }&userName=${ 311 | session.user.name 312 | }&IAtitle=${IAIdentifier}&isUploadCommons=${isUploadCommons}&oauthToken=${ 313 | session?.accessToken 314 | }&isEmailNotification=${isEmailNotification}`; 315 | fetch(url, { 316 | method: "POST", 317 | headers: { 318 | "Content-Type": "application/json", 319 | }, 320 | body: JSON.stringify({ 321 | commonsMetadata: commonsMetadata, 322 | }), 323 | }) 324 | .then((res) => res.json()) 325 | .then((response) => { 326 | setLoader(false); 327 | if (response.isDuplicate) { 328 | setIsDuplicate(true); 329 | setIATitle(response.titleInIA); 330 | setInputDisabled(true); 331 | } else if (response.isInValidIdentifier) { 332 | setIsInValidIdentifier(true); 333 | setIATitle(response.titleInIA); 334 | setInputDisabled(true); 335 | } else { 336 | if (response.error) { 337 | Swal("Error!", response.message, "error"); 338 | } else { 339 | setIsCommonsMetadataReady(false); 340 | Swal("Voila!", response.message, "success"); 341 | } 342 | } 343 | }); 344 | } 345 | 346 | break; 347 | } 348 | }; 349 | 350 | useEffect(async () => { 351 | const isEmailable = await checkEmailableStatus(session?.user?.name); 352 | setIsUserEmailable(isEmailable); 353 | }, [session]); 354 | 355 | useEffect(() => { 356 | window.scrollTo({ top: 0, behavior: "smooth" }); 357 | if ( 358 | hasCommonsMetadataUpdated && 359 | isUploadCommons && 360 | isCommonsMetadataReady 361 | ) { 362 | onSubmit(null, session.user.name); 363 | } 364 | if (isUploadCommons === false && isCommonsMetadataReady) { 365 | onResetButtonClicked(); 366 | setLoader(false); 367 | } 368 | }, [hasCommonsMetadataUpdated, isUploadCommons]); 369 | 370 | return ( 371 | 372 | 373 | 384 |

    Book Uploader Bot

    385 |
    386 | 387 | Upload books, newspapers, magazines etc. from public libraries to 388 | Internet Archive and Wikimedia Commons. 389 | 390 |
    391 |
    onSubmit(e, session.user.name)}> 392 |
    393 |

    1. Select a library

    394 | 401 |
    402 |
    {renderContent(option)}
    403 | 404 |
    405 |

    3. Upload Preferences

    406 | 407 |
    408 | 409 | 416 | setIsUploadCommons(event.target.checked) 417 | } 418 | /> 419 | 420 | 421 |
    425 | 431 | 432 | 437 | BUB2 will also upload book and metadata to Commons 438 | 439 | } 440 | > 441 | 445 | 446 | 447 | 448 |
    449 |
    450 |
    451 | {isUploadCommons ? ( 452 |
    457 | 458 |
    459 | [NEW] BUB2 will create Wikidata entries for the file 460 | metadata. Only applicable for Google Books. 461 |
    462 |
    463 | ) : null} 464 |
    465 | 466 | 472 | setIsEmailNotification(event.target.checked) 473 | } 474 | disabled={!isUserEmailable} 475 | title={ 476 | isUserEmailable 477 | ? "" 478 | : "No email associated with this user account or the user has disabled email access." 479 | } 480 | /> 481 | 482 |
    490 | 496 |
    497 |
    498 | 499 | {isEmailNotification && ( 500 | 504 |

    505 | 506 |   BUB2 will send an email to your email ID associated 507 | with your Wikimedia account regarding the success or 508 | failure of the upload. 509 |

    510 |
    511 | )} 512 |
    513 |
    514 | {isDuplicate ? ( 515 | 518 | A file with this identifier{" "} 519 | 520 | (https://archive.org/{IATitle}) 521 | {" "} 522 | already exists at Internet Archive. Please enter a different 523 | identifier to proceed. 524 | 525 | } 526 | inputPlaceholder="Enter unique file identifier" 527 | onIdentifierChange={(event) => 528 | setIAIdentifier(event.target.value) 529 | } 530 | /> 531 | ) : null} 532 | 533 | {isInValidIdentifier === true ? ( 534 | 537 | The file you want to upload with title - {IATitle} either 538 | contains special characters or exceeds 50 characters in 539 | length. Please provide an identifier that consists only of 540 | letters (A-Z) and numbers (0-9). 541 | 542 | } 543 | inputPlaceholder="Enter valid identifier" 544 | onIdentifierChange={(event) => 545 | setIAIdentifier(event.target.value) 546 | } 547 | /> 548 | ) : null} 549 | 550 | {session && ( 551 |
    552 |
    559 | 571 | {isDuplicate === true || isInValidIdentifier === true ? ( 572 | 584 | ) : null} 585 |
    586 |
    587 | )} 588 | {!session && ( 589 |
    590 |
    591 | 592 | Upload restricted. Login with Wikimedia Account to continue. 593 | 594 |
    595 | 609 |
    610 | )} 611 | 612 | {loader && (!isCommonsMetadataReady || hasCommonsMetadataUpdated) ? ( 613 |
    614 | 615 | Fetching information. Please wait.. 616 | 617 |
    623 |
    624 |
    625 |
    626 | ) : null} 627 | 628 | 629 | {isCommonsMetadataReady && ( 630 | 641 |
    { 643 | e.preventDefault(); 644 | setHasCommonsMetadataUpdated(true); 645 | }} 646 | > 647 |