├── .eslintignore ├── .eslintrc.json ├── .github └── ISSUE_TEMPLATE │ ├── add_quotes.md │ ├── edit_quotes.md │ └── remove_quotes.md ├── .gitignore ├── .npmignore ├── .prettierrc.json ├── .vscode ├── launch.json └── settings.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── README.md ├── cli ├── addAuthors │ ├── README.md │ ├── index.js │ ├── logResults.js │ └── validation.js ├── addQuotes │ ├── README.md │ ├── index.js │ ├── logResults.js │ └── validation.js ├── addTags │ ├── README.md │ ├── index.js │ ├── logResults.js │ └── validation.js ├── build │ ├── README.md │ ├── index.js │ └── transforms.js ├── checkForDuplicates │ ├── README.md │ ├── index.js │ └── logResults.js ├── sample │ ├── README.md │ └── index.js ├── sync │ ├── README.md │ ├── db.js │ ├── help.js │ └── index.js ├── updateTags │ ├── index.js │ └── logResults.js └── validate │ ├── README.md │ └── index.js ├── config.js ├── data ├── authors.json ├── index.js ├── quotes.json └── tags.json ├── lib ├── Spinner.js ├── Table.js ├── __tests__ │ ├── ajv.test.js │ └── isEqual.js ├── ajv.js ├── delay.js ├── findAuthorByName.js ├── findQuoteByContent.js ├── findQuotesByAuthor.js ├── findQuotesByTag.js ├── isEqual.js ├── isJSONFile.js ├── log.js ├── logJSONTable.js ├── object.js ├── optionsTable.js ├── parseContent.js ├── parseDataFiles.js ├── parseFile.js ├── progressBar.js ├── run.js ├── selectInput.js ├── titleCase.js ├── truncate.js ├── wiki │ ├── __fixtures__ │ │ ├── example-page.mjson │ │ └── example-search-results.mjson │ ├── __tests__ │ │ ├── __snapshots__ │ │ │ └── extractAuthorDetails.test.mjs.snap │ │ ├── api.test.js │ │ ├── extractAuthorDetails.test.js │ │ ├── findAuthorByName.test.js │ │ └── getImageInfo.test.js │ ├── api.js │ ├── cache.js │ ├── extractAuthorDetails.js │ ├── findAuthorByName.js │ ├── getImageInfo.js │ ├── index.js │ └── rateLimit.js └── writeJSONFiles.js ├── package-lock.json ├── package.json └── schema ├── models ├── Author.js ├── Quote.js └── Tag.js └── types.js /.eslintignore: 
-------------------------------------------------------------------------------- 1 | /.vscode 2 | /.git 3 | /data 4 | /docs 5 | /private 6 | /temp 7 | /input 8 | /.cache 9 | __tests__ 10 | node_modules 11 | **/*.spec.js 12 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["airbnb-base", "prettier"], 3 | "parserOptions": { 4 | "ecmaVersion": 2022, 5 | "sourceType": "module" 6 | }, 7 | "env": { 8 | "node": true, 9 | "browser": false 10 | }, 11 | "rules": { 12 | "import/prefer-default-export": 0, 13 | "import/extensions": [2, "always"], 14 | "arrow-body-style": 0, 15 | // "no-unused-vars": "warn", 16 | "consistent-return": 0, 17 | "radix": 0, 18 | "no-console": 0, 19 | "no-underscore-dangle": 0, 20 | "no-restricted-syntax": 0, 21 | "no-await-in-loop": 0, 22 | "array-callback-return": 0 23 | }, 24 | "globals": { 25 | "Promise": true, 26 | "describe": true, 27 | "test": true, 28 | "it": true, 29 | "beforeEach": true, 30 | "beforeAll": true, 31 | "afterEach": true, 32 | "afterAll": true, 33 | "expect": true 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/add_quotes.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Add New Quotes 3 | about: Use this issue template to submit new quotes that you would like to add to the collection. 
4 | title: 'data: new quotes' 5 | labels: 'data: new quotes' 6 | assignees: '' 7 | 8 | --- 9 | 10 | I would like to add the following quotes 11 | 12 | ### Checklist 13 | - [ ] I proofread these quotes for spelling and grammar 14 | - [ ] These quotes do not violate the content guidelines 15 | - [ ] The quotes are in the correct format 16 | 17 | ### List of Quotes 18 | 19 | ```jsonc 20 | // Replace this example with your list of quotes 21 | [ 22 | { 23 | "content": "a house divided will not stand", 24 | "author": "Abraham Lincoln", 25 | "tags": ["history", "politics", "famous-quotes"] 26 | } 27 | ] 28 | ``` 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/edit_quotes.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Edit Quotes 3 | about: Suggest changes to existing quotes to improve grammar, accuracy, etc. 4 | title: 'data: edit quotes' 5 | labels: 'data: edit quotes' 6 | assignees: '' 7 | --- 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/remove_quotes.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Remove Quotes 3 | about: Report quotes that violate the content guidelines 4 | title: 'data: remove quotes' 5 | labels: 'data: remove quotes' 6 | assignees: '' 7 | --- 8 | 9 | Please list the quotes that you would like to remove 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/ignore-files/ for more about ignoring files. 
2 | 3 | /images 4 | /temp 5 | /private 6 | /cli/private 7 | cli/scrape 8 | .cache 9 | 10 | # dependencies 11 | /node_modules 12 | 13 | # testing 14 | /coverage 15 | /input 16 | 17 | # misc 18 | .DS_Store 19 | .eslintcache 20 | 21 | # Environment variables 22 | .env 23 | .env.local 24 | .env.development.local 25 | .env.test.local 26 | .env.production.local 27 | 28 | # yarn/npm 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | yarn.lock 33 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | src/ 2 | cli/ 3 | .vscode/ 4 | import/ -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "useTabs": false, 3 | "tabWidth": 2, 4 | "semi": false, 5 | "trailingComma": "es5", 6 | "proseWrap": "preserve", 7 | "printWidth": 80, 8 | "arrowParens": "avoid", 9 | "singleQuote": true 10 | } 11 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | // For debugging node js files 9 | // This will run the current file in debug mode 10 | "type": "node", 11 | "request": "launch", 12 | "name": "Node ${file}", 13 | "skipFiles": ["/**"], 14 | "program": "${file}", 15 | "runtimeExecutable": "node", 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "canonicalurl", 4 | "deburr", 5 | "deburred", 6 | "formatversion", 7 | "iiprop", 8 | "pageimage", 9 | "stopwords" 10 | ], 11 | "markdown.extension.toc.levels": "2..3" 12 | } 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Enforcement 47 | 48 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 49 | reported by contacting the project team at peaveycodese@gmail.com. 
50 | 51 | ## Attribution 52 | 53 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 54 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 55 | 56 | [homepage]: https://www.contributor-covenant.org 57 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | - [Content Guidelines](#content-guidelines) 4 | - [Issue Contributions](#issue-contributions) 5 | - [Pull Requests](#pull-requests) 6 | 7 | ## Content Guidelines 8 | 9 | - **All quotes should be good quality: accurate, interesting, and fun to read.** 10 | - Quotes should not contain any offensive language 11 | - Quotes should not contain language or messages that may be interpreted as racist, sexist, xenophobic, or otherwise offensive. 12 | - We do not include any quotes by historical figures who are associated with violence, oppression, racism etc (ie Hitler, Stalin, etc), regardless of the content of the quote. 13 | - Political quotes are fine, as long as they are **not partisan**. For example, quotes that promote human rights, democracy, social justice, equality, etc, are totally fine. However, quotes that refer to a specific political party or politician should not be included. 14 | 15 | ## Issue Contributions 16 | 17 | The easiest way to contribute is by opening an issue to propose changes. 18 | 19 | ### Suggesting new quotes 20 | 21 | You can open an issue to suggest new quotes. **You don't need to check if the quotes are already in the database.** Duplicates will be skipped automatically when we add the quotes. 22 | 23 | - Please proofread quotes for spelling and grammar. 24 | - Please format the list of quotes as a JSON array. Each quote should have the following properties. 
25 | 26 | ```ts 27 | { 28 | // The quotation text 29 | content: string, 30 | // The author's full name 31 | // ideally use the exact name on the person's wikipedia page 32 | author: string, 33 | // Array of tags 34 | tags: string[] 35 | } 36 | ``` 37 | **Example** 38 | ```json 39 | [ 40 | { 41 | "content": "A house divided against itself cannot stand.", 42 | "author": "Abraham Lincoln", 43 | "tags": ["history", "politics", "famous-quotes"] 44 | }, 45 | { 46 | "content": "Any sufficiently advanced technology is indistinguishable from magic", 47 | "author": "Arthur C. Clarke", 48 | "tags": ["technology", "literature", "famous-quotes"] 49 | } 50 | ] 51 | ``` 52 | ### Remove Inappropriate Quotes 53 | 54 | If you come across any quotes that violate the [content guidelines](#content-guidelines), please report them so they can be removed. 55 | 56 | Open a new issue. List the quotes you want to remove. Include the quote content or `_id`. 57 | ### Suggesting Changes 58 | 59 | You can open an issue to suggest changes to existing quotes. For example, to fix spelling, grammar, or accuracy, etc. 60 | 61 | - Please include the ids of the objects that need to be fixed, along with the proposed changes. 62 | 63 | ## Pull Requests 64 | 65 | If you would like to contribute directly, feel free to submit a pull request. Please read the following documentation before submitting a PR. 66 | ### Setup 67 | 68 | 1. Fork and clone this repository. 69 | 2. Install dependencies 70 | 71 | The repository includes the following CLI scripts for managing the data files. These require Node >= 18. Click on the link below to view documentation for each CLI script. 
72 | 73 | - [`cli/addQuotes`](./cli/addQuotes/README.md) 74 | - [`cli/addAuthors`](./cli/addAuthors/README.md) 75 | - [`cli/addTags`](./cli/addTags/README.md) 76 | - [`cli/validate`](./cli/validate/README.md) 77 | - [`cli/build`](./cli/build/README.md) 78 | 79 | ### Adding new content 80 | 81 | **Do not add new content to the JSON files directly** 82 | 83 | The `addQuotes` script is the primary mechanism for adding new content (quotes, authors, and tags). It takes an array of quotes from an input file, checks it against the existing collection to filter out duplicates, then adds the new quotes to `quotes.json` collection. It also checks for any authors and tags that do not already exist, creates the necessary objects and adds them to their respective collections. It uses the wiki API to get `Author` details like `bio`, `description`, `link` etc. 84 | 85 | **Refer to [`cli/addQuotes`](./cli/addQuotes/README.md) for detailed instructions** 86 | 87 | 1. create a JSON file (`input/quotes.json`) containing an array of quotes to add. 88 | 2. Run the following command to add the quotes from the input file 89 | ```sh 90 | node cli/addQuotes -v 91 | ``` 92 | 3. Run data validation to check the data files for any errors. If any errors are found, this script will provide detailed output including the location of the errors and what needs to be fixed. 93 | ```sh 94 | node cli/validate -v 95 | ``` 96 | 4. Open a pull request to submit your changes. 97 | 98 | ### Editing Content 99 | 100 | To edit existing quotes for spelling, grammar, and accuracy, you can edit the JSON files directly. Below is the list of the files and properties that can be edited. **Other properties should not be changed manually**. 
101 | 102 | - `data/quotes.json/` 103 | - `Quote.content` 104 | - `Quote.tags` 105 | - `Quote.author` *(see below) 106 | - `data/authors.json/` 107 | - `Author.bio` 108 | - `Author.description` 109 | 110 | \* If a quote is attributed to the wrong author, you _can_ manually change the `author` property. However, the new value **must be the exact name of an existing author** in the `data/authors.json` collection. If the correct author is not already in the database, you can remove the existing quote, and then use the addQuotes script to add it with the correct author name (this will automatically create the Author object and add it to the authors collection). 111 | 112 | After making changes, you can run the following command to check the data files for validation errors. 113 | 114 | ```sh 115 | node cli/validate -v 116 | ``` 117 | When you finish making changes, submit a pull request 118 | 119 | ### Adding Tags to Existing Quotes 120 | 121 | The goal of the tags feature was to organize the quotes into various overlapping categories or topics -- such as "civil rights", "business", "success", "happiness", "technology", etc. 122 | 123 | To make this feature more useful, we need to add appropriate tags to all the existing quotes. Currently less than half the quotes have tags. 124 | 125 | The only way to add tags to the existing quotes is by manually editing the `quotes.json` file. Each quote has a `tags` property which is an array of tag names. You can go through and manually add new tags to the quotes as you read through them. 126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quotable Data 2 | 3 | [quotable](https://github.com/lukePeavey/quotable) is a free, open source API that provides access to a large collection of famous quotes. 
Both the API and the data — the collection of quotes, authors, and other info — are open source projects. 4 | 5 | This repository contains the data for the quotable API. All changes to the database are made in this repository and then synced to the MongoDB database. It was set up this way to allow community contributions to the data. 6 | 7 | ## Contributing 8 | 9 | If you are interested in contributing to the Quotable database, please check out the [contributors guide](CONTRIBUTING.md). 10 | 11 | ## Project Structure 12 | 13 | - `/data` Contains the source data for each of the database collections. 14 | - `/translations` Contains translations 15 | - `/config` Configuration values 16 | - `/cli` CLI scripts for managing the data 17 | - `/lib` Helper functions that are used by the CLI scripts 18 | 19 | ## CLI Scripts 20 | 21 | - [`addQuotes`](./cli/addQuotes/README.md) 22 | - [`addAuthors`](./cli/addAuthors/README.md) 23 | - [`addTags`](./cli/addTags/README.md) 24 | - [`validate`](./cli/validate/README.md) 25 | - [`build`](./cli/build/README.md) 26 | - [`sync`](./cli/sync/README.md) 27 | -------------------------------------------------------------------------------- /cli/addAuthors/README.md: -------------------------------------------------------------------------------- 1 | # $ addAuthors 2 | 3 | Adds one or more authors to the `authors` collection. 4 | 5 | > You do not need to use this script when adding new quotes. The [`addQuotes`](../addQuotes/README.md) script will automatically create any new authors and tags from the list of quotes and add them to their respective collections. 6 | 7 | ## Arguments 8 | 9 | | name | default | description | 10 | | :-------------- | -------------------- | ----------------------------------------------------------------- | 11 | | `` | `input/authors.json` | The path to the input file (relative to project root) | 12 | | `--name, -n` | NA | List of author names to add. 
Can be used instead of an input file | 13 | | `--verbose, -v` | false | Output details about each author that was added | 14 | | `--dryRun, -d` | false | Will not save changes | 15 | | `--dataDir` | `data/source` | Only for testing purposes. Use default value | 16 | 17 | ## Usage 18 | 19 | There are two ways to add authors: 20 | 21 | **1. From an input file** 22 | 23 | The default location for the input file is `input/authors.json`. If you use the default file location, you can omit the `` argument. See [input file](#input-file) for more details. 24 | 25 | ```SHELL 26 | ❯ node cli/addAuthors 27 | ``` 28 | 29 | **2. With the `--name` argument** 30 | 31 | Instead of creating an input file, you specify a list of author names via the `--name` argument. 32 | 33 | ```SHELL 34 | ❯ node cli/addAuthors --name="pete seeger, bob dylan" 35 | ``` 36 | 37 | ## Input File 38 | 39 | The input file should be a `JSON` file containing an array of `AuthorInput` objects. Each object defines an author to add. The only _required_ property for each author is `name`. All other fields will be filled in automatically using the wiki API. However, any properties that are included in the input data will take priority over the values pulled from the wiki API. 40 | 41 | ~~This can be used as an escape hatch if you need to add an author who does not have a wikipedia page.~~ 42 | 43 | ```ts 44 | interface AuthorInput { 45 | name: string 46 | bio?: string 47 | description?: string 48 | link?: string 49 | } 50 | type inputFile = AuthorInput[] 51 | ``` 52 | 53 | ## How it works 54 | 55 | The script starts by matching each new author to a wikipedia page. If it does not find an exact match for a given author name, it will prompt the user to select the intended person from a list of suggestions. If the input name is different than the name of the person's wikipedia page, it will use the name on the wikipedia page. 56 | 57 | Then, it will determine if the author already exists. 
This prevents duplicate authors with different name variations (ie "john kennedy" and "john F. kennedy"). 58 | 59 | In this example, we misspelled the author's last name. The script will prompt us to choose the correct person, then create the author using the exact name on the person's wikipedia page (if the author does not already) 60 | 61 | ![addAuthors screenshot one](https://user-images.githubusercontent.com/8286271/120909240-12789b80-c641-11eb-99e8-4bfd960cd009.jpg) 62 | ![addAuthors screenshot two](https://user-images.githubusercontent.com/8286271/120909245-14daf580-c641-11eb-93de-1d9f0462e5ca.jpg) 63 | -------------------------------------------------------------------------------- /cli/addAuthors/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import * as path from 'path' 4 | import minimist from 'minimist' 5 | import { trim, uniq, remove } from 'lodash-es' 6 | import shortid from 'shortid' 7 | import { writeJSONFiles } from '../../lib/writeJSONFiles.js' 8 | import { parseDataFiles } from '../../lib/parseDataFiles.js' 9 | import { log } from '../../lib/log.js' 10 | import { parseFile } from '../../lib/parseFile.js' 11 | import { run } from '../../lib/run.js' 12 | import { logResults } from './logResults.js' 13 | import { select } from '../../lib/selectInput.js' 14 | import { validateInput } from './validation.js' 15 | import { dataDir } from '../../config.js' 16 | import { wiki } from '../../lib/wiki/index.js' 17 | 18 | /** 19 | * Parses the CLI arguments 20 | */ 21 | function parseArgs() { 22 | const args = minimist(process.argv.slice(2)) 23 | const VERBOSE = args.v || args.verbose 24 | const DRY_RUN = args.d || args.dryRun 25 | const DATA_DIR = path.resolve(args.dataDir || dataDir.source) 26 | const INPUT_FILE = path.resolve(args._[0] || 'input/authors.json') 27 | const NAME = args.name || args.n 28 | return { DATA_DIR, INPUT_FILE, DRY_RUN, NAME, VERBOSE } 29 | } 30 | 31 | function 
parseNameInput(NAME) { 32 | if (!NAME) return null 33 | return NAME.split(',').map(name => ({ name: trim(name) })) 34 | } 35 | 36 | run(async () => { 37 | const { DATA_DIR, INPUT_FILE, VERBOSE, DRY_RUN, NAME } = parseArgs() 38 | // The current database collections 39 | const db = parseDataFiles(DATA_DIR) 40 | 41 | // Get the input data (from an input file or the `--name` argument) 42 | const inputData = uniq(parseNameInput(NAME) || parseFile(INPUT_FILE)) 43 | 44 | if (!validateInput(inputData)) { 45 | validateInput.errors.forEach(error => log.error(error)) 46 | throw new Error('Input data does not match schema') 47 | } 48 | 49 | // Map each author name found in the input data to an array of matching 50 | // wikipedia pages. 51 | // const newAuthors = [] 52 | const skipped = { noWikipediaPage: [], duplicate: [] } 53 | const newAuthors = await Promise.all( 54 | inputData.map(async (input, index) => { 55 | // Find the wikipedia entry for this author 56 | await wiki.rateLimit(index, inputData.length) 57 | const results = await wiki.findAuthorByName(input.name) 58 | let [authorWiki] = results 59 | // If there are multiple results that match the given author name, the 60 | // script will prompt the user to select the intended person from the 61 | // list of results. 
62 | if (results.length > 1) { 63 | authorWiki = await select({ 64 | message: `Select the wikipedia page for author: ${input.name}`, 65 | options: results, 66 | }) 67 | } 68 | // If the author does not have a wikipedia page, they will not be added 69 | if (!authorWiki) { 70 | return input 71 | } 72 | // Create an `Author` object using data from the wiki API and user input 73 | return { 74 | _id: shortid(), 75 | name: authorWiki.name, 76 | bio: input.bio || authorWiki.bio, 77 | description: input.description || authorWiki.description, 78 | link: authorWiki.link, 79 | } 80 | }) 81 | ) 82 | 83 | // Filter out authors that did not have a wikipedia page 84 | skipped.noWikipediaPage = remove(newAuthors, author => !author._id) 85 | 86 | // Filter out authors that are already in the collection. 87 | skipped.duplicate = remove(newAuthors, author => { 88 | return db.authors.some(({ name, link }) => { 89 | return link === author.link || name === author.name 90 | }) 91 | }) 92 | 93 | if (newAuthors.length && !DRY_RUN) { 94 | // Update the collections then save files to disk 95 | const authors = [...db.authors, ...newAuthors] 96 | writeJSONFiles(DATA_DIR, { authors }) 97 | } 98 | // Output the results to the console 99 | logResults(newAuthors, skipped, VERBOSE, DRY_RUN) 100 | }) 101 | -------------------------------------------------------------------------------- /cli/addAuthors/logResults.js: -------------------------------------------------------------------------------- 1 | import pluralize from 'pluralize' 2 | import { lowerCase } from 'lodash-es' 3 | import { log } from '../../lib/log.js' 4 | import { entries } from '../../lib/object.js' 5 | import { logJSONTable } from '../../lib/logJSONTable.js' 6 | 7 | function logResults(authors, skipped, verbose, dryRun) { 8 | // Output info about authors that were skipped 9 | entries(skipped).forEach(([key, documents]) => { 10 | const count = documents.length 11 | const reason = lowerCase(key) 12 | if (count) { 13 | log.newLine() 14 
| log.info(`Skipped ${count} ${pluralize('author', count)}: ${reason}`) 15 | if (verbose) logJSONTable(documents, { excludeKeys: ['_id'] }) 16 | } 17 | }) 18 | // Output info about the authors that were added 19 | if (authors.length) { 20 | const count = authors.length 21 | log.info(`Added ${count} new ${pluralize('author', authors)} `) 22 | if (verbose) logJSONTable(authors, { excludeKeys: '_id' }) 23 | } else { 24 | log.info(`No new authors to add`) 25 | } 26 | 27 | if (dryRun) { 28 | log.newLine() 29 | log(`This was a dry run, changes were not saved`) 30 | } 31 | } 32 | export { logResults } 33 | -------------------------------------------------------------------------------- /cli/addAuthors/validation.js: -------------------------------------------------------------------------------- 1 | import { omit } from 'lodash-es' 2 | import { ajv } from '../../lib/ajv.js' 3 | import { properties } from '../../schema/models/Author.js' 4 | 5 | // Schema for the input data 6 | const validateInput = ajv.compile({ 7 | title: 'addAuthors input', 8 | type: 'array', 9 | items: { 10 | type: 'object', 11 | properties: omit(properties, ['_id']), 12 | required: ['name'], 13 | additionalProperties: false, 14 | }, 15 | }) 16 | 17 | export { validateInput } 18 | -------------------------------------------------------------------------------- /cli/addQuotes/README.md: -------------------------------------------------------------------------------- 1 | # $ addQuotes 2 | 3 | This script is the primary mechanism for adding new content (quotes, authors and tags). It takes a list of quotes from an input file, filters out any duplicates, then adds the new quotes to the collection. It will also identify any authors and tags that do not already exist, create them and add them to their respective collections. 4 | 5 | ## Usage 6 | 7 | The default location for the input file is `input/quotes.json`. If you use the default location you can omit the `` argument. 
8 | 9 | ```SHELL 10 | $ node cli/addQuotes 11 | ``` 12 | 13 | ### Arguments 14 | 15 | | name | default | description | 16 | | :-------------- | ------------- | ------------------------------------------------------- | 17 | | `` | `quotes.json` | Path to input file (relative to `/input/`) | 18 | | `--dryRun, -d` | `false` | If true, script will run without modifying files | 19 | | `--verbose, -v` | `false` | Script will output details about the data being added | 20 | | `--cleanup, -c` | `false` | If true, deletes input file after operation is complete | 21 | | `--dataDir` | `data/source` | Only for testing purposes. Use the default value | 22 | 23 | ### Input File 24 | 25 | The input file should be a `JSON` file containing an array of quotes that you want to add. Each item should be an object with the following properties. All other fields will be added automatically. 26 | 27 | ```ts 28 | interface QuoteInput { 29 | // The quote content 30 | content: string 31 | // The author's name (as it appears on their wikipedia page) 32 | author: string 33 | // A list of tag names 34 | tags?: string[] 35 | } 36 | type inputFile = QuoteInput[] 37 | ``` 38 | 39 | ## How it Works 40 | 41 | ### Checking for duplicates 42 | 43 | It starts by filtering out any duplicate quotes that already exist. When comparing quotes, it ignores punctuation, case, and stopwords to avoid multiple variations of the same quote. 44 | 45 | ### Creating new authors 46 | 47 | It starts by matching each author name in the input data to a wikipedia page. If it does not find an exact match for a given author name, the script will prompt the user to select the intended person from a list of suggestions. If the author name in the input data is different than the name on the person's wikipedia page, it will use the name on the wikipedia page. This avoids duplicate authors caused by different name variations ("John Kennedy" and "John F. Kennedy") or misspellings. 
48 | 49 | The script will create the necessary `Author` objects, using data from the wiki API to get the values for `bio`, `description`, `link`, etc. 50 | 51 | TODO: this does not currently handle author profile images 52 | 53 | ### Creating new tags 54 | 55 | Any new tags will be created and added to the `tags` collection. Please try to avoid creating duplicate tags (ie "inspiration" and "inspirational"). 56 | 57 | ## Example 58 | 59 | ```json 60 | // input/quotes.json 61 | [ 62 | { 63 | "content": "If you're changing the world, you're working on important things. You're excited to get up in the morning.", 64 | "author": "Larry Page", 65 | "tags": ["famous-quotes", "inspirational"] 66 | } 67 | ] 68 | ``` 69 | 70 | ```shell 71 | $ node cli/addQuotes input/quotes.json -v 72 | ``` 73 | 74 | In this example, there are two people on wikipedia named "Larry Page", so the script will prompt us to choose the correct person. 75 | 76 | ![](https://user-images.githubusercontent.com/8286271/120941761-858e1a80-c6f2-11eb-8ad7-48d6e093e03c.jpg) 77 | 78 | ![](https://user-images.githubusercontent.com/8286271/120941762-8626b100-c6f2-11eb-86a8-a72a26cc3dc7.jpg) 79 | -------------------------------------------------------------------------------- /cli/addQuotes/index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import * as path from 'path' 4 | 5 | import minimist from 'minimist' 6 | import shell from 'shelljs' 7 | import { uniq, uniqBy, kebabCase, remove, flatMap, isEmpty } from 'lodash-es' 8 | import shortid from 'shortid' 9 | import slugify from '@lukepeavey/slugify' 10 | import { createProgressBar } from '../../lib/progressBar.js' 11 | import { dataDir } from '../../config.js' 12 | import { entries, values } from '../../lib/object.js' 13 | import { findQuoteByContent } from '../../lib/findQuoteByContent.js' 14 | import { titleCase } from '../../lib/titleCase.js' 15 | import { log } from '../../lib/log.js' 16 
/**
 * Matches each author name found in the input data to a wikipedia page.
 *
 * @param {{author: string}[]} input The array of quotes from the input file
 * @return {Promise<Object>} An object that maps each author name found in the
 *     input data to the selected wikipedia page. Names that could not be
 *     matched map to an empty array, so callers can detect them with
 *     `isEmpty`.
 */
async function getAuthorWikis(input) {
  // The array of unique author names found in the input data
  const inputNames = uniq(input.map(({ author }) => author))

  // Maps each author name in the input data to its wikipedia result(s).
  const authors = {}

  // Note: wiki API requests are throttled (see `wiki.rateLimit`), so this step
  // can take a while for large inputs. A progress bar provides feedback while
  // the requests are running.
  const wikiProgressBar = createProgressBar({ title: 'Getting author info' })
  wikiProgressBar.start(inputNames.length, 0)

  // 1. Use the wiki API to search for each author by name. A search can
  // return several results (no exact match, or several people share the
  // name), so initially each name maps to an array of candidate pages.
  let index = 0
  try {
    for (const inputName of inputNames) {
      await wiki.rateLimit(index, inputNames.length)
      authors[inputName] = await wiki.findAuthorByName(inputName)
      wikiProgressBar.update((index += 1))
    }
  } finally {
    // Always stop the progress bar, even when a request throws
    wikiProgressBar.stop()
  }

  // 2. Resolve each array of candidates to a single page. If a name has
  // several candidates, prompt the user to pick the intended person.
  for (const [inputName, wikiResults] of entries(authors)) {
    // let `authorWiki` be the wikipedia page for this author
    let [authorWiki] = wikiResults
    if (wikiResults.length > 1) {
      authorWiki = await select({
        message: `Select wikipedia page for author ${inputName}`,
        options: wikiResults,
      })
    }
    if (!isEmpty(authorWiki)) {
      authors[inputName] = authorWiki
    } else {
      // Drop any unresolved candidates so the entry is reliably "empty".
      // Otherwise a multi-result list with no selection would be left in the
      // map and mistaken for a wikipedia page by `isEmpty` checks downstream.
      authors[inputName] = []
      log.warn(
        `Could not find a matching wikipedia page for author: ${inputName}`
      )
      log.warn(`Quotes by this author will not be imported`)
    }
  }
  return authors
}
/**
 * Processes the raw input data (an array of quotes) and returns an object
 * containing the documents that will be added to each database collection,
 * plus the quotes that were skipped.
 *
 * @param {{content: string, author: string, tags?: string[]}[]} rawInputData
 * @param {{quotes: any[], authors: any[], tags: any[]}} db The existing
 *     database collections
 * @return {Promise<{
 *   added: {quotes: any[], authors: any[], tags: any[]},
 *   skipped: {duplicate: any[], invalidAuthor: any[]}
 * }>}
 */
async function processInputData(rawInputData, db) {
  // An object to store the documents that will be added to each collection.
  // @type { quotes: Quote[], authors: Author[], tags: Tag[] }
  const documents = {}
  // An object to store any quotes that are **not** added.
  // @type { duplicate: any[], invalidAuthor: any[] }
  const skipped = {}
  // The array of unique quotes from the input data
  const input = uniqBy(rawInputData, value => parseContent(value.content))

  // `getAuthorWikis` leaves the raw (possibly empty) results array in place
  // for names it could not resolve, so an array never counts as a wiki page.
  const isWikiPage = wiki => !isEmpty(wiki) && !Array.isArray(wiki)

  // ==============================================================
  // Create Authors
  // ==============================================================
  // 1. Map each author found in the input data to a wikipedia page. Data
  // from the wiki API is used to create the new author objects.
  const authorWikis = await getAuthorWikis(input)

  // 2. Create the array of authors that will be added to the collection.
  // Unresolved entries are dropped before reading `name` from them.
  documents.authors = uniqBy(
    values(authorWikis).filter(isWikiPage),
    author => author.name
  )
    // Filter out authors that are already in the collection
    .filter(author => db.authors.every(({ link }) => link !== author.link))
    // Create `Author` objects using data from the wiki API
    .map(authorWiki => ({
      _id: shortid(),
      name: authorWiki.name,
      bio: authorWiki.bio,
      description: authorWiki.description,
      link: authorWiki.link,
    }))

  // ==============================================================
  // Create Quotes
  // ==============================================================
  // 1. Remove any quotes that are already in the collection
  skipped.duplicate = remove(input, ({ content }) =>
    findQuoteByContent(content, db)
  )

  // 2. Remove any quotes by authors that do not have a wikipedia page.
  // This must happen exactly once: `remove` mutates `input`, so a second
  // pass would find nothing and overwrite the result with an empty array.
  skipped.invalidAuthor = remove(
    input,
    ({ author }) => !isWikiPage(authorWikis[author])
  )

  // 3. Create the array of new quotes that will be added to the collection.
  documents.quotes = input.map(({ author, content, tags = [] }) => ({
    _id: shortid(),
    author: authorWikis[author].name,
    content,
    tags,
  }))

  // ==============================================================
  // Create Tags
  // ==============================================================
  // 1. Get an array of all unique tag names found in the remaining quotes
  const allTags = uniq(flatMap(input, quote => quote.tags))

  // 2. Create an array of **new** tags that will be added to the `tags`
  // collection
  documents.tags = allTags
    // Quotes without a `tags` property contribute `undefined` here
    .filter(tagName => !isEmpty(tagName))
    // Filter out tags that already exist
    .filter(tagName => db.tags.every(({ name }) => name !== slugify(tagName)))
    .map(tagName => {
      const name = titleCase(tagName)
      const slug = kebabCase(tagName)
      return { _id: shortid(), name, slug }
    })

  return { added: documents, skipped }
}
186 | // Make sure the input file matches the required format 187 | const rawInputData = parseFile(INPUT_FILE) 188 | if (!validateInput(rawInputData)) { 189 | const { errors } = validateInput 190 | log.bgRed(' Invalid input data: ') 191 | errors.forEach(error => log.error(error)) 192 | process.exit(1) 193 | } 194 | // 3. Process the input data... 195 | // Returns an object containing new documents that were added to each 196 | // collection, and an object containing quotes that were not added. 197 | const { added, skipped } = await processInputData(rawInputData, db) 198 | 199 | // 4. Update the collections 200 | if (!DRY_RUN) { 201 | // a. Add the new documents to the existing collections 202 | const quotes = [...db.quotes, ...added.quotes] 203 | const authors = [...db.authors, ...added.authors] 204 | const tags = [...db.tags, ...added.tags] 205 | // b. Save the collections to disk. 206 | writeJSONFiles(DATA_DIR, { quotes, authors, tags }) 207 | } 208 | // 5. Cleanup 209 | if (CLEANUP) { 210 | const date = kebabCase(new Date().toISOString()) 211 | const CACHE_DIR = path.resolve(`.cache/imported/${date}/`) 212 | shell.mkdir('-p', CACHE_DIR) 213 | shell.mv(INPUT_FILE, CACHE_DIR) 214 | } 215 | // 6. 
/**
 * Prints a summary of an `addQuotes` run to the console.
 *
 * @param {Object} added Maps each collection name (`quotes`, `authors`,
 *     `tags`) to the array of documents that were added to it
 * @param {Object} skipped Maps each skip reason (a camelCase key) to the array
 *     of quotes that were skipped for that reason
 * @param {boolean} verbose When truthy, also prints the documents as a table
 * @param {boolean} dryRun When truthy, warns that nothing was saved
 */
function logResults(added, skipped, verbose, dryRun) {
  const tableOptions = () => ({ excludeKeys: ['_id'] })

  // Skipped quotes: only reasons that actually occurred are reported
  for (const [reasonKey, quotes] of entries(skipped)) {
    const total = quotes.length
    if (!total) continue
    log.newLine()
    log.info(
      `Skipped ${total} ${pluralize('quote', total)}: ${lowerCase(reasonKey)}`
    )
    if (verbose) logJSONTable(quotes, tableOptions())
  }

  // Added documents: every collection gets a line, even when nothing was added
  for (const [collection, docs] of entries(added)) {
    const total = docs.length
    // Collection names are plural; strip the trailing "s" to get the model
    const model = collection.replace(/s$/, '')
    log.newLine()
    if (!total) {
      log.info(`No new ${pluralize(model, total)} to add`)
      continue
    }
    log.info(`Added ${total} new ${pluralize(model, total)}`)
    if (verbose) logJSONTable(docs, tableOptions())
  }

  if (dryRun) {
    log.warn(`This was a dry run, changes were not saved`)
  }
}
/**
 * Parses the CLI arguments for the `addTags` command.
 *
 * @return {{DATA_DIR: string, INPUT_FILE: string, DRY_RUN: any, NAME: any,
 *     VERBOSE: any}} `DATA_DIR` and `INPUT_FILE` are absolute paths; the input
 *     file is resolved inside the `input` directory and defaults to
 *     `tags.json`.
 */
function parseArgs() {
  const argv = minimist(process.argv.slice(2))
  // The first positional argument is the name of the input file
  const [inputFile] = argv._
  return {
    DATA_DIR: path.resolve(argv.dataDir || dataDir.source),
    INPUT_FILE: path.resolve('input', inputFile || 'tags.json'),
    DRY_RUN: argv.d || argv.dryRun,
    NAME: argv.name || argv.n,
    VERBOSE: argv.v || argv.verbose,
  }
}
// Entry point for the `addTags` command: reads tag names from the `--name`
// argument or an input file, creates a `Tag` for every name that is not
// already in the `tags` collection, and saves the updated collection.
run(async () => {
  const { DATA_DIR, INPUT_FILE, VERBOSE, DRY_RUN, NAME } = parseArgs()
  // The current database collections
  const db = parseDataFiles(DATA_DIR)

  // Get the input data (from the `--name` argument or an input file)
  const inputData = parseNameInput(NAME) || parseFile(INPUT_FILE)

  // Validate input data
  if (!validateInput(inputData)) {
    validateInput.errors.forEach(error => log.error(error))
    throw new Error('Input data does not match schema')
  }

  // Slugs that are already taken, either by an existing tag or by a tag
  // created earlier in this run. Tracking slugs (rather than de-duplicating
  // the `{ name }` objects by identity) is what stops a name that appears
  // twice in the input from being added twice.
  const takenSlugs = new Set(db.tags.map(tag => slugify(tag.name)))

  // Map the new tag names to an array of `Tag` objects
  const tags = []
  for (const { name } of inputData) {
    const slug = slugify(name)
    if (takenSlugs.has(slug)) continue
    takenSlugs.add(slug)
    tags.push({ _id: shortid(), name: slug })
  }

  if (tags.length && !DRY_RUN) {
    // Add the new tags to the existing collection, then write files to disk
    writeJSONFiles(DATA_DIR, { tags: [...db.tags, ...tags] })
  }
  // Output the results to the console
  logResults(inputData, tags, VERBOSE, DRY_RUN)
})
14 | log.info(`Added ${tags} new ${pluralize('tag', tags)} `) 15 | if (verbose) logJSONTable(newTags, { excludeKeys: '_id' }) 16 | } else { 17 | log.info(`No new tags to add`) 18 | } 19 | 20 | if (dryRun) { 21 | log.newLine() 22 | log(`This was a dry run, changes were not saved`) 23 | } 24 | } 25 | export { logResults } 26 | -------------------------------------------------------------------------------- /cli/addTags/validation.js: -------------------------------------------------------------------------------- 1 | import { ajv } from '../../lib/ajv.js' 2 | 3 | // Schema for the input data 4 | const validateInput = ajv.compile({ 5 | title: 'addTags input', 6 | type: 'array', 7 | items: { 8 | type: 'object', 9 | properties: { name: { type: 'string' } }, 10 | required: ['name'], 11 | additionalProperties: false, 12 | }, 13 | }) 14 | 15 | export { validateInput } 16 | -------------------------------------------------------------------------------- /cli/build/README.md: -------------------------------------------------------------------------------- 1 | # $ build 2 | 3 | ```shell 4 | ❯ node cli/build 5 | ``` 6 | 7 | The `build` script creates the generated data files from the source data. It adds computed properties, (such as `quoteCount`, `length`, etc) as well as timestamp (`dateAdded` and `dateModified`). The generated data files are then synced with the MongoDB database. 8 | 9 | **⚠️ Keep in Mind** 10 | 11 | The data files generated by the build command are not included in this repository. The default directory for the generated data files is outside the root of this repository. 12 | 13 | It it is not necessary to run the build command when submitting changes to the upstream repository. 
/**
 * Parses the CLI arguments for the `build` command.
 *
 * The destination directory can be given as the first positional argument or
 * with `-d` / `--dest`; the source directory with `-s` / `--src`. Both fall
 * back to the directories configured in `dataDir`.
 *
 * @return {{SRC: string, DEST: string}} Absolute paths of the source and
 *     destination directories
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2))
  const SRC = path.resolve(args.src || args.s || dataDir.source)
  // minimist collects positional arguments in `args._`; `args[0]` is never
  // set, so reading it silently ignored a positional destination.
  const DEST = path.resolve(
    args._[0] || args.d || args.dest || dataDir.generated
  )
  return { SRC, DEST }
}
existingObject.dateModified : DATE 37 | return { dateAdded, dateModified } 38 | } 39 | 40 | run(async () => { 41 | // Run validation script on source data to make sure it matches schema before 42 | // updating generated data files. 43 | log.info('Validating source data') 44 | const validate = `node ${path.resolve('cli/validate')} source` 45 | const validationResult = shell.exec(validate, { silent: true }) 46 | if (validationResult.code) { 47 | log.newLine() 48 | log.error('Data validation failed') 49 | log('For more info, run `node cli/validate source`') 50 | process.exit(1) 51 | } 52 | // If validation passed, build new generated data files from source data 53 | log.newLine() 54 | log.info('Building data files') 55 | const { SRC, DEST } = parseArgs() 56 | // The source data. 57 | // @type { quotes: Quote[], authors: Author[], tags: Tag[] } 58 | const src = parseDataFiles(SRC) 59 | // The current version of generated data files. 60 | // @type { quotes: Quote[], authors: Author[], tags: Tag[] } 61 | const db = parseDataFiles(DEST) 62 | 63 | // Apply transforms to the source data to create the generated data files 64 | // This will add computed properties that are not included in the source 65 | // data files. 66 | // @type { quotes: Quote[], authors: Author[], tags: Tag[] } 67 | let data = transforms.reduce((result, transform) => transform(result), src) 68 | 69 | // Add timestamps to each object (dateAdded and dateModified) 70 | data = entries(data).reduce((result, [COLLECTION, objects]) => { 71 | const objectsWithTimeStamp = objects.map(object => { 72 | return { ...object, ...getTimeStamps(db, COLLECTION, object) } 73 | }) 74 | return { ...result, [COLLECTION]: objectsWithTimeStamp } 75 | }, {}) 76 | 77 | const info = [ 78 | { 79 | _id: db.info && db.info[0] ? 
/**
 * Adds computed fields to quotes:
 * - length
 * - authorSlug
 * - authorId (soon to be removed)
 *
 * Quotes whose author is not in the `authors` collection are dropped, and a
 * warning is logged for each of them.
 *
 * @param {Object} collections The database collections. `authors` must
 *     already have their `slug` field (this transform reads `author.slug`).
 * @return {Object} The collections, with `quotes` replaced
 */
function addQuoteFields({ quotes, ...collections }) {
  const { authors } = collections

  // Index authors by name once instead of scanning the array for every quote.
  // The first author with a given name wins, matching `Array#find`.
  const authorsByName = new Map()
  authors.forEach(author => {
    if (!authorsByName.has(author.name)) {
      authorsByName.set(author.name, author)
    }
  })

  return {
    ...collections,
    quotes: quotes
      .map(quote => {
        const author = authorsByName.get(quote.author)
        if (!author) {
          log.warn(`[warn] Invalid quote: ${quote._id}`)
          log.dim(`Author "${quote.author}" does not exist`)
          // Explicit placeholder; removed by the filter below
          return null
        }
        return {
          ...quote,
          authorId: author._id,
          authorSlug: author.slug,
          length: quote.content.length,
        }
      })
      .filter(identity),
  }
}
collections) 57 | return { 58 | ...tag, 59 | slug: slugify(tag.name), 60 | quoteCount: quotesWithTag.length, 61 | } 62 | }), 63 | } 64 | } 65 | 66 | export const transforms = [addAuthorFields, addQuoteFields, addTagFields] 67 | -------------------------------------------------------------------------------- /cli/checkForDuplicates/README.md: -------------------------------------------------------------------------------- 1 | # cli/findDuplicates 2 | 3 | Checks the `quotes` collection for possible duplicates quotes. It does not modify the data files, it just outputs a list of possible duplicates to the console so they can be manually checked and removed. 4 | 5 | ### Usage 6 | ```shell 7 | ❯ node cli/findDuplicates 8 | ``` 9 | 10 | ### Output 11 | ```shell 12 | Found 4 possible duplicates. 13 | 14 | ┌──────────────┬───────────────────────────────────────────────────────────────────────────────────────┐ 15 | │ B9ssb1gZ0LnN │ Whoever is happy will make others happy too. │ 16 | ├──────────────┼───────────────────────────────────────────────────────────────────────────────────────┤ 17 | │ _ZVJWv9HJsBe │ Whoever is happy will make others happy, too. │ 18 | └──────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ 19 | ┌──────────────┬───────────────────────────────────────────────────────────────────────────────────────┐ 20 | │ FI5mThj-syB4 │ We are what we repeatedly do. Excellence, then, is not an act, but a habit. │ 21 | ├──────────────┼───────────────────────────────────────────────────────────────────────────────────────┤ 22 | │ zjULM0EKmwKH │ We are what we repeatedly do. Excellence, then, is not an act but a habit. 
│ 23 | └──────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ 24 | ┌──────────────┬───────────────────────────────────────────────────────────────────────────────────────┐ 25 | │ wNdaJpR3m0lK │ Action may not always bring happiness; but there is no happiness without action. │ 26 | ├──────────────┼───────────────────────────────────────────────────────────────────────────────────────┤ 27 | │ MUARorcdLeDg │ Action may not always bring happiness, but there is no happiness without action. │ 28 | └──────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ 29 | ┌──────────────┬───────────────────────────────────────────────────────────────────────────────────────┐ 30 | │ NZ2RbZuB0-bw │ The greatest good you can do for another is not just to share your riches but to │ 31 | │ │ reveal to him his own. │ 32 | ├──────────────┼───────────────────────────────────────────────────────────────────────────────────────┤ 33 | │ 0PnL1GPc2muX │ The greatest good you can do for another is not just share your riches, but │ 34 | │ │ reveal to them their own. 
// Entry point for the `checkForDuplicates` command: groups the quotes in the
// source data by their normalized content and prints every group that
// contains more than one quote.
run(() => {
  const db = parseDataFiles(dataDir.source)
  const progressBar = createProgressBar({ title: 'Checking for Duplicates' })

  // Start the progress bar
  progressBar.start(db.quotes.length, 0)

  // Bucket the quotes by normalized content. Each quote is normalized exactly
  // once, instead of being re-parsed for every pairwise comparison. A `Map`
  // keeps insertion order, so groups appear in order of their first quote and
  // quotes keep their collection order within a group.
  // NOTE(review): unlike the pairwise comparison, quotes that share both
  // content and `_id` are also reported together — confirm this is wanted.
  const groupsByContent = new Map()
  let duplicateCount = 0
  db.quotes.forEach((quote, idx) => {
    const key = parseContent(quote.content)
    const group = groupsByContent.get(key)
    if (group) {
      group.push(quote)
      // Count a group the moment it gains its second member
      if (group.length === 2) duplicateCount += 1
    } else {
      groupsByContent.set(key, [quote])
    }
    progressBar.update(idx, { duplicateCount })
  })

  // Only groups with more than one quote are possible duplicates
  const duplicateGroups = [...groupsByContent.values()].filter(
    group => group.length > 1
  )

  // Stop the progress bar
  progressBar.stop()
  logResults(duplicateGroups)
})
/**
 * Prints the groups of possible duplicate quotes, one table per group.
 *
 * @param {{_id: string, content: string}[][]} duplicateGroups Each item is an
 *     array of quotes that appear to be duplicates of each other
 */
function logResults(duplicateGroups) {
  const MAX_WIDTH = 105
  // `process.stdout.columns` is undefined when stdout is not a TTY (output
  // piped or redirected). Fall back to the maximum width so the column
  // widths below do not become NaN.
  const WIDTH = Math.min(process.stdout.columns || MAX_WIDTH, MAX_WIDTH)
  const ID_COL_WIDTH = 14
  const PADDING = 3
  const CONTENT_COL_WIDTH = WIDTH - ID_COL_WIDTH - PADDING * 2
  const TEXT_WIDTH = CONTENT_COL_WIDTH - PADDING * 2
  const count = duplicateGroups.length
  log.newLine()
  log[count ? 'warn' : 'success'](`Found ${count} possible duplicates`)
  log.newLine()

  duplicateGroups.forEach(quotes => {
    const table = Table({
      colWidths: [ID_COL_WIDTH, CONTENT_COL_WIDTH],
      styles: { paddingLeft: PADDING, paddingRight: PADDING },
      // One row per quote: the dimmed id maps to the wrapped content
      items: quotes.map(quote => {
        const id = chalk.dim(quote._id)
        const content = wrap(quote.content, { width: TEXT_WIDTH, indent: '' })
        return { [id]: content }
      }),
    })
    console.log(table.toString())
  })
}
// Entry point for the `sample` command: picks `COUNT` random quotes from the
// source data, collects the authors of those quotes, and writes the sample
// collections to the destination directory.
run(async () => {
  const { SRC, DEST, COUNT } = parseArgs()
  // The data files
  const db = parseDataFiles(SRC)

  // The sample data
  const sample = { quotes: [], authors: [], tags: db.tags }

  // Never try to select more distinct quotes than the data contains;
  // otherwise the rejection-sampling loop below could never finish.
  const availableIds = new Set(db.quotes.map(({ _id }) => _id))
  const sampleSize = Math.min(COUNT, availableIds.size)

  // Keep track of the quote _ids that have been added to sample
  const selectedIds = new Set()

  // Select `sampleSize` distinct random quotes
  while (sample.quotes.length < sampleSize) {
    const quote = random.arrayElement(db.quotes)
    // If the quote was already added, pick another one
    if (selectedIds.has(quote._id)) continue
    selectedIds.add(quote._id)
    sample.quotes.push(quote)
  }

  // Count the sampled quotes per author. A `Map` keeps insertion order, so
  // authors appear in the order they are first seen in the sample.
  const quoteCounts = new Map()
  sample.quotes.forEach(({ author }) => {
    quoteCounts.set(author, (quoteCounts.get(author) || 0) + 1)
  })

  // Map author names to complete `Author` objects. `quoteCount` reflects the
  // number of quotes in the sample, not in the full data.
  sample.authors = [...quoteCounts].map(([name, quoteCount]) => ({
    ...findAuthorByName(name, db),
    quoteCount,
  }))

  // Replace any previous sample with the new one
  shell.rm('-rf', DEST)
  fs.mkdirSync(DEST, { recursive: true })
  writeJSONFiles(DEST, sample)
  console.log(chalk.greenBright('\n ✨ Saved new sample data to...'))
  console.log(chalk.yellow(`==> ${DEST}\n`))
  console.log(createResultsTable(sample).toString())
})
The generated data is stored outside the root of this repository. You can change the location where generated data files are stored via the `config.dataDir.generated` setting. 9 | 10 | Before running this command, make sure you run the build command to create / update the generated data files. 11 | 12 | 13 | 14 | ## Usage 15 | 16 | ```sh 17 | $ node cli/syncData [] [..options] 18 | ``` 19 | 20 | ## Options 21 | | Argument | Description | 22 | |:-----|:-----| 23 | | --help, -h | Show command documentation | 24 | | `` | The directory containing the generated data files that will be synced to the database. Default = `config.dataDir.generated` | 25 | | --overwrite, -o | If this flag is included, the command removes all existing documents from the MongoDB collections, and then populates them with the data from the JSON files. Otherwise, the command will only add, update, and remove the documents that have been modified since the last sync | 26 | | --verbose, -v | If this flag is included, the command will show more detailed output about the operation. This includes the full list of objects that will be added, updated, and removed for each collection. 
/**
 * Checks that the database can be reached.
 *
 * Connects the shared `client`, pings the `admin` database and logs the
 * outcome. The client is closed afterwards whether or not the check succeeds;
 * any connection or ping error propagates to the caller.
 */
export async function testConnection() {
  try {
    console.log('Testing database connection')
    // Open the connection, then ping to prove the server actually responds
    await client.connect()
    const admin = client.db('admin')
    await admin.command({ ping: 1 })
    console.log('Test successful')
  } finally {
    // Release the connection even if connecting or pinging failed
    await client.close()
  }
}
/**
 * Indexes an array of documents by their `_id`.
 *
 * Builds the map in a single pass. (Spreading the accumulator on every
 * iteration of a `reduce` copies all previous entries each time, which is
 * quadratic in the number of documents.)
 *
 * @param {Array<{_id: string}>} arr the documents to index
 * @return {Object<string, Object>} an object whose keys are document `_id`s
 *     and whose values are the documents themselves. If two documents share
 *     an `_id`, the later one wins.
 */
function toObjectMap(arr) {
  return Object.fromEntries(arr.map(item => [item._id, item]))
}
// An object to store the documents that will be inserted, updated, deleted
// in each collection
const allChanges = {}

// An object to track the number of documents that were successfully inserted,
// updated, and removed from each collection.
const allResults = {}

// Fixed number of progress bar steps per collection: fetch, diff additions
// and updates, diff removals, insert, update, delete. One extra step is added
// for every document that has to be updated individually.
const PROGRESS_STEPS = 6

try {
  await testConnection()
  log.newLine()

  const dataFiles = parseDataFiles(dataDir.generated)

  // Connect to the database
  await client.connect()
  const progressBar = createProgressBar()

  // Advances the progress bar by one step. The short pauses before and after
  // keep each step visible on screen.
  const nextStep = async () => {
    await delay(50)
    progressBar.increment()
    await delay(50)
  }

  // Iterate through the collection names in the source data (JSON)
  for (const COLLECTION_NAME of Object.keys(dataFiles)) {
    progressBar.start(PROGRESS_STEPS, 0, {
      title: `Updating ${COLLECTION_NAME.padEnd(15, ' ')}`,
    })
    // Number documents that were successfully modified for the collection
    const results = { added: 0, updated: 0, removed: 0 }
    allResults[COLLECTION_NAME] = results

    // documents that will be inserted, updated, removed
    const changes = { added: [], updated: [], removed: [] }
    allChanges[COLLECTION_NAME] = changes

    // The source data for the current collection (JSON files)
    // The data is mapped to an object where the keys are the object _ids
    const sourceDocuments = toObjectMap(dataFiles[COLLECTION_NAME])

    // The mongoDB collection object
    const collection = client.db().collection(COLLECTION_NAME)
    // The live data for the current collection (MongoDB)
    // The data is mapped to an object where the keys are the object _ids
    const liveDocuments = toObjectMap(await collection.find({}).toArray())

    await nextStep()

    // Find objects in this collection that need to be added or updated
    for (const [id, doc] of Object.entries(sourceDocuments)) {
      const existingDocument = liveDocuments[id]
      if (!existingDocument) {
        // No live document with this id: it is new and must be inserted
        changes.added.push(doc)
      } else if (!isEqual(existingDocument, doc)) {
        // The live document differs from the source: it must be updated
        changes.updated.push(doc)
      }
    }

    await nextStep()

    // Find any objects in the MongoDb collection that have been removed from
    // the source data. These will be removed from the database.
    for (const [id, doc] of Object.entries(liveDocuments)) {
      if (!sourceDocuments[id]) {
        changes.removed.push(doc)
      }
    }

    await nextStep()

    if (VERBOSE) {
      log.header(`Add ${COLLECTION_NAME}`)
      log(changes.added)
      log.header(`Update ${COLLECTION_NAME}`)
      log(changes.updated)
      log.header(`Delete ${COLLECTION_NAME}`)
      log(changes.removed)
    }

    if (!isEmpty(changes.added)) {
      // Add new objects to the MongoDB collection
      const insertResult = await collection.insertMany(changes.added, {
        ordered: false,
      })
      if (insertResult.acknowledged) {
        results.added += insertResult.insertedCount
      }
    }

    await nextStep()

    // Grow the progress bar by one step per document to update. The bar is
    // incremented after each document, otherwise it would appear frozen
    // while this step is in progress.
    progressBar.setTotal(PROGRESS_STEPS + changes.updated.length)

    // Update the objects that have been modified since last sync
    for (const { _id, ...doc } of changes.updated) {
      // `updateOne` reports `acknowledged` / `matchedCount` like the insert
      // and delete results used in this script. The `.ok` flag of
      // `findOneAndUpdate` is only present on the result-metadata wrapper and
      // does not tell whether a document was actually matched.
      const updateResult = await collection.updateOne({ _id }, { $set: doc })
      if (updateResult.acknowledged && updateResult.matchedCount > 0) {
        results.updated += 1
      }
      progressBar.increment()
    }

    await nextStep()

    if (!isEmpty(changes.removed)) {
      // Remove the items that have been deleted from the source data
      const deleteResults = await collection.deleteMany({
        _id: { $in: changes.removed.map(({ _id }) => _id) },
      })
      if (deleteResults.acknowledged) {
        results.removed += deleteResults.deletedCount
      }
    }
    await delay(500)
    // Fill the bar completely before moving on to the next collection
    progressBar.update(PROGRESS_STEPS + changes.updated.length)
    await delay(500)
  }

  // Summary: completed vs. planned changes for each collection
  for (const [COLLECTION_NAME, results] of Object.entries(allResults)) {
    const changes = allChanges[COLLECTION_NAME]
    log.header(COLLECTION_NAME)
    log.info(`Added: ${results.added} of ${changes.added.length}`)
    log.info(`Updated: ${results.updated} of ${changes.updated.length}`)
    log.info(`Deleted: ${results.removed} of ${changes.removed.length}`)
  }

  process.exit(0)
} catch (error) {
  log.error(error)
  process.exit(1)
}
/**
 * Entry point: adds any tag that is used by a quote but missing from the tags
 * collection.
 *
 * Tag names found on quotes are kebab-cased and de-duplicated, then compared
 * with the names of the existing tags. Each missing tag gets a generated
 * `_id`. Unless this is a dry run, the updated tags collection is written back
 * to the data directory. The results are logged in both cases.
 */
run(async () => {
  const { DATA_DIR, VERBOSE, DRY_RUN } = parseArgs()
  // The current database collections
  const db = parseDataFiles(DATA_DIR)

  // Every distinct (kebab-cased) tag used by a quote, in first-seen order.
  // De-duplicating once over the flattened list avoids re-scanning the
  // accumulated tags for every quote.
  const allTags = uniq(
    db.quotes.flatMap(({ tags = [] }) => tags.map(tag => kebabCase(tag)))
  )

  // Names of the tags that already exist, for constant-time lookups
  const existingNames = new Set(db.tags.map(({ name }) => name))

  const tags = allTags
    .filter(tag => !existingNames.has(tag))
    .map(tag => ({
      _id: shortid(),
      name: slugify(tag),
    }))

  if (tags.length && !DRY_RUN) {
    // Add the new tags to the existing collection, then write files to disk
    writeJSONFiles(DATA_DIR, { tags: [...db.tags, ...tags] })
  }
  // Output the results to the console
  logResults(tags, VERBOSE, DRY_RUN)
})
logJSONTable(newTags, { excludeKeys: '_id' }) 10 | } else { 11 | log.info(`No new tags were added`) 12 | } 13 | 14 | if (dryRun) { 15 | log.newLine() 16 | log(`This was a dry run, changes were not saved`) 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /cli/validate/README.md: -------------------------------------------------------------------------------- 1 | # $validate 2 | 3 | The validate script runs data validation to ensure the data files match the schema. 4 | 5 | Note: There are two schemas, one for the source data (located in the `data/` directory of this repository), and one for the generated data files (not included in the repository). The generated data files are created by running the build command and are stored outside the root of this repository. 6 | 7 | The script will provide detailed output about any validation errors, including a link to the exact location of the error in the JSON file. This makes it easy to quickly find and address any validation errors in the source data. 8 | 9 | ### Usage 10 | 11 | ```SHELL 12 | $ node cli/validate [] [--verbose, -v] 13 | ``` 14 | 15 | ### Arguments 16 | 17 | | Name | default | description | 18 | | --------- | ------- | ---------------------------------------------- | 19 | | `` | `source` | The data files to target: `source` or `generated` | 20 | | `--verbose` `-v` | false | Validation errors will also show the full object in which the error occurred | 21 | 22 | ### Examples 23 | 24 | ⚠️ To run CLI scripts as executables, you need to run `npm link` from the project root during initial setup. Otherwise, replace `validate` with `node cli/validate`. 25 | 26 | ![](https://user-images.githubusercontent.com/8286271/120944850-a52e3e80-c704-11eb-97f9-82599286a8e5.jpg) 27 | 28 | If you include the `--verbose` flag, it will also output the objects where validation errors occurred. 
/**
 * Parses and validates CLI args
 *
 * @return {{DEBUG: any, VERBOSE: any, TARGET: "source" | "generated"}} the
 *     parsed flags and the (lower-cased) target data directory name
 * @throws {Error} if the target is neither "source" nor "generated"
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2))
  const DEBUG = args.b || args.debug
  const VERBOSE = args.v || args.verbose
  // The target is accepted case-insensitively, but it is later used as a
  // case-sensitive key into `dataDir` and `schemas`. Normalize it here so
  // that e.g. "Source" does not pass validation and then fail with a
  // misleading "Data directory does not exist" error.
  const TARGET = String(args._[0] || 'source').toLowerCase()
  if (!/^(source|generated)$/.test(TARGET)) {
    throw new Error(Errors.invalidTarget)
  }
  return { DEBUG, VERBOSE, TARGET }
}
/**
 * Gets the location of a validation error
 *
 * Looks up the most specific JSON pointer available for the error: the
 * offending additional property (if any), then the error's `instancePath`,
 * then each ancestor of that path up to the object itself. Falling back to an
 * ancestor means that an error whose exact path has no pointer entry is still
 * reported instead of crashing the validation run.
 *
 * @param {Ajv.ValidationError} error
 * @param {number} index the index of the object in which the error occurred
 * @param {Object} pointers pointers returned by json-source-map
 * @return {{line: number, column: number}} location of the error. `line` is
 *     the pointer's line plus one; `column` is returned as stored in the
 *     pointer. If no pointer can be found at all, `{ line: 1, column: 0 }`.
 */
function getErrorLocation(error, index, pointers) {
  const { instancePath = '', params = {} } = error
  const { additionalProperty } = params
  // Path segments below the object, e.g. '/tags/0' => ['tags', '0']
  const segments = instancePath.split('/').slice(1)
  const candidates = []

  if (additionalProperty) {
    // Property names must be escaped to form a valid JSON pointer token
    const token = String(additionalProperty)
      .replace(/~/g, '~0')
      .replace(/\//g, '~1')
    candidates.push([`/${index}`, ...segments, token].join('/'))
  }
  // The instance path itself, followed by each of its ancestors
  for (let depth = segments.length; depth >= 0; depth -= 1) {
    candidates.push([`/${index}`, ...segments.slice(0, depth)].join('/'))
  }

  const key = candidates.find(candidate => pointers[candidate]?.value)
  if (key === undefined) return { line: 1, column: 0 }

  const { line, column } = pointers[key].value
  return { line: line + 1, column }
}
/**
 * Entry point: validates every data file in the target directory and exits
 * with status 1 if any file contains validation errors.
 */
run(async () => {
  const { TARGET, VERBOSE } = parseArgs()
  // Returns the number of validation errors in each file.
  const errorCounts = getDataFiles(TARGET).map(({ FILE, validate }) => {
    const PATH = path.join(dataDir[TARGET], FILE)
    return validateCollection(PATH, validate, VERBOSE)
  })
  // Total number of validation errors for all files. lodash `add` only takes
  // two operands, so `add(...errorCounts)` ignored every count after the
  // second one; fold over the whole list instead.
  const totalErrors = errorCounts.reduce(
    (total, count) => add(total, count),
    0
  )
  // If there were any errors, log a message and exit
  if (totalErrors) {
    log.newLine()
    log(`Validation failed`)
    log('Done')
    process.exit(1)
  }
})
"name": "Literature" }, 16 | { "_id": "96NNdxeI_", "name": "Politics" }, 17 | { "_id": "poT-7QEBm", "name": "Life" }, 18 | { "_id": "wm1HFcO8vf", "name": "Success" }, 19 | { "_id": "vmVZQ72P_", "name": "Nature" }, 20 | { "_id": "KBWnDu4rH", "name": "History" }, 21 | { "_id": "p5xRCWCdU", "name": "Proverb" }, 22 | { "_id": "uki-krBKTB_", "name": "Humor" }, 23 | { "_id": "B1O_IThWjSP", "name": "Truth" }, 24 | { "_id": "-WCNo8uFORU", "name": "Social Justice" }, 25 | { "_id": "QuBdKRcjNsO", "name": "Freedom" }, 26 | { "_id": "AjxQFSPEylb", "name": "Conservative" }, 27 | { "_id": "olEL606Ju49", "name": "War" }, 28 | { "_id": "Ipw2-EcInZg", "name": "Courage" }, 29 | { "_id": "DNZ4IdtHiFG", "name": "Spirituality" }, 30 | { "_id": "r437PEqVZx5", "name": "Family" }, 31 | { "_id": "vWfmIQt6k8c", "name": "Character" }, 32 | { "_id": "QmvdN2qkQCC", "name": "Change" }, 33 | { "_id": "LzQ9iXOoZw_", "name": "Time" }, 34 | { "_id": "q8kOLaefsZM", "name": "Pain" }, 35 | { "_id": "mh6HEhK_T_a", "name": "Philosophy" }, 36 | { "_id": "5wvH2mbETdq", "name": "Self" }, 37 | { "_id": "uCLiEwnwh", "name": "Honor" }, 38 | { "_id": "bsT8Bb9sxB", "name": "Virtue" }, 39 | { "_id": "CaE-vzeOZb", "name": "Motivational" }, 40 | { "_id": "HJ05xaA6gN", "name": "Film" }, 41 | { "_id": "MbsuUl67N3", "name": "Power Quotes" }, 42 | { "_id": "IJsMcEupo4", "name": "Self Help" }, 43 | { "_id": "V60a195td", "name": "Leadership" }, 44 | { "_id": "EKV8W1TN-wb", "name": "Sports" }, 45 | { "_id": "qTFouZDPBZz", "name": "Athletics" }, 46 | { "_id": "JaQwywHSk59", "name": "Competition" }, 47 | { "_id": "HJp_e1usX", "name": "Humorous" }, 48 | { "_id": "s19tg5r8EM-", "name": "Mathematics" }, 49 | { "_id": "jl_H9UVXhGb", "name": "Ethics" }, 50 | { "_id": "eD6qAIcDR8s", "name": "Society" }, 51 | { "_id": "eghR-r-OzL9", "name": "Imagination" }, 52 | { "_id": "v9QUFHDZPT3", "name": "Knowledge" }, 53 | { "_id": "o7BP9_4e2lL", "name": "Tolerance" }, 54 | { "_id": "kZei477Cojv", "name": "Stupidity" }, 55 | { "_id": 
/**
 * CLI spinners that fall back gracefully on non TTY environments such as CI.
 *
 * When the spinner is enabled, calls are forwarded to an ora spinner. When it
 * is disabled, `start`, `succeed`, `warn` and `fail` print their text with
 * `console.log` instead.
 */
export class Spinner {
  /** True while an enabled spinner is currently spinning */
  get isSpinning() {
    return this.isEnabled && this.spinner.isSpinning
  }

  /**
   * @param {string | Object} [input] the spinner text, or an options object.
   *     `isEnabled` (boolean) forces the spinner on or off; when omitted, the
   *     spinner is enabled only if `stream` is interactive.
   */
  constructor(input = {}) {
    const options = isString(input) ? { text: input } : input
    this.isEnabled =
      typeof options.isEnabled === 'boolean'
        ? options.isEnabled
        : isInteractive({ stream: options.stream })
    // Pass the normalized options to ora. `this.options` was never assigned,
    // so ora previously received `undefined` and every option given by the
    // caller (text, stream, ...) was dropped.
    this.spinner = this.isEnabled ? new Ora(options) : null
  }

  /** Stops the spinner if it is currently spinning */
  stop() {
    if (this.spinner && this.spinner.isSpinning) {
      this.spinner.stop()
    }
  }

  /** Starts the spinner, or prints `text` when the spinner is disabled */
  start(text) {
    if (this.spinner) {
      this.spinner.start(text)
    } else if (text) {
      console.log(text)
    }
  }

  /** Marks the spinner as succeeded, or prints `text` when disabled */
  succeed(text) {
    if (this.spinner) {
      this.spinner.succeed(text)
    } else if (text) {
      console.log(text)
    }
  }

  /** Marks the spinner with a warning, or prints `text` when disabled */
  warn(text) {
    if (this.spinner) {
      this.spinner.warn(text)
    } else if (text) {
      console.log(text)
    }
  }

  /**
   * Marks the spinner as failed. When disabled, prints `text`, or 'Failed!'
   * if no text is given.
   */
  fail(text) {
    if (this.spinner) {
      this.spinner.fail(text)
    } else {
      console.log(text || 'Failed!')
    }
  }
}
| properties: { 19 | str: { type: 'string' }, 20 | int: { type: 'integer' }, 21 | }, 22 | required: ['str', 'int'], 23 | }, 24 | }) 25 | expect(schemas.foo.schema.name).toEqual('Foo') 26 | }) 27 | }) 28 | -------------------------------------------------------------------------------- /lib/__tests__/isEqual.js: -------------------------------------------------------------------------------- 1 | import { isEqual } from '../isEqual.js' 2 | 3 | describe('utils > isEqual', () => { 4 | it(`Returns true if object are deep equal`, () => { 5 | const a = { one: 'foo', two: 'bar', arr: [1, 2] } 6 | const b = { one: 'foo', two: 'bar', arr: [2, 1] } 7 | expect(isEqual(a, b)).toBe(true) 8 | }) 9 | }) 10 | -------------------------------------------------------------------------------- /lib/ajv.js: -------------------------------------------------------------------------------- 1 | import Ajv from 'ajv' 2 | import { mapValues } from 'lodash-es' 3 | import addFormats from 'ajv-formats' 4 | 5 | /** 6 | * A global instance of Ajv 7 | */ 8 | export const ajv = new Ajv({ allErrors: true, useDefaults: true }) 9 | addFormats(ajv) 10 | 11 | /** 12 | * Takes an object in which the values are JSON schemas and compiles them 13 | * with ajv. The return value is a new object in which the values are the 14 | * the compiled schema. 
/**
 * Filter quotes by author name
 *
 * @param {string | {name: string}} author the author name, or an object with
 *     a `name` property
 * @param {{quotes: any[], authors: any[], tags: any[]}} db
 * @return {any[]} the quotes whose `author` is exactly equal to the name
 */
export function findQuotesByAuthor(author, db) {
  // `typeof null` is also 'object', so rule out null before reading `.name`
  const isAuthorObject = author !== null && typeof author === 'object'
  const name = isAuthorObject ? author.name : author
  return db.quotes.filter(quote => quote.author === name)
}
/**
 * Returns true if a file path has a `.json` or `.jsonc` extension.
 *
 * The extension must match exactly: look-alike extensions such as `.jsonl`,
 * `.json5` or `.jsonnet` are not treated as JSON files.
 *
 * @param {string} FILE a file name or path
 * @return {boolean}
 */
export function isJSONFile(FILE) {
  // Anchored on both ends; an unanchored pattern would also accept any
  // extension that merely starts with ".json".
  return /^\.jsonc?$/.test(path.extname(FILE))
}
/**
 * The default log function. When called as a function, this is equivalent to
 * `console.log`.
 *
 * It is also an object that contains all the other log methods, such as
 * `log.error`, `log.success`, etc.
 *
 * @example
 * // Called as a function, outputs message with default formatting
 * log('some value')
 *
 * // Outputs message with error formatting
 * log.error("something went wrong")
 *
 * // Outputs message with success formatting
 * log.success("something worked")
 */
export function log(message, ...args) {
  return createLogger('log')(message, ...args)
}

/**
 * Width of the terminal in columns. `process.stdout.columns` is undefined
 * when stdout is not a TTY (piped or redirected output); without a fallback
 * the width arithmetic below yields NaN and `padEnd` produces no padding.
 */
const getTerminalColumns = () => process.stdout.columns || 80

// NOTE(review): several non-warning helpers (info, success, dim, bgGreen,
// bgYellow) are routed through `console.warn` — presumably deliberate;
// confirm before changing.
log.warn = createLogger('warn', chalk.magentaBright)
log.info = createLogger('warn', chalk.yellowBright)
log.error = createLogger('error', chalk.bold.redBright)
log.success = createLogger('warn', chalk.greenBright)
log.dim = createLogger('warn', chalk.dim)
log.bgGreen = createLogger('warn', chalk.white.bgGreenBright.bold)
log.bgRed = createLogger('error', chalk.white.bgRedBright.bold)
log.bgYellow = createLogger('warn', chalk.white.bgYellowBright.bold)
log.link = createLogger('log', chalk.dim.underline)

/**
 * Outputs the stack trace from an error
 * Removes the error message and only displays the stack trace.
 *
 * @param {{stack?: string} | null} [error] the error whose stack is printed
 */
log.stack = (error = {}) => {
  // The parameter default only covers `undefined`; guard against null too.
  const { stack = '' } = error || {}
  // Keep only the "at ..." frames, dropping the leading message line(s)
  const lines = stack.split('\n').filter(line => /^\s+at/i.test(line))
  const str = lines.map(line => ` ${line.trim()}`).join('\n')
  console.error(chalk.dim(str))
}

/**
 * Outputs a horizontal divider
 *
 * @param {Object} [options]
 * @param {string} [options.char = '-'] character the divider is made of
 * @param {boolean} [options.compact = false] if true, the divider is printed
 *     without the surrounding blank lines
 * @param {(str: string) => string} [options.color] color function
 */
log.divider = ({ char = '-', compact = false, color = chalk.dim } = {}) => {
  const w = getTerminalColumns() - 2
  const divider = `${color(char.padEnd(w, char))}`
  console.log(compact ? divider : `\n${divider}\n`)
}

/**
 * Outputs a block header: the given text between two horizontal rules.
 *
 * @param {string} str the header text
 * @param {Object} [options]
 * @param {Function} [options.color = chalk.yellow] a chalk function; its
 *     `.dim` variant is used for the rules
 * @param {string} [options.char = '='] character the rules are made of
 */
log.header = (str, { color = chalk.yellow, char = '=' } = {}) => {
  const w = getTerminalColumns() - 3
  console.log(' ')
  console.log(` ${color.dim(''.padEnd(w, char))}`)
  console.log(` ${color(str)} `)
  console.log(` ${color.dim(''.padEnd(w, char))} `)
  console.log(' ')
}

/**
 * Outputs a message in bold
 */
log.bold = str => {
  log(chalk.bold(str))
}

/**
 * Outputs `n` blank lines (one by default)
 */
log.newLine = (n = 1) => {
  range(n).forEach(() => console.log(' '))
}
/**
 * Creates a table representation of an array of JSON objects and logs it to
 * the console. Each object is rendered as its own two-column table (key on
 * the left, value on the right), preceded by a blank line.
 *
 * @param {Object[]} objects the objects to display
 * @param {Object} [options]
 * @param {string[]} [options.excludeKeys = []] keys that are not displayed
 * @param {boolean} [options.excludeEmpty = true] if true, properties whose
 *     value is considered empty by `isEmpty` are not displayed
 * @return {void}
 */
export function logJSONTable(objects, options = {}) {
  const { excludeKeys = [], excludeEmpty = true } = options
  // `process.stdout.columns` is undefined when stdout is not a TTY (piped
  // output); fall back to 80 columns so the widths below never become NaN.
  const WIDTH = Math.min(process.stdout.columns || 80, 105)
  const KEY_COL_WIDTH = 14
  const PADDING = 2
  const CONTENT_COL_WIDTH = WIDTH - KEY_COL_WIDTH - PADDING * 2
  const TEXT_WIDTH = CONTENT_COL_WIDTH - PADDING * 2

  // NOTE(review): if `isEmpty` is lodash's, it also reports numbers and
  // booleans as empty, so such properties are hidden when `excludeEmpty` is
  // on — confirm whether that is intended.
  const isVisible = ([key, value]) => {
    if (excludeKeys.includes(key)) return false
    if (excludeEmpty && isEmpty(value)) return false
    return true
  }

  // Iterating purely for side effects, hence forEach rather than map
  objects.forEach(object => {
    const entries = Object.entries(object).filter(isVisible)
    // blank space between items
    log.newLine()
    // Outputs a table representation of the given object
    const table = Table({
      colWidths: [KEY_COL_WIDTH, CONTENT_COL_WIDTH],
      styles: { paddingLeft: PADDING, paddingRight: PADDING },
      items: entries.map(([key, rawValue]) => {
        const value = wrap(String(rawValue), { width: TEXT_WIDTH, indent: '' })
        return [chalk.dim(key), value]
      }),
    })
    console.log(table.toString())
  })
}
12 | const chars = { 13 | top: ' ', 14 | 'top-mid': ' ', 15 | 'top-left': '', 16 | 'top-right': ' ', 17 | bottom: ' ', 18 | 'bottom-mid': ' ', 19 | 'bottom-left': ' ', 20 | 'bottom-right': ' ', 21 | left: '', 22 | 'left-mid': '', 23 | mid: ' ', 24 | 'mid-mid': ' ', 25 | right: ' ', 26 | 'right-mid': ' ', 27 | middle: ' ', 28 | } 29 | 30 | const data = rows.map(row => row.split(':').map(v => v.trim())) 31 | // Width of the left column is set to the length of text content in left column 32 | const leftColWidth = max(data.map(([leftCol]) => leftCol.length)) + 2 33 | // Width of the right column is based on the available space in the terminal, with a max 34 | // width of 100 35 | const rightColWidth = Math.min( 36 | MAX_TEXT_WIDTH, 37 | process.stdout.columns - leftColWidth 38 | ) 39 | 40 | // Create a table with no visible borders. This creates a two column layout 41 | // where the option is displayed in the left column and description is 42 | // displayed in the right column. The text in the description column is 43 | // wrapped to a readable line length, while maintaining alignment with the 44 | // column. 
/**
 * Normalizes a string so that equal or near-equal content can be compared:
 *
 * - Converts string to lowercase
 * - Removes all punctuation (letters, digits, apostrophes and whitespace are
 *   kept)
 * - Collapses every run of whitespace into a single space
 * - Removes english stopwords
 *
 * @param {any} str the content to normalize (coerced to a string)
 * @return {string} the remaining words joined by single spaces
 */
export function parseContent(str) {
  const words = String(str)
    .toLowerCase()
    // `0-9`, not `1-9`: zeros are part of the content ("10" must not turn
    // into "1")
    .replace(/[^a-z0-9'\s]/g, '')
    // Trim first so leading/trailing whitespace yields no empty tokens
    .trim()
    // Split on every whitespace run (spaces, tabs, newlines)
    .split(/\s+/)
  return words
    .filter(word => word !== '' && !stopwords.includes(word))
    .join(' ')
}
/**
 * Reads and parses the database files.
 *
 * Every file in `DIR` accepted by `isJSONFile` is parsed as JSON. The result
 * maps each file's base name, without its extension, to the parsed content,
 * e.g. `{ quotes: [...], authors: [...], tags: [...] }`.
 *
 * @param {string} DIR path to the directory containing the data files
 * @return {Object<string, any>} parsed file contents keyed by file name
 * @throws {Error} if `DIR` does not exist
 * @throws {SyntaxError} if a file does not contain valid JSON
 */
export function parseDataFiles(DIR) {
  if (!fs.existsSync(DIR)) {
    throw new Error(`Does not exist \n ${DIR}`)
  }
  const files = fs.readdirSync(DIR).filter(isJSONFile)
  const entries = files.map(FILE => {
    const documents = JSON.parse(
      fs.readFileSync(path.resolve(DIR, FILE), 'utf-8')
    )
    // Strip whatever extension the file actually has, so the key never keeps
    // the extension of an accepted file that is not literally `.json`.
    const key = path.basename(FILE, path.extname(FILE))
    return [key, documents]
  })
  return Object.fromEntries(entries)
}
/**
 * Prompts the user to select one item from a list.
 *
 * @param {Object} config
 * @param {string} config.message the prompt message
 * @param {Object[]} config.options the items to choose from. Each item is
 *     displayed using its `name` (or `title`) and `description`; the selected
 *     item's `value` is returned, or the item itself if it has no truthy
 *     `value`.
 * @param {boolean} [config.none = true] if true, a "None of the above" choice
 *     with the value `false` is appended to the list
 * @return {Promise<any>} the result of the underlying select prompt
 */
export async function select({ message, options, none = true }) {
  /**
   * This formats the `description` that is displayed below the list of choices.
   * It shows a description of the currently highlighted item.
   */
  const formatDescription = value => {
    // `process.stdout.columns` is undefined when stdout is not a TTY; fall
    // back to the maximum width so the wrap width is never NaN.
    const width = Math.min(process.stdout.columns || 75, 75)
    const description = wrap(chalk.dim(value || ''), { indent: ' ', width })
    const separator = chalk.dim(` ${'—'.padEnd(73, '—')}`)
    return `${separator}\n${description}`
  }

  const choices = options.map(obj => ({
    name: obj.name || obj.title,
    description: formatDescription(obj.description),
    // NOTE(review): a falsy `value` (0, '', false) falls back to the whole
    // object — confirm whether that is intended.
    value: obj.value || obj,
  }))
  // Append the extra choice only when requested. `none && {...}` inside the
  // array literal would otherwise leave a literal `false` in the choices.
  if (none) {
    choices.push({ name: 'None of the above', value: false })
  }
  return selectBase({ name: 'select', message, choices })
}
Abdul Kalam", 5 | "thumbnail": { 6 | "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6e/A._P._J._Abdul_Kalam.jpg/42px-A._P._J._Abdul_Kalam.jpg", 7 | "width": 42, 8 | "height": 50 9 | }, 10 | "pageimage": "A._P._J._Abdul_Kalam.jpg", 11 | "description": "Scientist and 11th President of India", 12 | "descriptionsource": "local", 13 | "categories": [ 14 | { "ns": 14, "title": "Category:1931 births" }, 15 | { "ns": 14, "title": "Category:2015 deaths" }, 16 | { "ns": 14, "title": "Category:20th-century Indian engineers" }, 17 | { "ns": 14, "title": "Category:20th-century Indian politicians" }, 18 | { "ns": 14, "title": "Category:21st-century Indian engineers" }, 19 | { "ns": 14, "title": "Category:21st-century Indian politicians" }, 20 | { "ns": 14, "title": "Category:A. P. J. Abdul Kalam" }, 21 | { 22 | "ns": 14, 23 | "title": "Category:All Wikipedia articles written in Indian English" 24 | }, 25 | { 26 | "ns": 14, 27 | "title": "Category:Articles with Encyclopædia Britannica links" 28 | }, 29 | { "ns": 14, "title": "Category:Articles with hAudio microformats" }, 30 | { "ns": 14, "title": "Category:Articles with hCards" }, 31 | { "ns": 14, "title": "Category:Articles with short description" }, 32 | { "ns": 14, "title": "Category:CS1 maint: archived copy as title" }, 33 | { "ns": 14, "title": "Category:CS1 maint: extra text: authors list" }, 34 | { "ns": 14, "title": "Category:CS1 maint: others" }, 35 | { 36 | "ns": 14, 37 | "title": "Category:Defence Research and Development Organisation" 38 | }, 39 | { "ns": 14, "title": "Category:Engineers from Tamil Nadu" }, 40 | { 41 | "ns": 14, 42 | "title": "Category:Fellows of the National Academy of Medical Sciences" 43 | }, 44 | { "ns": 14, "title": "Category:Good articles" }, 45 | { "ns": 14, "title": "Category:Indian Muslims" }, 46 | { "ns": 14, "title": "Category:Indian Space Research Organisation people" }, 47 | { "ns": 14, "title": "Category:Indian Tamil academics" }, 48 | { "ns": 14, "title": 
"Category:Indian Tamil politicians" }, 49 | { "ns": 14, "title": "Category:Indian aerospace engineers" }, 50 | { "ns": 14, "title": "Category:Madras Institute of Technology alumni" }, 51 | { "ns": 14, "title": "Category:Nuclear energy in India" }, 52 | { "ns": 14, "title": "Category:Pages including recorded pronunciations" }, 53 | { 54 | "ns": 14, 55 | "title": "Category:Pages using Sister project links with hidden wikidata" 56 | }, 57 | { "ns": 14, "title": "Category:People from Ramanathapuram district" }, 58 | { "ns": 14, "title": "Category:Presidents of India" }, 59 | { "ns": 14, "title": "Category:Recipients of the Bharat Ratna" }, 60 | { 61 | "ns": 14, 62 | "title": "Category:Recipients of the Padma Bhushan in civil service" 63 | }, 64 | { 65 | "ns": 14, 66 | "title": "Category:Recipients of the Padma Vibhushan in science & engineering" 67 | }, 68 | { 69 | "ns": 14, 70 | "title": "Category:Short description is different from Wikidata" 71 | }, 72 | { 73 | "ns": 14, 74 | "title": "Category:St Joseph's College, Tiruchirappalli alumni" 75 | }, 76 | { "ns": 14, "title": "Category:State funerals in India" }, 77 | { "ns": 14, "title": "Category:Tamil engineers" }, 78 | { "ns": 14, "title": "Category:Tamil poets" }, 79 | { "ns": 14, "title": "Category:University of Madras alumni" }, 80 | { "ns": 14, "title": "Category:Use Indian English from June 2016" }, 81 | { "ns": 14, "title": "Category:Use dmy dates from March 2021" }, 82 | { "ns": 14, "title": "Category:Webarchive template wayback links" }, 83 | { "ns": 14, "title": "Category:Wikipedia articles with BNF identifiers" }, 84 | { 85 | "ns": 14, 86 | "title": "Category:Wikipedia articles with CANTIC identifiers" 87 | }, 88 | { "ns": 14, "title": "Category:Wikipedia articles with GND identifiers" }, 89 | { "ns": 14, "title": "Category:Wikipedia articles with ISNI identifiers" }, 90 | { "ns": 14, "title": "Category:Wikipedia articles with LCCN identifiers" }, 91 | { "ns": 14, "title": "Category:Wikipedia articles with 
NDL identifiers" }, 92 | { "ns": 14, "title": "Category:Wikipedia articles with NKC identifiers" }, 93 | { "ns": 14, "title": "Category:Wikipedia articles with NLA identifiers" }, 94 | { "ns": 14, "title": "Category:Wikipedia articles with NLI identifiers" }, 95 | { "ns": 14, "title": "Category:Wikipedia articles with NLK identifiers" }, 96 | { "ns": 14, "title": "Category:Wikipedia articles with NTA identifiers" }, 97 | { "ns": 14, "title": "Category:Wikipedia articles with SUDOC identifiers" }, 98 | { "ns": 14, "title": "Category:Wikipedia articles with Trove identifiers" }, 99 | { "ns": 14, "title": "Category:Wikipedia articles with VIAF identifiers" }, 100 | { 101 | "ns": 14, 102 | "title": "Category:Wikipedia articles with WORLDCATID identifiers" 103 | }, 104 | { 105 | "ns": 14, 106 | "title": "Category:Wikipedia indefinitely semi-protected pages" 107 | } 108 | ], 109 | "contentmodel": "wikitext", 110 | "pagelanguage": "en", 111 | "pagelanguagehtmlcode": "en", 112 | "pagelanguagedir": "ltr", 113 | "touched": "2021-05-21T03:52:25Z", 114 | "lastrevid": 1021351304, 115 | "length": 140822, 116 | "fullurl": "https://en.wikipedia.org/wiki/A._P._J._Abdul_Kalam", 117 | "editurl": "https://en.wikipedia.org/w/index.php?title=A._P._J._Abdul_Kalam&action=edit", 118 | "canonicalurl": "https://en.wikipedia.org/wiki/A._P._J._Abdul_Kalam", 119 | "extract": "Avul Pakir Jainulabdeen Abdul Kalam ( (listen); 15 October 1931 – 27 July 2015) was an Indian aerospace scientist who served as the 11th President of India from 2002 to 2007. He was born and raised in Rameswaram, Tamil Nadu and studied physics and aerospace engineering. He spent the next four decades as a scientist and science administrator, mainly at the Defence Research and Development Organisation (DRDO) and Indian Space Research Organisation (ISRO) and was intimately involved in India's civilian space programme and military missile development efforts." 
120 | } 121 | -------------------------------------------------------------------------------- /lib/wiki/__fixtures__/example-search-results.mjson: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "pageid": 15124855, 4 | "ns": 0, 5 | "title": "Samuel Adams", 6 | "index": 1, 7 | "thumbnail": { 8 | "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/00/Samuel_Adams_by_John_Singleton_Copley.jpg/40px-Samuel_Adams_by_John_Singleton_Copley.jpg", 9 | "width": 40, 10 | "height": 50 11 | }, 12 | "pageimage": "Samuel_Adams_by_John_Singleton_Copley.jpg", 13 | "description": "American statesman, political philosopher, governor of Massachusetts, and Founding Father of the United States", 14 | "descriptionsource": "local", 15 | "categories": [ 16 | { "ns": 14, "title": "Category:1722 births" }, 17 | { "ns": 14, "title": "Category:1803 deaths" }, 18 | { "ns": 14, "title": "Category:18th-century American politicians" }, 19 | { "ns": 14, "title": "Category:Adams political family" }, 20 | { "ns": 14, "title": "Category:American Congregationalists" }, 21 | { "ns": 14, "title": "Category:American civil rights activists" }, 22 | { "ns": 14, "title": "Category:American people of English descent" }, 23 | { "ns": 14, "title": "Category:American political philosophers" }, 24 | { "ns": 14, "title": "Category:Articles with Internet Archive links" }, 25 | { "ns": 14, "title": "Category:Articles with Project Gutenberg links" }, 26 | { "ns": 14, "title": "Category:Articles with long short description" }, 27 | { "ns": 14, "title": "Category:Articles with short description" }, 28 | { "ns": 14, "title": "Category:Boston Latin School alumni" }, 29 | { "ns": 14, "title": "Category:Burials at Granary Burying Ground" }, 30 | { 31 | "ns": 14, 32 | "title": "Category:Candidates in the 1796 United States presidential election" 33 | }, 34 | { "ns": 14, "title": "Category:Commons category link is on Wikidata" }, 35 | { 36 | "ns": 14, 37 | 
"title": "Category:Continental Congressmen from Massachusetts" 38 | }, 39 | { "ns": 14, "title": "Category:Featured articles" }, 40 | { 41 | "ns": 14, 42 | "title": "Category:Fellows of the American Academy of Arts and Sciences" 43 | }, 44 | { "ns": 14, "title": "Category:Governors of Massachusetts" }, 45 | { "ns": 14, "title": "Category:Harvard College alumni" }, 46 | { "ns": 14, "title": "Category:Lieutenant Governors of Massachusetts" }, 47 | { "ns": 14, "title": "Category:Massachusetts state senators" }, 48 | { 49 | "ns": 14, 50 | "title": "Category:Members of the colonial Massachusetts House of Representatives" 51 | }, 52 | { "ns": 14, "title": "Category:Politicians from Boston" }, 53 | { "ns": 14, "title": "Category:Presidents of the Massachusetts Senate" }, 54 | { 55 | "ns": 14, 56 | "title": "Category:Short description is different from Wikidata" 57 | }, 58 | { 59 | "ns": 14, 60 | "title": "Category:Signers of the Articles of Confederation" 61 | }, 62 | { 63 | "ns": 14, 64 | "title": "Category:Signers of the United States Declaration of Independence" 65 | }, 66 | { "ns": 14, "title": "Category:Use mdy dates from November 2019" }, 67 | { "ns": 14, "title": "Category:Wikipedia articles with BNF identifiers" }, 68 | { "ns": 14, "title": "Category:Wikipedia articles with GND identifiers" }, 69 | { 70 | "ns": 14, 71 | "title": "Category:Wikipedia articles with ISNI identifiers" 72 | }, 73 | { 74 | "ns": 14, 75 | "title": "Category:Wikipedia articles with LCCN identifiers" 76 | }, 77 | { 78 | "ns": 14, 79 | "title": "Category:Wikipedia articles with NARA identifiers" 80 | }, 81 | { "ns": 14, "title": "Category:Wikipedia articles with NKC identifiers" }, 82 | { "ns": 14, "title": "Category:Wikipedia articles with NLI identifiers" }, 83 | { "ns": 14, "title": "Category:Wikipedia articles with NTA identifiers" }, 84 | { 85 | "ns": 14, 86 | "title": "Category:Wikipedia articles with SNAC-ID identifiers" 87 | }, 88 | { 89 | "ns": 14, 90 | "title": "Category:Wikipedia 
articles with SUDOC identifiers" 91 | }, 92 | { 93 | "ns": 14, 94 | "title": "Category:Wikipedia articles with USCongress identifiers" 95 | }, 96 | { 97 | "ns": 14, 98 | "title": "Category:Wikipedia articles with VIAF identifiers" 99 | }, 100 | { 101 | "ns": 14, 102 | "title": "Category:Wikipedia articles with WORLDCATID identifiers" 103 | }, 104 | { 105 | "ns": 14, 106 | "title": "Category:Wikipedia indefinitely move-protected pages" 107 | }, 108 | { 109 | "ns": 14, 110 | "title": "Category:Wikipedia indefinitely semi-protected pages" 111 | } 112 | ], 113 | "contentmodel": "wikitext", 114 | "pagelanguage": "en", 115 | "pagelanguagehtmlcode": "en", 116 | "pagelanguagedir": "ltr", 117 | "touched": "2021-05-18T03:21:23Z", 118 | "lastrevid": 1023028652, 119 | "length": 89574, 120 | "fullurl": "https://en.wikipedia.org/wiki/Samuel_Adams", 121 | "editurl": "https://en.wikipedia.org/w/index.php?title=Samuel_Adams&action=edit", 122 | "canonicalurl": "https://en.wikipedia.org/wiki/Samuel_Adams", 123 | "extract": "Samuel Adams (September 27 [O.S. September 16] 1722 – October 2, 1803) was an American statesman, political philosopher, and one of the Founding Fathers of the United States. He was a politician in colonial Massachusetts, a leader of the movement that became the American Revolution, and one of the architects of the principles of American republicanism that shaped the political culture of the United States. He was a second cousin to his fellow Founding Father, President John Adams." 
124 | }, 125 | { 126 | "pageid": 44389678, 127 | "ns": 0, 128 | "title": "Samuel Adams (beer)", 129 | "index": 2, 130 | "description": "American brand of beer", 131 | "descriptionsource": "local", 132 | "categories": [ 133 | { "ns": 14, "title": "Category:All articles with unsourced statements" }, 134 | { "ns": 14, "title": "Category:American beer brands" }, 135 | { "ns": 14, "title": "Category:Articles with hProducts" }, 136 | { "ns": 14, "title": "Category:Articles with hRecipes" }, 137 | { "ns": 14, "title": "Category:Articles with short description" }, 138 | { 139 | "ns": 14, 140 | "title": "Category:Articles with unsourced statements from May 2020" 141 | }, 142 | { "ns": 14, "title": "Category:Short description matches Wikidata" } 143 | ], 144 | "contentmodel": "wikitext", 145 | "pagelanguage": "en", 146 | "pagelanguagehtmlcode": "en", 147 | "pagelanguagedir": "ltr", 148 | "touched": "2021-05-21T21:17:04Z", 149 | "lastrevid": 1017031215, 150 | "length": 12556, 151 | "fullurl": "https://en.wikipedia.org/wiki/Samuel_Adams_(beer)", 152 | "editurl": "https://en.wikipedia.org/w/index.php?title=Samuel_Adams_(beer)&action=edit", 153 | "canonicalurl": "https://en.wikipedia.org/wiki/Samuel_Adams_(beer)", 154 | "extract": "Samuel Adams is the flagship brand of the Boston Beer Company. The brand name (often shortened to Sam Adams) was chosen in honor of Founding Father of the United States Samuel Adams. Adams inherited his father's brewery on King Street (modern day State Street)." 
155 | }, 156 | { 157 | "pageid": 224016, 158 | "ns": 0, 159 | "title": "Boston Beer Company", 160 | "index": 3, 161 | "thumbnail": { 162 | "source": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/01/Boston_Beer_Company.jpg/50px-Boston_Beer_Company.jpg", 163 | "width": 50, 164 | "height": 16 165 | }, 166 | "pageimage": "Boston_Beer_Company.jpg", 167 | "description": "American beverage company", 168 | "descriptionsource": "local", 169 | "categories": [ 170 | { "ns": 14, "title": "Category:1984 establishments in Massachusetts" }, 171 | { "ns": 14, "title": "Category:1995 initial public offerings" }, 172 | { 173 | "ns": 14, 174 | "title": "Category:Articles needing POV-check from December 2014" 175 | }, 176 | { "ns": 14, "title": "Category:Articles with short description" }, 177 | { 178 | "ns": 14, 179 | "title": "Category:Beer brewing companies based in Massachusetts" 180 | }, 181 | { "ns": 14, "title": "Category:CS1 maint: ref duplicates default" }, 182 | { 183 | "ns": 14, 184 | "title": "Category:Companies listed on the New York Stock Exchange" 185 | }, 186 | { "ns": 14, "title": "Category:Coordinates not on Wikidata" }, 187 | { 188 | "ns": 14, 189 | "title": "Category:Food and drink companies established in 1984" 190 | }, 191 | { "ns": 14, "title": "Category:Manufacturing companies based in Boston" }, 192 | { 193 | "ns": 14, 194 | "title": "Category:Pages with non-numeric formatnum arguments" 195 | }, 196 | { 197 | "ns": 14, 198 | "title": "Category:Short description is different from Wikidata" 199 | }, 200 | { 201 | "ns": 14, 202 | "title": "Category:Wikipedia articles with LCCN identifiers" 203 | }, 204 | { 205 | "ns": 14, 206 | "title": "Category:Wikipedia articles with VIAF identifiers" 207 | }, 208 | { 209 | "ns": 14, 210 | "title": "Category:Wikipedia articles with WORLDCATID identifiers" 211 | } 212 | ], 213 | "contentmodel": "wikitext", 214 | "pagelanguage": "en", 215 | "pagelanguagehtmlcode": "en", 216 | "pagelanguagedir": "ltr", 217 | "touched": 
"2021-05-15T16:05:50Z", 218 | "lastrevid": 1018315260, 219 | "length": 33908, 220 | "fullurl": "https://en.wikipedia.org/wiki/Boston_Beer_Company", 221 | "editurl": "https://en.wikipedia.org/w/index.php?title=Boston_Beer_Company&action=edit", 222 | "canonicalurl": "https://en.wikipedia.org/wiki/Boston_Beer_Company", 223 | "extract": "The Boston Beer Company is a brewer founded in 1984. Boston Beer Company's first brand of beer was named Samuel Adams after Founding Father Samuel Adams, an American revolutionary patriot. The company launched Angry Orchard brand hard ciders in 2012." 224 | }, 225 | { 226 | "pageid": 37942548, 227 | "ns": 0, 228 | "title": "Samuel Adams Green", 229 | "index": 4, 230 | "description": "American art curator", 231 | "descriptionsource": "local", 232 | "categories": [ 233 | { "ns": 14, "title": "Category:1940 births" }, 234 | { "ns": 14, "title": "Category:2011 deaths" }, 235 | { "ns": 14, "title": "Category:Adams political family" }, 236 | { "ns": 14, "title": "Category:All articles with unsourced statements" }, 237 | { "ns": 14, "title": "Category:American art curators" }, 238 | { "ns": 14, "title": "Category:American socialites" }, 239 | { "ns": 14, "title": "Category:Articles with short description" }, 240 | { 241 | "ns": 14, 242 | "title": "Category:Articles with unsourced statements from July 2013" 243 | }, 244 | { "ns": 14, "title": "Category:CS1 maint: discouraged parameter" }, 245 | { "ns": 14, "title": "Category:People from Boston" }, 246 | { "ns": 14, "title": "Category:People from New York City" }, 247 | { "ns": 14, "title": "Category:Short description matches Wikidata" }, 248 | { "ns": 14, "title": "Category:Webarchive template wayback links" }, 249 | { 250 | "ns": 14, 251 | "title": "Category:Wikipedia articles needing clarification from December 2020" 252 | }, 253 | { 254 | "ns": 14, 255 | "title": "Category:Wikipedia articles with ISNI identifiers" 256 | }, 257 | { 258 | "ns": 14, 259 | "title": "Category:Wikipedia articles 
with LCCN identifiers" 260 | }, 261 | { 262 | "ns": 14, 263 | "title": "Category:Wikipedia articles with SNAC-ID identifiers" 264 | }, 265 | { 266 | "ns": 14, 267 | "title": "Category:Wikipedia articles with VIAF identifiers" 268 | }, 269 | { 270 | "ns": 14, 271 | "title": "Category:Wikipedia articles with WORLDCATID identifiers" 272 | } 273 | ], 274 | "contentmodel": "wikitext", 275 | "pagelanguage": "en", 276 | "pagelanguagehtmlcode": "en", 277 | "pagelanguagedir": "ltr", 278 | "touched": "2021-05-21T21:11:09Z", 279 | "lastrevid": 1007316112, 280 | "length": 15375, 281 | "fullurl": "https://en.wikipedia.org/wiki/Samuel_Adams_Green", 282 | "editurl": "https://en.wikipedia.org/w/index.php?title=Samuel_Adams_Green&action=edit", 283 | "canonicalurl": "https://en.wikipedia.org/wiki/Samuel_Adams_Green", 284 | "extract": "Samuel Adams Green (May 20, 1940 – March 4, 2011) was an American art curator and director, most associated with his promotion of American pop art, particularly the early works of his friend Andy Warhol." 285 | }, 286 | { 287 | "pageid": 15616283, 288 | "ns": 0, 289 | "title": "Samuel A. Adams", 290 | "index": 5, 291 | "categories": [ 292 | { "ns": 14, "title": "Category:1934 births" }, 293 | { "ns": 14, "title": "Category:1988 deaths" }, 294 | { "ns": 14, "title": "Category:Adams political family" }, 295 | { "ns": 14, "title": "Category:American whistleblowers" }, 296 | { 297 | "ns": 14, 298 | "title": "Category:Analysts of the Central Intelligence Agency" 299 | }, 300 | { "ns": 14, "title": "Category:Articles with hCards" }, 301 | { "ns": 14, "title": "Category:Harvard College alumni" }, 302 | { "ns": 14, "title": "Category:Harvard Law School alumni" }, 303 | { 304 | "ns": 14, 305 | "title": "Category:St. 
Mark's School (Massachusetts) alumni" 306 | }, 307 | { 308 | "ns": 14, 309 | "title": "Category:Wikipedia articles with ISNI identifiers" 310 | }, 311 | { 312 | "ns": 14, 313 | "title": "Category:Wikipedia articles with LCCN identifiers" 314 | }, 315 | { 316 | "ns": 14, 317 | "title": "Category:Wikipedia articles with NARA identifiers" 318 | }, 319 | { 320 | "ns": 14, 321 | "title": "Category:Wikipedia articles with SNAC-ID identifiers" 322 | }, 323 | { 324 | "ns": 14, 325 | "title": "Category:Wikipedia articles with VIAF identifiers" 326 | }, 327 | { 328 | "ns": 14, 329 | "title": "Category:Wikipedia articles with WORLDCATID identifiers" 330 | } 331 | ], 332 | "contentmodel": "wikitext", 333 | "pagelanguage": "en", 334 | "pagelanguagehtmlcode": "en", 335 | "pagelanguagedir": "ltr", 336 | "touched": "2021-05-09T23:56:45Z", 337 | "lastrevid": 986660952, 338 | "length": 24611, 339 | "fullurl": "https://en.wikipedia.org/wiki/Samuel_A._Adams", 340 | "editurl": "https://en.wikipedia.org/w/index.php?title=Samuel_A._Adams&action=edit", 341 | "canonicalurl": "https://en.wikipedia.org/wiki/Samuel_A._Adams", 342 | "extract": "Samuel Alexander Adams (June 14, 1934 – October 10, 1988), known as Sam Adams, was an analyst for the Central Intelligence Agency (CIA). He is best known for his role in discovering that during the mid-1960s American military intelligence had underestimated the number of Viet Cong and North Vietnamese Army soldiers. Although his opinion was challenged, he pushed the case for a higher troop count." 
343 | } 344 | ] 345 | -------------------------------------------------------------------------------- /lib/wiki/__tests__/__snapshots__/extractAuthorDetails.test.mjs.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`wiki.extractAuthorDetails Returns the expected response 1`] = ` 4 | Object { 5 | "bio": "Avul Pakir Jainulabdeen Abdul Kalam (15 October 1931 – 27 July 2015) was an Indian aerospace scientist who served as the 11th President of India from 2002 to 2007. He was born and raised in Rameswaram, Tamil Nadu and studied physics and aerospace engineering. He spent the next four decades as a scientist and science administrator, mainly at the Defence Research and Development Organisation (DRDO) and Indian Space Research Organisation (ISRO) and was intimately involved in India's civilian space programme and military missile development efforts.", 6 | "categories": Array [ 7 | "1931 births", 8 | "2015 deaths", 9 | "20th-century Indian engineers", 10 | "20th-century Indian politicians", 11 | "21st-century Indian engineers", 12 | "21st-century Indian politicians", 13 | "A. P. J. 
Abdul Kalam", 14 | "All Wikipedia articles written in Indian English", 15 | "Articles with Encyclopædia Britannica links", 16 | "Articles with hAudio microformats", 17 | "Articles with hCards", 18 | "Articles with short description", 19 | "CS1 maint: archived copy as title", 20 | "CS1 maint: extra text: authors list", 21 | "CS1 maint: others", 22 | "Defence Research and Development Organisation", 23 | "Engineers from Tamil Nadu", 24 | "Fellows of the National Academy of Medical Sciences", 25 | "Good articles", 26 | "Indian Muslims", 27 | "Indian Space Research Organisation people", 28 | "Indian Tamil academics", 29 | "Indian Tamil politicians", 30 | "Indian aerospace engineers", 31 | "Madras Institute of Technology alumni", 32 | "Nuclear energy in India", 33 | "Pages including recorded pronunciations", 34 | "Pages using Sister project links with hidden wikidata", 35 | "People from Ramanathapuram district", 36 | "Presidents of India", 37 | "Recipients of the Bharat Ratna", 38 | "Recipients of the Padma Bhushan in civil service", 39 | "Recipients of the Padma Vibhushan in science & engineering", 40 | "Short description is different from Wikidata", 41 | "St Joseph's College, Tiruchirappalli alumni", 42 | "State funerals in India", 43 | "Tamil engineers", 44 | "Tamil poets", 45 | "University of Madras alumni", 46 | "Use Indian English from June 2016", 47 | "Use dmy dates from March 2021", 48 | "Webarchive template wayback links", 49 | "Wikipedia articles with BNF identifiers", 50 | "Wikipedia articles with CANTIC identifiers", 51 | "Wikipedia articles with GND identifiers", 52 | "Wikipedia articles with ISNI identifiers", 53 | "Wikipedia articles with LCCN identifiers", 54 | "Wikipedia articles with NDL identifiers", 55 | "Wikipedia articles with NKC identifiers", 56 | "Wikipedia articles with NLA identifiers", 57 | "Wikipedia articles with NLI identifiers", 58 | "Wikipedia articles with NLK identifiers", 59 | "Wikipedia articles with NTA identifiers", 60 | "Wikipedia 
articles with SUDOC identifiers", 61 | "Wikipedia articles with Trove identifiers", 62 | "Wikipedia articles with VIAF identifiers", 63 | "Wikipedia articles with WORLDCATID identifiers", 64 | "Wikipedia indefinitely semi-protected pages", 65 | ], 66 | "description": "Scientist and 11th President of India", 67 | "imagePath": "A._P._J._Abdul_Kalam.jpg", 68 | "images": undefined, 69 | "link": "https://en.wikipedia.org/wiki/A._P._J._Abdul_Kalam", 70 | "name": "A. P. J. Abdul Kalam", 71 | "pageId": 62682, 72 | } 73 | `; 74 | -------------------------------------------------------------------------------- /lib/wiki/__tests__/api.test.js: -------------------------------------------------------------------------------- 1 | import { api } from '../api.js' 2 | 3 | describe('wiki api > search', () => { 4 | it(`Returns an array of search results sorted by score`, async () => { 5 | const query = 'Fred Hampton' 6 | const results = await api.search(query) 7 | results.forEach(result => { 8 | expect(result.title).toEqual(expect.any(String)) 9 | expect(result.pageid).toEqual(expect.any(Number)) 10 | }) 11 | }) 12 | }) 13 | 14 | describe('wiki > api > getPagesByTitle', () => { 15 | it(`Returns matching page each valid title. 
Titles can be normalized, 16 | non-normalized, or the full page url.`, async () => { 17 | const titles = [ 18 | // Normalized title 19 | 'A._A._Milne', 20 | // Non-normalized title 21 | 'Alexander the Great', 22 | // Canonical URL 23 | 'https://en.wikipedia.org/wiki/A._P._J._Abdul_Kalam', 24 | ] 25 | const results = await api.getPagesByTitle(titles) 26 | expect(results.length).toEqual(titles.length) 27 | }) 28 | }) 29 | -------------------------------------------------------------------------------- /lib/wiki/__tests__/extractAuthorDetails.test.js: -------------------------------------------------------------------------------- 1 | import { extractAuthorDetails } from '../extractAuthorDetails.js' 2 | import examplePage from '../__fixtures__/example-page.json' 3 | 4 | describe('wiki.extractAuthorDetails', () => { 5 | it(`Returns the expected response`, async () => { 6 | const authorDetails = extractAuthorDetails(examplePage) 7 | expect(authorDetails).toMatchSnapshot() 8 | }) 9 | }) 10 | -------------------------------------------------------------------------------- /lib/wiki/__tests__/findAuthorByName.test.js: -------------------------------------------------------------------------------- 1 | import { findAuthorByName } from '../findAuthorByName.js' 2 | 3 | describe('wiki > findAuthorByName', () => { 4 | it(`Takes an author name and returns an array of matching wikipedia pages, sorted by score with the best match first`, async () => { 5 | const authorName = 'Fred Hampton' 6 | const results = await findAuthorByName(authorName) 7 | expect(results).toEqual(expect.any(Array)) 8 | expect(results[0].name).toEqual(authorName) 9 | }) 10 | 11 | it(`Returns an error object if no page could be found for the 12 | provided name`, async () => { 13 | // wiki API will not find any matching pages for this query 14 | const name = 'abcdaddf2edaffff2efasdf2' 15 | const results = await findAuthorByName(name) 16 | expect(results).toEqual([]) 17 | }) 18 | }) 19 | 
-------------------------------------------------------------------------------- /lib/wiki/__tests__/getImageInfo.test.js: -------------------------------------------------------------------------------- 1 | import { getImageInfo } from '../getImageInfo.js' 2 | 3 | const ImageInfo = { 4 | title: expect.any(String), 5 | url: expect.any(String), 6 | size: expect.any(Number), 7 | width: expect.any(Number), 8 | height: expect.any(Number), 9 | } 10 | 11 | const ImageInfoError = { 12 | error: expect.any(String), 13 | title: expect.any(String), 14 | } 15 | 16 | describe('wiki > getImageInfo', () => { 17 | it(`Returns an Array of n objects`, async () => { 18 | const results = await getImageInfo(['foo', 'bar']) 19 | expect(results.length).toBe(2) 20 | results.forEach(result => { 21 | expect(result).toEqual( 22 | expect.objectContaining({ title: expect.any(String) }) 23 | ) 24 | }) 25 | }) 26 | it(`For each valid image title, returns an object containing image info`, async () => { 27 | const titles = ['A._P._J._Abdul_Kalam.jpg'] 28 | const results = await getImageInfo(titles) 29 | expect(results[0]).toEqual(expect.objectContaining(ImageInfo)) 30 | }) 31 | 32 | it(`For each image title that does not exist, returns an object containing error message`, async () => { 33 | const titles = ['does not exist'] 34 | const results = await getImageInfo(titles) 35 | expect(results[0]).toEqual(expect.objectContaining(ImageInfoError)) 36 | }) 37 | }) 38 | -------------------------------------------------------------------------------- /lib/wiki/api.js: -------------------------------------------------------------------------------- 1 | import { stringify } from 'query-string' 2 | import { get, sortBy } from 'lodash-es' 3 | import fetch from 'node-fetch' 4 | import { log } from '../log.js' 5 | import { cache } from './cache.js' 6 | 7 | const BASE_URL = `https://en.wikipedia.org/w/api.php` 8 | 9 | // The list of properties to include for pages returned by the wiki API 10 | // @see 
// https://www.mediawiki.org/wiki/API:Properties
const pageProperties = [
  'pageimages',
  'description',
  'categories',
  'info',
  'extracts',
]

// These are the default params we use for all API requests
// https://www.mediawiki.org/wiki/API:Query
const defaultParams = {
  origin: '*',
  format: 'json',
  action: 'query',
  formatversion: 2,
  prop: pageProperties.join('|'),
  inprop: 'url',
  exintro: true,
  exsentences: 3,
  // NOTE(review): the MediaWiki API treats a boolean parameter as enabled
  // whenever it is present in the query string, so this presumably still
  // requests plain-text extracts — confirm before changing the value.
  explaintext: false,
  gsrnamespace: 0,
  cllimit: 'max',
}

/**
 * Helper function that makes an API request and returns an array of results,
 * sorted by index.
 *
 * Successful responses are cached by URL. A failed request (network error,
 * non-2xx status, or an `error` payload in the response body) is logged and
 * resolves to an empty array. Failures are never written to the cache, so a
 * transient error cannot be replayed as "no results" on later calls.
 *
 * @param {string} url the full request URL, including the query string
 * @return {Promise<Object[]>} the pages in the response, sorted by `index`
 */
const request = async url => {
  try {
    // Return the cached response when present
    const cachedResponse = await cache.get(url)
    if (cachedResponse) return cachedResponse
    // Otherwise, make a request to the wiki API
    const response = await fetch(url)
    if (!response.ok) {
      throw new Error(`Wiki API request failed with status ${response.status}`)
    }
    const json = await response.json()
    if (json && json.error) {
      throw new Error(`Wiki API error: ${json.error.info || json.error.code}`)
    }
    const results = sortBy(get(json, 'query.pages', []), ({ index }) => index)
    try {
      await cache.set(url, results)
    } catch (cacheError) {
      // A cache write failure should not discard results we already have
      log.error(cacheError)
    }
    return results
  } catch (error) {
    log.error(error)
    return []
  }
}

/**
 * Converts a value accepted by `getPagesByTitle` into a page title.
 *
 * Plain titles (normalized or not) are returned unchanged, including titles
 * that contain a slash. Only `http(s)` URLs are parsed: the title is taken
 * from the `title` query parameter when present, otherwise from the part of
 * the path after `/wiki/` (or the last path segment), and percent-escapes
 * are decoded.
 *
 * @param {any} value a page title or a full page URL
 * @return {string} the page title
 */
const parseTitle = value => {
  const str = String(value)
  if (!/^https?:\/\//i.test(str)) return str
  try {
    const { pathname, searchParams } = new URL(str)
    const queryTitle = searchParams.get('title')
    if (queryTitle) return queryTitle
    const encodedTitle = pathname.startsWith('/wiki/')
      ? pathname.slice('/wiki/'.length)
      : pathname.replace(/^.*\//, '')
    return decodeURIComponent(encodedTitle)
  } catch (error) {
    // Malformed URL or escape sequence: fall back to the last path segment
    return str.replace(/[^/]+\/+/g, '')
  }
}

/**
 * Wiki API Methods
 */
export const api = {
  /**
   * Search for wikipedia pages matching the given query string. We use this to
   * find an author's wikipedia page from the person's name.
   *
   * @param {string} query the search string
   * @param {Object} [customParams = {}] custom query parameters
   * @return {Promise<Object[]>} matching pages, sorted by search index
   */
  async search(query = '', customParams = {}) {
    // Search parameters
    const searchParams = {
      generator: 'search',
      gsrlimit: 5,
      gsrsearch: query,
    }
    const params = { ...defaultParams, ...searchParams, ...customParams }
    return request(`${BASE_URL}?${stringify(params)}`)
  },

  /**
   * Get one or more wikipedia pages by title. This can be used to batch
   * request a large number of pages with a single API call.
   *
   * We use this to get additional info about existing authors. For example,
   * when adding a new field like bio or description, we used this to batch
   * request data for all the existing authors, using the author's `link`
   * property as the `title` param.
   *
   * Titles can be any of the following formats:
   * 1. The normalized page title (example: 'A._A._Milne')
   * 2. The non-normalized page title (example: 'A. A. Milne')
   * 3. The full page url (example: 'https://en.wikipedia.org/wiki/A._A._Milne')
   *
   * @param {string[]|string} titles An array of titles (or a single title)
   * @param {any} [customParams = {}] params for the wiki API
   * @return {Promise<Object[]>} the matching pages
   */
  async getPagesByTitle(titles, customParams = {}) {
    const titleList = Array.isArray(titles) ? titles : [titles]
    // The formatted `titles` param for the API. Full URLs are reduced to
    // their page title first.
    const titlesParam = titleList.map(value => parseTitle(value)).join('|')
    const params = { ...defaultParams, titles: titlesParam, ...customParams }
    return request(`${BASE_URL}?${stringify(params)}`)
  },
}
/**
 * Gets author details from a wikipedia page returned by the API.
 *
 * Missing fields are tolerated: a page without an `extract` yields an empty
 * `bio`, a page without a usable `description` yields `null`, and a page
 * without `categories` yields an empty array.
 *
 * @param {Object} page a page object returned by the wiki API
 * @return {Object|null} an object with the fields `name`, `pageId`, `link`,
 *     `bio`, `description`, `imagePath`, `categories` and `images`, or `null`
 *     when no page is given
 */
export function extractAuthorDetails(page) {
  if (!page) return null
  const {
    extract,
    title,
    pageid,
    canonicalurl,
    pageimage,
    images,
    description: originalDescription,
    categories,
  } = page

  // The `extract` is a brief summary of the page, usually taken from the first
  // paragraph. We use this for the author's `bio` field.
  // NOTE: The length and content of the `extract` can be customized via API
  // parameters when requesting a page (see `exsentences` in lib/wiki/api).
  const bio = String(extract == null ? '' : extract)
    // clean up the extract...
    // In most cases, the extract starts with the person's full legal name,
    // followed by "(date of birth [- date of death])". Sometimes there is
    // additional text inside the parentheses, such as alternate names,
    // pronunciations and spellings in different languages, etc, each separated
    // by a `;`. We try to remove everything from inside the parentheses
    // except the birth and death dates. Example:
    // Before: "( ; (listen); 15 October 1931 – 27 July 2015)"
    // After: "(15 October 1931 – 27 July 2015)"
    .replace(/\(\s*listen\s*\)/g, ' ')
    .replace(/\(([^;)]+;\s*)+/, '(')
    // Remove each bracketed marker (e.g. "[1]") on its own, so the text
    // between two markers is kept
    .replace(/\[[^\]]*\]/g, ' ')
    .replace(/\([;\s]+/, '(')
    // Remove newlines and double spaces from the extract
    .replace(/\s+/g, ' ')
    .trim()

  // The `description` is a short, one line description of the person.
  // It's usually their primary occupation or what they are known for.
  // For example: "Theoretical Physicist" or "American Author"
  // In some cases, the description also includes birth/death dates inside
  // parentheses. We remove those if present.
  const description =
    String(originalDescription == null ? '' : originalDescription)
      .replace(/\([^)]*\)/g, ' ')
      .replace(/\s+/g, ' ')
      .trim() || null

  // Get a list of category names that the person belongs to. In the future,
  // we could use this to add tags/categories to authors.
  const categoryList = Array.isArray(categories) ? categories : []
  const categoryNames = categoryList.map(category => {
    // Only strip the leading namespace prefix, never text inside the name
    return String(category.title).replace(/^category:/i, '').trim()
  })

  // This only includes fields we need for Authors.
  return {
    name: title,
    pageId: pageid,
    link: canonicalurl,
    bio,
    description,
    imagePath: pageimage,
    categories: categoryNames,
    images,
  }
}
33 | .map(extractAuthorDetails) 34 | 35 | // Only includes results that are an exact match for the author name 36 | const exactMatches = results.filter(result => { 37 | return isEqual(...[result.name, name].map(normalizeName)) 38 | }) 39 | // If any of the results were an exact match for the given name, we only 40 | // return those 41 | return exactMatches.length ? exactMatches : results 42 | } 43 | 44 | // ;(async () => { 45 | // const results = await findAuthorByName('Pete Seger') 46 | // console.log(results.map(r => r.name)) 47 | // })() 48 | -------------------------------------------------------------------------------- /lib/wiki/getImageInfo.js: -------------------------------------------------------------------------------- 1 | import * as path from 'path' 2 | import { toString, isEmpty } from 'lodash-es' 3 | import { log } from '../log.js' 4 | import { api } from './api.js' 5 | 6 | const getFileName = title => { 7 | const str = toString(title) 8 | return str.startsWith('File:') ? str : `File:${str}` 9 | } 10 | 11 | /** 12 | * Gets the full URL for one or more wikipedia images. 13 | * 14 | * This can be used to get the full URL for various types of images in API 15 | * results. 
16 | * 17 | * @param {string[]} titles An array of image titles 18 | * @param {any} customParams custom params for the wiki API 19 | * @return {Promise>} Array of objects containing image info 26 | * @async 27 | * @see https://en.wikipedia.org/w/api.php?action=help&modules=query%2Bimageinfo 28 | */ 29 | export async function getImageInfo(titles, customParams = {}) { 30 | try { 31 | const params = { 32 | prop: 'imageinfo', 33 | iiprop: 'url|canonicaltitle|size|mediatype', 34 | ...customParams, 35 | } 36 | const results = await api.getPagesByTitle(titles.map(getFileName), params) 37 | 38 | return results.map(({ imageinfo: imageInfo, title }) => { 39 | if (isEmpty(imageInfo)) { 40 | // If the image doesn't exist, return an object with error message 41 | return { title, error: `${title} does not exist` } 42 | } 43 | 44 | const format = (path.extname(title) || '').replace('.', '') 45 | return { title: title.replace(/file:/i, ''), format, ...imageInfo[0] } 46 | }) 47 | } catch (error) { 48 | log.error(error) 49 | return [] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /lib/wiki/index.js: -------------------------------------------------------------------------------- 1 | import { findAuthorByName } from './findAuthorByName.js' 2 | import { extractAuthorDetails } from './extractAuthorDetails.js' 3 | import { getImageInfo } from './getImageInfo.js' 4 | import { rateLimit } from './rateLimit.js' 5 | import { api } from './api.js' 6 | 7 | export const wiki = { 8 | api, 9 | findAuthorByName, 10 | extractAuthorDetails, 11 | getImageInfo, 12 | rateLimit, 13 | } 14 | -------------------------------------------------------------------------------- /lib/wiki/rateLimit.js: -------------------------------------------------------------------------------- 1 | import { delay } from '../delay.js' 2 | /** 3 | * Returns the delay (in ms) that is required between requests to avoid 4 | * exceeding the wiki API rate limit. 
5 | * 6 | * @param {number} index the index of the current request 7 | * @param {number} total the total number of requests 8 | */ 9 | export const rateLimit = (index, total) => { 10 | let ms = 0 11 | if (index) { 12 | ms = total < 5 ? 250 : Math.min(Math.floor(total / 10) + 1, 5) * 500 13 | } 14 | return delay(ms) 15 | } 16 | -------------------------------------------------------------------------------- /lib/writeJSONFiles.js: -------------------------------------------------------------------------------- 1 | import * as fs from 'fs' 2 | import prettier from 'prettier' 3 | import * as path from 'path' 4 | 5 | /** 6 | * Writes JSON data to files on disk. The JSON is formatted using Prettier to 7 | * match to code formatting style of the project. 8 | * 9 | * @signature writeJSONFiles(files) 10 | * 11 | * @param {{[path: string]: any}} files An object map of files to create. Each 12 | * key should be a file path and the value should be the JSON data that 13 | * will be written to that file. File paths are resolved relative to the 14 | * project root. 15 | * @return {void} 16 | * 17 | * @signature writeJSONFiles(dir, files) 18 | * 19 | * When called with two arguments, the first argument `dir` is the base 20 | * directory in which files will be created. The second argument is the files 21 | * object. In this case, file paths will be resolved relative to `dir` 22 | * 23 | * @param {string} dir The root directory in which files will be created. Each 24 | * file path in the `files` object will be created relative to this 25 | * directory. 26 | * @param {{[path: string]: any}} files A map of files to create. Each key 27 | * should be a file path and the value the data that will be written to 28 | * that file. 
The file paths are resolved relative to `dir` 29 | * @return {void} 30 | * 31 | * @signature writeJSONFiles(files) 32 | * 33 | * 34 | * @example 35 | * const DEST = 'some/path' 36 | * const data = { quotes: [], authors: [] } 37 | * writeJSONFiles(DEST, data) 38 | */ 39 | export async function writeJSONFiles(...args) { 40 | const [files, DIR = ''] = args.length === 1 ? args : args.reverse() 41 | // Get the Prettier settings for the project 42 | const prettierBaseConfig = await prettier.resolveConfig(process.cwd()) 43 | // Set the parser to 'json' 44 | const prettierConfig = { ...prettierBaseConfig, parser: 'json' } 45 | 46 | Object.entries(files).forEach(([NAME, data]) => { 47 | // If the file name doesn't have an extension, use '.json' 48 | const PATH = path.resolve(DIR, path.extname(NAME) ? NAME : `${NAME}.json`) 49 | // Create the directory recursively if it doesn't exist 50 | if (!fs.existsSync(path.dirname(PATH))) { 51 | fs.mkdirSync(path.dirname(PATH), { recursive: true }) 52 | } 53 | const formattedJSON = prettier.format(JSON.stringify(data), prettierConfig) 54 | fs.writeFileSync(PATH, formattedJSON) 55 | }) 56 | } 57 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "quotable-data", 3 | "version": "0.3.7", 4 | "description": "The source data for the quotable API", 5 | "type": "module", 6 | "scripts": { 7 | "test": "jest", 8 | "test:watch": "jest --watchAll", 9 | "build": "node ./cli/build", 10 | "lint": "./node_modules/.bin/eslint . 
--color && echo \"eslint: no lint errors\"" 11 | }, 12 | "bin": { 13 | "addQuotes": "cli/addQuotes/index.js", 14 | "addAuthors": "cli/addAuthors/index.js", 15 | "addTags": "cli/addTags/index.js", 16 | "validate": "cli/validate/index.js" 17 | }, 18 | "main": "data/generated/index.js", 19 | "files": [ 20 | "data/generated/*.js", 21 | "data/generated/*.json" 22 | ], 23 | "repository": { 24 | "type": "git", 25 | "url": "git+https://github.com/quotable-io/data.git" 26 | }, 27 | "author": "Luke Peavey (http://lukepeavey.com)", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": "https://github.com/quotable-io/data/issues" 31 | }, 32 | "homepage": "https://github.com/quotable-io/data/", 33 | "devDependencies": { 34 | "@babel/eslint-parser": "^7.18.2", 35 | "@babel/plugin-syntax-import-assertions": "^7.18.6", 36 | "@types/jest": "^27.0.2", 37 | "eslint": "^8.18.0", 38 | "eslint-config-airbnb-base": "^15.0.0", 39 | "eslint-config-prettier": "^8.5.0", 40 | "eslint-plugin-import": "^2.26.0", 41 | "jest": "^28.1.1", 42 | "jest-json-schema": "^5.0.0", 43 | "prettier": "^2.7.1" 44 | }, 45 | "dependencies": { 46 | "@faker-js/faker": "^7.3.0", 47 | "@inquirer/select": "^0.0.21-alpha.0", 48 | "@lukepeavey/slugify": "^0.1.3", 49 | "@lukepeavey/stopwords": "^0.1.0", 50 | "@stoplight/json": "^3.18.1", 51 | "ajv": "^8.11.0", 52 | "ajv-formats": "^2.1.1", 53 | "chalk": "^5.0.1", 54 | "cheerio": "*", 55 | "cli-progress": "^3.11.1", 56 | "cli-table3": "^0.6.2", 57 | "dotenv": "^16.0.1", 58 | "figures": "^4.0.1", 59 | "html-entities": "^2.3.3", 60 | "inquirer": "^9.0.0", 61 | "is-interactive": "^2.0.0", 62 | "json-source-map": "^0.6.1", 63 | "level": "^8.0.0", 64 | "lodash-es": "^4.17.21", 65 | "minimist": "^1.2.6", 66 | "moment": "^2.29.3", 67 | "mongodb": "^4.7.0", 68 | "mongoose": "^6.4.0", 69 | "node-fetch": "^2.6.1", 70 | "node-html-parser": "^5.3.3", 71 | "ora": "^6.1.0", 72 | "pluralize": "^8.0.0", 73 | "prettier": "^2.2.1", 74 | "query-string": "^7.1.1", 75 | "sharp": "^0.30.7", 76 | 
"shelljs": "^0.8.4", 77 | "shortid": "^2.2.16", 78 | "word-wrap": "^1.2.3" 79 | }, 80 | "jest": { 81 | "watchman": false 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /schema/models/Author.js: -------------------------------------------------------------------------------- 1 | import { types } from '../types.js' 2 | 3 | // Author properties 4 | export const properties = { 5 | _id: types.id, 6 | name: types.nonEmptyString, 7 | aka: { type: 'array', items: types.nonEmptyString, default: [] }, 8 | bio: types.nonEmptyString, 9 | description: { type: 'string', minLength: 1, maxLength: 50 }, 10 | link: { type: 'string', format: 'uri', minLength: 1 }, 11 | } 12 | 13 | // Computed properties 14 | export const computedProperties = { 15 | slug: types.slug, 16 | quoteCount: { type: 'integer' }, 17 | dateAdded: types.nonEmptyString, 18 | dateModified: types.nonEmptyString, 19 | } 20 | 21 | /** 22 | * Schema for a single `Author` (source) 23 | * This is the schema for authors in the source data (`data/source`). Source 24 | * data does not include computed properties. 25 | */ 26 | export const AuthorSource = { 27 | title: 'Author', 28 | type: 'object', 29 | properties, 30 | required: Object.keys(properties), 31 | additionalProperties: false, 32 | } 33 | 34 | /** 35 | * Schema for a single `Author` 36 | * This is the schema for a single Author in the generated data files files 37 | * that are synced with the mongodb database. It includes additional computed 38 | * properties that not included in the source data. 
39 | */ 40 | export const Author = { 41 | title: 'Author', 42 | type: 'object', 43 | properties: { ...properties, ...computedProperties }, 44 | required: [...Object.keys(properties), ...Object.keys(computedProperties)], 45 | additionalProperties: false, 46 | } 47 | 48 | export const authors = { type: 'array', items: Author } 49 | -------------------------------------------------------------------------------- /schema/models/Quote.js: -------------------------------------------------------------------------------- 1 | import { types } from '../types.js' 2 | 3 | export const properties = { 4 | _id: types.id, 5 | content: types.nonEmptyString, 6 | author: types.nonEmptyString, 7 | tags: { type: 'array', items: types.title }, 8 | } 9 | 10 | export const computedProperties = { 11 | authorSlug: types.slug, 12 | authorId: types.id, 13 | length: { type: 'integer' }, 14 | dateAdded: types.nonEmptyString, 15 | dateModified: types.nonEmptyString, 16 | } 17 | 18 | /** 19 | * Schema for a single `Quote` (source) 20 | * This is the schema for quotes in the source data (`data/source`). Source 21 | * data does not include computed properties. 22 | */ 23 | export const QuoteSource = { 24 | title: 'Quote', 25 | type: 'object', 26 | properties, 27 | required: Object.keys(properties), 28 | additionalProperties: false, 29 | } 30 | 31 | /** 32 | * Schema for a single `Quote` 33 | * This is the schema for a single Quote in the generated data files files 34 | * that are synced with the mongodb database. It includes additional computed 35 | * properties that not included in the source data. 
36 | */ 37 | export const Quote = { 38 | title: 'Quote', 39 | type: 'object', 40 | properties: { ...properties, ...computedProperties }, 41 | required: [...Object.keys(properties), ...Object.keys(computedProperties)], 42 | additionalProperties: false, 43 | } 44 | 45 | export const quotes = { 46 | type: 'array', 47 | items: Quote, 48 | } 49 | 50 | // Named Exports 51 | // export { QuoteSource } 52 | // export { Quote } 53 | // export { quotes } 54 | // export { properties } 55 | // export { computedProperties } 56 | -------------------------------------------------------------------------------- /schema/models/Tag.js: -------------------------------------------------------------------------------- 1 | import { types } from '../types.js' 2 | 3 | export const properties = { 4 | _id: types.id, 5 | name: types.title, 6 | } 7 | 8 | export const computedProperties = { 9 | slug: types.slug, 10 | quoteCount: { type: 'integer' }, 11 | dateAdded: types.nonEmptyString, 12 | dateModified: types.nonEmptyString, 13 | } 14 | 15 | /** 16 | * Schema for a single `Tag` (source) 17 | * This is the schema for tags in the source data (`data/source`). Source 18 | * data does not include computed properties. 19 | */ 20 | export const TagSource = { 21 | title: 'Tag', 22 | type: 'object', 23 | properties, 24 | required: Object.keys(properties), 25 | additionalProperties: false, 26 | } 27 | 28 | /** 29 | * Schema for a single `Tag` 30 | * This is the schema for a single Tag in the generated data files 31 | * that are synced with the MongoDB database. It includes additional computed 32 | * properties that are not included in the source data. 
33 | */ 34 | export const Tag = { 35 | title: 'Tag', 36 | type: 'object', 37 | properties: { ...properties, ...computedProperties }, 38 | required: [...Object.keys(properties), ...Object.keys(computedProperties)], 39 | additionalProperties: false, 40 | } 41 | 42 | export const tags = { 43 | type: 'array', 44 | items: Tag, 45 | } 46 | 47 | // Named Exports 48 | // export { TagSource } 49 | // export { Tag } 50 | // export { tags } 51 | // export { properties } 52 | // export { computedProperties } 53 | -------------------------------------------------------------------------------- /schema/types.js: -------------------------------------------------------------------------------- 1 | /* Reusable schema fragments referenced by the model schemas as `types.<name>` */ export const types = { 2 | id: { /* 5-20 characters; only word characters (letters, digits, underscore) and hyphens */ 3 | type: 'string', 4 | pattern: '^[\\w\\-]+$', 5 | minLength: 5, 6 | maxLength: 20, 7 | }, 8 | slug: { /* lowercase alphanumeric groups joined by single hyphens; no leading or trailing hyphen */ 9 | type: 'string', 10 | pattern: '^([a-z0-9]+(-[a-z0-9]+)*)$', 11 | minLength: 1, 12 | }, 13 | title: { /* words separated by one whitespace character; each word is an uppercase letter followed by at least one lowercase letter or digit */ 14 | type: 'string', 15 | pattern: '^[A-Z][a-z0-9]+(\\s[A-Z][a-z0-9]+)*$', 16 | minLength: 1, 17 | }, 18 | nonEmptyString: { /* any string with at least one character */ 19 | type: 'string', 20 | minLength: 1, 21 | }, 22 | } 23 | --------------------------------------------------------------------------------