├── .eslintignore ├── logo.jpg ├── .gitignore ├── jest.config.js ├── src ├── index.ts ├── utils │ ├── countCharacters.ts │ ├── countParagraphs.ts │ ├── countWords.ts │ ├── countSentences.ts │ ├── common │ │ ├── formatReadingTimeText.ts │ │ ├── containsCJK.ts │ │ └── splitWords.ts │ ├── findMostAndLeastFrequent.ts │ ├── determineReadingSpeed.ts │ ├── calculateWordFrequencies.ts │ ├── countSearchFrequency.ts │ ├── estimateReadingTime.ts │ ├── calculateCharFrequencies.ts │ └── calculateStats.ts ├── constants │ └── index.ts ├── useTextAnalyzer.ts └── interfaces │ └── index.ts ├── .prettierrc ├── tsconfig.json ├── .husky └── pre-commit ├── .github └── dependabot.yml ├── .eslintrc ├── tests ├── utils │ ├── containsCJK.test.ts │ ├── countCharacters.test.ts │ ├── countParagraphs.test.ts │ ├── determineReadingSpeed.test.ts │ ├── findMostAndLeastFrequent.test.ts │ ├── countWords.test.ts │ ├── estimateReadingTime.test.ts │ ├── countSentences.test.ts │ ├── countSearchFrequency.test.ts │ ├── calculateCharFrequencies.test.ts │ └── calculateWordFrequencies.test.ts └── hooks │ └── useTextAnalyzer.test.ts ├── LICENSE ├── package.json ├── README.md └── CHANGELOG.md /.eslintignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | dist 3 | -------------------------------------------------------------------------------- /logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Invulner/use-text-analyzer/HEAD/logo.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | .git 4 | .DS_store 5 | node_modules 6 | dist 7 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: 'ts-jest', 3 | testEnvironment: 'jsdom', 4 | }; 5 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import useTextAnalyzer from './useTextAnalyzer'; 2 | 3 | export { useTextAnalyzer }; 4 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 120, 3 | "tabWidth": 2, 4 | "singleQuote": true, 5 | "arrowParens": "always" 6 | } 7 | -------------------------------------------------------------------------------- /src/utils/countCharacters.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Counts the number of characters in the text. 3 | * @param {string} text - The text to analyze. 4 | * @returns {number} The number of characters in the text. 5 | */ 6 | export function countCharacters(text: string): number { 7 | return text.length; 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/countParagraphs.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Counts the number of paragraphs in the text. 3 | * @param {string} text - The text to analyze. 4 | * @returns {number} The number of paragraphs in the text. 5 | */ 6 | export function countParagraphs(text: string): number { 7 | return text.trim().split(/\n+/).filter(Boolean).length; 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/countWords.ts: -------------------------------------------------------------------------------- 1 | import { splitWords } from './common/splitWords'; 2 | 3 | /** 4 | * Counts the number of words in the text. 5 | * @param {string} text - The text to analyze. 6 | * @returns {number} The number of words in the text. 7 | */ 8 | export function countWords(text: string): number { 9 | return splitWords(text).length; 10 | } 11 | -------------------------------------------------------------------------------- /src/utils/countSentences.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Counts the number of sentences in the text. 3 | * @param {string} text - The text to analyze. 4 | * @returns {number} The number of sentences in the text. 5 | */ 6 | export function countSentences(text: string): number { 7 | return text 8 | .trim() 9 | .split(/[.!?。!?]+/) 10 | .filter(Boolean).length; 11 | } 12 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowJs": true, 4 | "esModuleInterop": true, 5 | "target": "ESNext", 6 | "module": "ESNext", 7 | "moduleResolution": "node", 8 | "lib": ["dom", "es5", "es2015"], 9 | "jsx": "react", 10 | "declaration": true 11 | }, 12 | "include": ["src/**/*"], 13 | "exclude": ["node_modules"] 14 | } 15 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | yarn run prettier:check || 4 | ( 5 | echo "❌ Prettier check failed. You can use 'yarn prettier:fix'. ❌" 6 | false; 7 | ) 8 | 9 | yarn run lint:check || 10 | ( 11 | echo "❌ ESLint check failed. You can use 'yarn lint:fix'. ❌"; 12 | false; 13 | ) 14 | 15 | yarn run test || 16 | ( 17 | echo '❌ Testing failed. ❌'; 18 | false; 19 | ) 20 | -------------------------------------------------------------------------------- /src/constants/index.ts: -------------------------------------------------------------------------------- 1 | // The default reading speed is set to 250 words per minute. 2 | // This rate is an average based on extensive research into reading habits. 3 | // https://www.sciencedirect.com/science/article/abs/pii/S0749596X19300786 4 | // Users can adjust this setting to match their reading speed or the specific 5 | // requirements of different text types. 6 | export const WORDS_PER_MINUTE = 250; 7 | 8 | export const CJK_WORDS_PER_MINUTE = 500; 9 | -------------------------------------------------------------------------------- /src/utils/common/formatReadingTimeText.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Formats the reading time into a human-readable string. 3 | * @param {number} minutes - The total number of minutes. 4 | * @returns {string} The formatted reading time as a human-readable string. 5 | */ 6 | export function formatReadingTimeText(minutes: number): string { 7 | if (minutes < 1) { 8 | return 'less than a minute read'; 9 | } 10 | 11 | return `${minutes} min read`; 12 | } 13 | -------------------------------------------------------------------------------- /src/utils/common/containsCJK.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Determines if the provided text contains any CJK (Chinese, Japanese, Korean) characters. 3 | * @param {string} text - The text to check for CJK characters. 4 | * @returns {boolean} True if CJK characters are found, otherwise false. 5 | */ 6 | 7 | export const containsCJK = (text: string): boolean => { 8 | return /[\u4E00-\u9FFF\u3400-\u4DBF\u3000-\u303F\uAC00-\uD7AF\u3040-\u309F\u30A0-\u30FF]/.test(text); 9 | }; 10 | -------------------------------------------------------------------------------- /src/utils/common/splitWords.ts: -------------------------------------------------------------------------------- 1 | import { containsCJK } from './containsCJK'; 2 | 3 | /** 4 | * Splits the text into an array of words. 5 | * @param {string} text - The text to split into words. 6 | * @returns {string[]} An array containing the words extracted from the text. 7 | */ 8 | export function splitWords(text: string) { 9 | if (containsCJK(text)) { 10 | return Array.from(text.replace(/\s+/g, '')); 11 | } 12 | 13 | return text.match(/[\w']+(?:'\w+)*/g) || []; 14 | } 15 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: 'npm' 9 | directory: '/' 10 | schedule: 11 | interval: 'weekly' 12 | versioning-strategy: increase 13 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "extends": [ 4 | "plugin:@typescript-eslint/recommended", 5 | "prettier", 6 | "plugin:prettier/recommended" 7 | ], 8 | "parserOptions": { 9 | "ecmaVersion": 2020, 10 | "sourceType": "module" 11 | }, 12 | "plugins": ["@typescript-eslint", "react-hooks", "import"], 13 | "rules": { 14 | "react-hooks/rules-of-hooks": "error", 15 | "react-hooks/exhaustive-deps": "error", 16 | "semi": ["error", "always"], 17 | "import/order": [ 18 | "error", 19 | { 20 | "groups": ["builtin", "external", "internal", "sibling", "parent"], 21 | "newlines-between": "always" 22 | } 23 | ] 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/findMostAndLeastFrequent.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Finds the most and least frequent items in the given map. 3 | * @param {Map} map - The map to analyze. 4 | * @returns {[string, string]} An array containing the most and least frequent items. 5 | */ 6 | export function findMostAndLeastFrequent(map: Map): [string, string] { 7 | let mostFrequent = ''; 8 | let leastFrequent = ''; 9 | let maxCount = 0; 10 | let minCount = Number.MAX_SAFE_INTEGER; 11 | 12 | map.forEach((count, item) => { 13 | if (count > maxCount) { 14 | mostFrequent = item; 15 | maxCount = count; 16 | } 17 | 18 | if (count < minCount) { 19 | leastFrequent = item; 20 | minCount = count; 21 | } 22 | }); 23 | 24 | return [mostFrequent, leastFrequent]; 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/determineReadingSpeed.ts: -------------------------------------------------------------------------------- 1 | import { containsCJK } from './common/containsCJK'; 2 | 3 | import { WORDS_PER_MINUTE, CJK_WORDS_PER_MINUTE } from '../constants'; 4 | 5 | /** 6 | * Determines the appropriate reading speed based on the content of the text or a provided custom value. 7 | * @param {string} text - The text to analyze. 8 | * @param {number} wordsPerMinute - The number of words a person can read per minute (optional). 9 | * @returns {number} The effective words per minute based on the input or text analysis. 10 | */ 11 | export const determineReadingSpeed = (text: string, wordsPerMinute?: number): number => { 12 | const wpmNumber = Number(wordsPerMinute); 13 | 14 | if (!isNaN(wpmNumber) && wpmNumber > 0) { 15 | return wpmNumber; 16 | } 17 | 18 | return containsCJK(text) ? CJK_WORDS_PER_MINUTE : WORDS_PER_MINUTE; 19 | }; 20 | -------------------------------------------------------------------------------- /src/utils/calculateWordFrequencies.ts: -------------------------------------------------------------------------------- 1 | import { splitWords } from './common/splitWords'; 2 | 3 | /** 4 | * Calculates word frequencies in the given text. 5 | * @param {string} text - The text to analyze. 6 | * @param {boolean} ignoreCase - Whether to ignore case when counting word frequencies. 7 | * @returns {Map} A map containing word frequencies. 8 | */ 9 | export function calculateWordFrequencies(text: string, ignoreCase: boolean): Map { 10 | const wordsMap = new Map(); 11 | 12 | if (!text) { 13 | return wordsMap; 14 | } 15 | 16 | const words = splitWords(text); 17 | 18 | words.forEach((word) => { 19 | const normalizedWord = ignoreCase ? word.toLowerCase() : word; 20 | const count = wordsMap.get(normalizedWord) || 0; 21 | wordsMap.set(normalizedWord, count + 1); 22 | }); 23 | 24 | return wordsMap; 25 | } 26 | -------------------------------------------------------------------------------- /tests/utils/containsCJK.test.ts: -------------------------------------------------------------------------------- 1 | import { containsCJK } from '../../src/utils/common/containsCJK'; 2 | 3 | describe('containsCJK', () => { 4 | it('returns true for strings containing Chinese characters', () => { 5 | expect(containsCJK('这是中文')).toBe(true); 6 | }); 7 | 8 | it('returns true for strings containing Japanese characters', () => { 9 | expect(containsCJK('これは日本語です')).toBe(true); 10 | expect(containsCJK('カタカナ')).toBe(true); 11 | expect(containsCJK('ひらがな')).toBe(true); 12 | }); 13 | 14 | it('returns true for strings containing Korean characters', () => { 15 | expect(containsCJK('이것은 한국어입니다')).toBe(true); 16 | }); 17 | 18 | it('returns false for strings containing only Latin alphabet characters', () => { 19 | expect(containsCJK('This is English text')).toBe(false); 20 | }); 21 | 22 | it('returns false for empty strings', () => { 23 | expect(containsCJK('')).toBe(false); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /src/utils/countSearchFrequency.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Counts the frequency of the search term in the text. 3 | * @param {string} text - The text to analyze. 4 | * @param {string} searchTerm - The term to search for. 5 | * @param {boolean} ignoreCase - Whether to ignore case when searching for the term. 6 | * @returns {number} The frequency of the search term in the text. 7 | */ 8 | export function countSearchFrequency(text: string, searchTerm: string, ignoreCase: boolean): number { 9 | if (!searchTerm) { 10 | return 0; 11 | } 12 | 13 | const regex = new RegExp(escapeRegExp(searchTerm), ignoreCase ? 'gi' : 'g'); 14 | const matches = text.match(regex); 15 | return matches ? matches.length : 0; 16 | } 17 | 18 | /** 19 | * Escapes special characters in a regular expression. 20 | * @param {string} string - The input string. 21 | * @returns {string} The string with special characters escaped. 22 | */ 23 | function escapeRegExp(string: string): string { 24 | return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/estimateReadingTime.ts: -------------------------------------------------------------------------------- 1 | import { formatReadingTimeText } from './common/formatReadingTimeText'; 2 | 3 | /** 4 | * Estimates the reading time of the text in seconds. 5 | * @param wordCount - The number of words in the text. 6 | * @param wordsPerMinute - The number of words a person can read per minute (optional, default from constants). 7 | * @returns {number} The estimated reading time of the text in seconds. 8 | */ 9 | export function estimateReadingTime( 10 | wordCount: number, 11 | wordsPerMinute: number, 12 | ): { 13 | minutes: number; 14 | seconds: number; 15 | total: number; 16 | text: string; 17 | } { 18 | const totalMinutes = wordCount / wordsPerMinute; 19 | const roundedMinutes = Math.floor(totalMinutes); 20 | const seconds = Math.round((totalMinutes - roundedMinutes) * 60); 21 | const totalSeconds = Math.round(totalMinutes * 60); 22 | const readableText = formatReadingTimeText(roundedMinutes); 23 | 24 | return { 25 | minutes: roundedMinutes, 26 | seconds, 27 | total: totalSeconds, 28 | text: readableText, 29 | }; 30 | } 31 | -------------------------------------------------------------------------------- /tests/utils/countCharacters.test.ts: -------------------------------------------------------------------------------- 1 | import { countCharacters } from '../../src/utils/countCharacters'; 2 | 3 | describe('countCharacters', () => { 4 | it('should return 0 when the input text is empty', () => { 5 | expect(countCharacters('')).toBe(0); 6 | }); 7 | 8 | it('should return the correct number of characters in the text', () => { 9 | expect(countCharacters('This is a test sentence.')).toBe(24); 10 | expect(countCharacters('This is a test sentence.')).toBe(33); 11 | expect(countCharacters(' This is a test sentence. ')).toBe(28); 12 | expect(countCharacters(' ')).toBe(6); 13 | expect(countCharacters('This sentence contains special characters! @#$%^&*')).toBe(50); 14 | }); 15 | 16 | it('should handle text with newline characters', () => { 17 | const text = 'This is a test\nwith\nmultiple\nlines.'; 18 | expect(countCharacters(text)).toBe(35); 19 | }); 20 | 21 | it('should handle text with Unicode characters', () => { 22 | const text = '😀🚀🌟'; 23 | expect(countCharacters(text)).toBe(6); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Anton Nikiforov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/utils/countParagraphs.test.ts: -------------------------------------------------------------------------------- 1 | import { countParagraphs } from '../../src/utils/countParagraphs'; 2 | 3 | describe('countParagraphs', () => { 4 | it('should return 0 when the input text is empty', () => { 5 | expect(countParagraphs('')).toBe(0); 6 | }); 7 | 8 | it('should return the correct number of paragraphs in the text', () => { 9 | expect(countParagraphs('This is a single paragraph. This is second sentence.')).toBe(1); 10 | expect(countParagraphs('This is a paragraph.\nThis is another paragraph.')).toBe(2); 11 | }); 12 | 13 | it('should handle multiple newlines between paragraphs', () => { 14 | expect(countParagraphs('This is a paragraph.\n\n\nThis is another paragraph.')).toBe(2); 15 | }); 16 | 17 | it('should handle leading and trailing newlines', () => { 18 | expect(countParagraphs('\nThis is a paragraph.\nThis is another paragraph.\n')).toBe(2); 19 | }); 20 | 21 | it('should handle leading and trailing whitespace', () => { 22 | expect(countParagraphs(' \nThis is a paragraph.\nThis is another paragraph.\n ')).toBe(2); 23 | }); 24 | 25 | it('should handle mixed whitespace characters', () => { 26 | expect( 27 | countParagraphs('This is a paragraph.\r\nThis is another paragraph.\n\nThis is yet another paragraph.'), 28 | ).toBe(3); 29 | }); 30 | }); 31 | -------------------------------------------------------------------------------- /src/useTextAnalyzer.ts: -------------------------------------------------------------------------------- 1 | import { useMemo } from 'react'; 2 | 3 | import { TextAnalysisResult, TextAnalyzerOptions } from './interfaces'; 4 | import { determineReadingSpeed } from './utils/determineReadingSpeed'; 5 | import { calculateStats } from './utils/calculateStats'; 6 | 7 | /** 8 | * Analyzes the given text and returns various statistics about it. 9 | * @param {TextAnalyzerOptions} options - Options for text analysis. 10 | * @returns {TextAnalysisResult} An object containing various statistics about the text. 11 | */ 12 | function useTextAnalyzer({ 13 | text, 14 | searchTerm = '', 15 | ignoreCase = true, 16 | trimText = true, 17 | wordsPerMinute, 18 | }: TextAnalyzerOptions): TextAnalysisResult { 19 | const processedText = useMemo(() => { 20 | return trimText ? text.trim() : text; 21 | }, [text, trimText]); 22 | 23 | const effectiveWPM = useMemo(() => { 24 | return determineReadingSpeed(processedText, wordsPerMinute); 25 | }, [processedText, wordsPerMinute]); 26 | 27 | const analysisResult = useMemo(() => { 28 | return calculateStats({ 29 | text: processedText, 30 | searchTerm, 31 | ignoreCase, 32 | wordsPerMinute: effectiveWPM, 33 | }); 34 | }, [processedText, searchTerm, ignoreCase, effectiveWPM]); 35 | 36 | return analysisResult; 37 | } 38 | 39 | export default useTextAnalyzer; 40 | -------------------------------------------------------------------------------- /src/utils/calculateCharFrequencies.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Calculates character frequencies in the given text. 3 | * @param {string} text - The text to analyze. 4 | * @param {boolean} ignoreCase - Whether to ignore case when counting character frequencies. 5 | * @returns {Map} A map containing character frequencies. 6 | */ 7 | export function calculateCharFrequencies(text: string, ignoreCase: boolean): Map { 8 | const charsMap = new Map(); 9 | 10 | // Split the text into individual Unicode characters 11 | // The Array.from() method is used to split the text into an array of individual Unicode characters. 12 | // This ensures that each character, including emojis and characters outside the Basic Multilingual Plane (BMP), 13 | // is treated as a single element in the array. 14 | // The regular expression /\s+/g is used to remove whitespace characters from the text before splitting it. 15 | // This ensures that whitespace characters are not counted as separate characters in the result. 16 | const characters = Array.from(text.replace(/\s+/g, '')); 17 | 18 | characters.forEach((char) => { 19 | const normalizedChar = ignoreCase ? char.toLowerCase() : char; 20 | const count = charsMap.get(normalizedChar) || 0; 21 | charsMap.set(normalizedChar, count + 1); 22 | }); 23 | 24 | return charsMap; 25 | } 26 | -------------------------------------------------------------------------------- /tests/utils/determineReadingSpeed.test.ts: -------------------------------------------------------------------------------- 1 | import { determineReadingSpeed } from '../../src/utils/determineReadingSpeed'; 2 | import { WORDS_PER_MINUTE, CJK_WORDS_PER_MINUTE } from '../../src/constants'; 3 | 4 | describe('determineReadingSpeed', () => { 5 | it('should return user-provided WPM ', () => { 6 | expect(determineReadingSpeed('Example text.', 300)).toBe(300); 7 | expect(determineReadingSpeed('这是一些示例文本。', 300)).toBe(300); 8 | }); 9 | 10 | it('should return default WPM for non-CJK text if WPM is not provided by user', () => { 11 | expect(determineReadingSpeed('This is some example text.')).toBe(WORDS_PER_MINUTE); 12 | }); 13 | 14 | it('should return default WPM for CJK text if WPM is not provided by user', () => { 15 | expect(determineReadingSpeed('这是一些示例文本。')).toBe(CJK_WORDS_PER_MINUTE); 16 | }); 17 | 18 | it('should ignore invalid WPM inputs and use default WPM', () => { 19 | // @ts-expect-error edge case when wordsPerMinute is a string 20 | expect(determineReadingSpeed('This is some example text.', 'not-a-number')).toBe(WORDS_PER_MINUTE); 21 | // @ts-expect-error edge case when wordsPerMinute is a string 22 | expect(determineReadingSpeed('这是一些示例文本。', 'not-a-number')).toBe(CJK_WORDS_PER_MINUTE); 23 | }); 24 | 25 | it('should handle empty string inputs', () => { 26 | expect(determineReadingSpeed('', 300)).toBe(300); 27 | expect(determineReadingSpeed('')).toBe(WORDS_PER_MINUTE); 28 | }); 29 | }); 30 | -------------------------------------------------------------------------------- /tests/utils/findMostAndLeastFrequent.test.ts: -------------------------------------------------------------------------------- 1 | import { findMostAndLeastFrequent } from '../../src/utils/findMostAndLeastFrequent'; 2 | 3 | describe('findMostAndLeastFrequent', () => { 4 | it('should return the most and least frequent items from the map', () => { 5 | const map1 = new Map(); 6 | map1.set('apple', 3); 7 | map1.set('banana', 2); 8 | map1.set('orange', 1); 9 | expect(findMostAndLeastFrequent(map1)).toEqual(['apple', 'orange']); 10 | 11 | const map2 = new Map(); 12 | map2.set('apple', 1); 13 | map2.set('banana', 2); 14 | map2.set('orange', 3); 15 | expect(findMostAndLeastFrequent(map2)).toEqual(['orange', 'apple']); 16 | }); 17 | 18 | it('should handle empty map', () => { 19 | const map = new Map(); 20 | expect(findMostAndLeastFrequent(map)).toEqual(['', '']); 21 | }); 22 | 23 | it('should handle the scenario where multiple items have equal frequencies', () => { 24 | const map3 = new Map(); 25 | map3.set('apple', 2); 26 | map3.set('banana', 2); 27 | map3.set('orange', 2); 28 | expect(findMostAndLeastFrequent(map3)).toEqual(['apple', 'apple']); 29 | }); 30 | 31 | it('should handle the scenario where there is only one item in the map', () => { 32 | const map = new Map(); 33 | map.set('apple', 1); 34 | expect(findMostAndLeastFrequent(map)).toEqual(['apple', 'apple']); 35 | }); 36 | 37 | it('should handle unicode characters', () => { 38 | const map = new Map(); 39 | map.set('😀', 3); 40 | map.set('🚀', 2); 41 | map.set('🌟', 1); 42 | expect(findMostAndLeastFrequent(map)).toEqual(['😀', '🌟']); 43 | }); 44 | }); 45 | -------------------------------------------------------------------------------- /tests/utils/countWords.test.ts: -------------------------------------------------------------------------------- 1 | import { countWords } from '../../src/utils/countWords'; 2 | 3 | describe('countWords', () => { 4 | it('should return 0 when the input text is empty', () => { 5 | expect(countWords('')).toBe(0); 6 | expect(countWords(' ')).toBe(0); 7 | }); 8 | 9 | it('should return the correct number of words in the text', () => { 10 | expect(countWords('This is a test sentence.')).toBe(5); 11 | expect(countWords('This is a test sentence.')).toBe(5); 12 | }); 13 | 14 | it('should handle leading and trailing whitespace', () => { 15 | expect(countWords(' This is a test sentence. ')).toBe(5); 16 | }); 17 | 18 | it('should handle mixed whitespace characters', () => { 19 | expect(countWords('This\tis\na\ntest\tsentence.')).toBe(5); 20 | }); 21 | 22 | it('should handle non-alphanumeric characters', () => { 23 | expect(countWords('This sentence contains special characters! @#$%^&*')).toBe(5); 24 | expect(countWords('This,sentence.contains:non-alphanumeric?characters!')).toBe(6); 25 | expect(countWords('This sentence contains 😊 emojis!')).toBe(4); 26 | }); 27 | 28 | it('should handle words containing digits', () => { 29 | expect(countWords('This sentence contains numbers like 123 and 456.')).toBe(8); 30 | }); 31 | 32 | it('should handle contractions as single words', () => { 33 | expect(countWords("don't count this as two words")).toBe(6); 34 | expect(countWords("it's a beautiful day, isn't it?")).toBe(6); 35 | expect(countWords("it'''''s a trap!")).toBe(3); 36 | }); 37 | 38 | it('should correctly count words in CJK text', () => { 39 | expect(countWords('这是中文')).toBe(4); 40 | expect(countWords('这是中文')).toBe(4); 41 | expect(countWords('これはテストです')).toBe(8); 42 | expect(countWords('테스트입니다')).toBe(6); 43 | }); 44 | }); 45 | -------------------------------------------------------------------------------- /src/utils/calculateStats.ts: -------------------------------------------------------------------------------- 1 | import { countWords } from './countWords'; 2 | import { countCharacters } from './countCharacters'; 3 | import { countSentences } from './countSentences'; 4 | import { countParagraphs } from './countParagraphs'; 5 | import { countSearchFrequency } from './countSearchFrequency'; 6 | import { estimateReadingTime } from './estimateReadingTime'; 7 | import { calculateCharFrequencies } from './calculateCharFrequencies'; 8 | import { calculateWordFrequencies } from './calculateWordFrequencies'; 9 | import { findMostAndLeastFrequent } from './findMostAndLeastFrequent'; 10 | 11 | import { TextAnalysisResult, TextAnalyzerOptions } from '../interfaces'; 12 | 13 | /** 14 | * Analyzes the given text and returns various statistics about it. 15 | * @param {TextAnalyzerOptions} options - Options for text analysis. 16 | * @returns {TextAnalysisResult} An object containing various statistics about the text. 17 | */ 18 | export function calculateStats({ 19 | text, 20 | searchTerm, 21 | ignoreCase, 22 | wordsPerMinute, 23 | }: TextAnalyzerOptions): TextAnalysisResult { 24 | const wordCount = countWords(text); 25 | const charCount = countCharacters(text); 26 | const sentenceCount = countSentences(text); 27 | const paragraphCount = countParagraphs(text); 28 | const searchFrequency = countSearchFrequency(text, searchTerm, ignoreCase); 29 | const readingTime = estimateReadingTime(wordCount, wordsPerMinute); 30 | const wordsMap = calculateWordFrequencies(text, ignoreCase); 31 | const charsMap = calculateCharFrequencies(text, ignoreCase); 32 | const [mostFrequentWord, leastFrequentWord] = findMostAndLeastFrequent(wordsMap); 33 | const [mostFrequentCharacter, leastFrequentCharacter] = findMostAndLeastFrequent(charsMap); 34 | 35 | return { 36 | wordCount, 37 | charCount, 38 | sentenceCount, 39 | paragraphCount, 40 | searchFrequency, 41 | readingTime, 42 | mostFrequentWord, 43 | leastFrequentWord, 44 | mostFrequentCharacter, 45 | leastFrequentCharacter, 46 | }; 47 | } 48 | -------------------------------------------------------------------------------- /tests/utils/estimateReadingTime.test.ts: -------------------------------------------------------------------------------- 1 | import { estimateReadingTime } from '../../src/utils/estimateReadingTime'; 2 | 3 | describe('estimateReadingTime', () => { 4 | const defaultWPM = 250; 5 | const cjkWPM = 500; 6 | 7 | it('should return the correct reading time for various word counts with default WPM', () => { 8 | expect(estimateReadingTime(0, defaultWPM)).toEqual({ 9 | minutes: 0, 10 | seconds: 0, 11 | total: 0, 12 | text: 'less than a minute read', 13 | }); 14 | 15 | expect(estimateReadingTime(100, defaultWPM)).toEqual({ 16 | minutes: 0, 17 | seconds: 24, 18 | total: 24, 19 | text: 'less than a minute read', 20 | }); 21 | 22 | expect(estimateReadingTime(250, defaultWPM)).toEqual({ 23 | minutes: 1, 24 | seconds: 0, 25 | total: 60, 26 | text: '1 min read', 27 | }); 28 | 29 | expect(estimateReadingTime(275, defaultWPM)).toEqual({ 30 | minutes: 1, 31 | seconds: 6, 32 | total: 66, 33 | text: '1 min read', 34 | }); 35 | 36 | expect(estimateReadingTime(500, defaultWPM)).toEqual({ 37 | minutes: 2, 38 | seconds: 0, 39 | total: 120, 40 | text: '2 min read', 41 | }); 42 | }); 43 | 44 | it('should return the correct reading time for various word counts with CJK WPM', () => { 45 | expect(estimateReadingTime(0, cjkWPM)).toEqual({ 46 | minutes: 0, 47 | seconds: 0, 48 | total: 0, 49 | text: 'less than a minute read', 50 | }); 51 | 52 | expect(estimateReadingTime(250, cjkWPM)).toEqual({ 53 | minutes: 0, 54 | seconds: 30, 55 | total: 30, 56 | text: 'less than a minute read', 57 | }); 58 | 59 | expect(estimateReadingTime(500, cjkWPM)).toEqual({ 60 | minutes: 1, 61 | seconds: 0, 62 | total: 60, 63 | text: '1 min read', 64 | }); 65 | 66 | expect(estimateReadingTime(750, cjkWPM)).toEqual({ 67 | minutes: 1, 68 | seconds: 30, 69 | total: 90, 70 | text: '1 min read', 71 | }); 72 | 73 | expect(estimateReadingTime(1000, cjkWPM)).toEqual({ 74 | minutes: 2, 75 | seconds: 0, 76 | total: 120, 77 | text: '2 min read', 78 | }); 79 | }); 80 | }); 81 | -------------------------------------------------------------------------------- /src/interfaces/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Options for text analysis. 3 | */ 4 | export interface TextAnalyzerOptions { 5 | /** 6 | * The text to analyze. 7 | */ 8 | text: string; 9 | /** 10 | * The term to search for in the text. 11 | */ 12 | searchTerm?: string; 13 | /** 14 | * Whether to ignore case when searching for the term and calculating word and character frequencies. 15 | * @default true 16 | */ 17 | ignoreCase?: boolean; 18 | /** 19 | * Whether to trim the text before analysis. 20 | * @default true 21 | */ 22 | trimText?: boolean; 23 | /** 24 | * The number of words a person can read per minute. 25 | * @default 250 or 500 for CJK characters 26 | */ 27 | wordsPerMinute?: number; 28 | } 29 | 30 | /** 31 | * Text analysis result. 32 | */ 33 | export interface TextAnalysisResult { 34 | /** 35 | * The number of words in the text. 36 | */ 37 | wordCount: number; 38 | /** 39 | * The number of characters in the text. 40 | */ 41 | charCount: number; 42 | /** 43 | * The number of sentences in the text. 44 | */ 45 | sentenceCount: number; 46 | /** 47 | * The number of paragraphs in the text. 48 | */ 49 | paragraphCount: number; 50 | /** 51 | * The frequency of the search term in the text. 52 | */ 53 | searchFrequency: number; 54 | /** 55 | * The estimated reading time of the text, detailed in minutes and seconds. 56 | */ 57 | readingTime: { 58 | /** 59 | * Estimated minutes to read the text. 60 | */ 61 | minutes: number; 62 | /** 63 | * Remaining seconds beyond counted minutes. 64 | */ 65 | seconds: number; 66 | /** 67 | * Total estimated reading time in seconds. 68 | */ 69 | total: number; 70 | /** 71 | * Human-readable summary of the reading time. 72 | */ 73 | text: string; 74 | }; 75 | /** 76 | * The most frequent word in the text. 77 | */ 78 | mostFrequentWord: string; 79 | /** 80 | * The least frequent word in the text. 81 | */ 82 | leastFrequentWord: string; 83 | /** 84 | * The most frequent character in the text. 85 | */ 86 | mostFrequentCharacter: string; 87 | /** 88 | * The least frequent character in the text. 89 | */ 90 | leastFrequentCharacter: string; 91 | } 92 | -------------------------------------------------------------------------------- /tests/utils/countSentences.test.ts: -------------------------------------------------------------------------------- 1 | import { countSentences } from '../../src/utils/countSentences'; 2 | 3 | describe('countSentences', () => { 4 | it('should return 0 when the input text is empty', () => { 5 | expect(countSentences('')).toBe(0); 6 | }); 7 | 8 | it('should return the correct number of sentences in the text', () => { 9 | expect(countSentences('This is a test sentence.')).toBe(1); 10 | expect(countSentences('This is a test sentence. Another sentence.')).toBe(2); 11 | }); 12 | 13 | it('should handle leading and trailing whitespace', () => { 14 | expect(countSentences(' This is a test sentence. Another Sentence. ')).toBe(2); 15 | }); 16 | 17 | it('should count sentences when the input text contains sentences with different punctuation marks', () => { 18 | expect(countSentences('This is a test sentence. Another sentence? Yes, another one!')).toBe(3); 19 | }); 20 | 21 | it('should count sentences when the input text contains sentences separated by different whitespace characters', () => { 22 | expect(countSentences('This is a test sentence.\nAnother sentence?\tYes, another one!\rAnd one more!')).toBe(4); 23 | }); 24 | 25 | it('should count sentences when there are no spaces between sentences', () => { 26 | expect(countSentences('This is a test sentence.Another sentence.Yes, another one!')).toBe(3); 27 | }); 28 | 29 | it('should count sentences with ellipses (...)', () => { 30 | expect(countSentences('This is a test sentence...Another sentence...Yes, another one!')).toBe(3); 31 | }); 32 | 33 | it('should count sentences with multiple exclamation marks (!!!)', () => { 34 | expect(countSentences('This is a test sentence!!!Another sentence!!!Yes, another one!')).toBe(3); 35 | }); 36 | 37 | it('should count sentences with multiple question marks (???)', () => { 38 | expect(countSentences('This is a test sentence???Another sentence???Yes, another one???')).toBe(3); 39 | }); 40 | 41 | it('should count sentences with multiple combinations of sentence-breaking characters', () => { 42 | expect(countSentences('This is a test sentence ??...!! Another sentence.?.!.')).toBe(2); 43 | }); 44 | 45 | it('should correctly count sentences in texts containing CJK punctuation', () => { 46 | expect(countSentences('这是一个测试。另一个测试?是的,还有一个!')).toBe(3); 47 | expect(countSentences('這是一個測試。還有一個測試!最後一句話?')).toBe(3); 48 | expect(countSentences('これはテストです。もう一つのテストですか?はい、もう一つあります!')).toBe(3); 49 | expect(countSentences('이것은 테스트입니다. 또 다른 테스트입니까? 네, 또 하나 있습니다!')).toBe(3); 50 | }); 51 | }); 52 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "use-text-analyzer", 3 | "version": "2.1.6", 4 | "description": "A React hook for analyzing text.", 5 | "source": "src/index.ts", 6 | "main": "dist/index.js", 7 | "module": "dist/index.module.js", 8 | "esmodule": "dist/index.mjs", 9 | "umd:main": "dist/index.umd.js", 10 | "exports": { 11 | ".": { 12 | "types": "./dist/index.d.ts", 13 | "module": "./dist/index.module.js", 14 | "import": "./dist/index.mjs", 15 | "require": "./dist/index.js" 16 | }, 17 | "./package.json": "./package.json" 18 | }, 19 | "sideEffects": false, 20 | "scripts": { 21 | "lint:check": "eslint .", 22 | "lint:fix": "eslint --fix .", 23 | "prettier:check": "prettier --check .", 24 | "prettier:fix": "prettier --write .", 25 | "build": "rm -rf ./dist/*; microbundle build --entry src/index.ts --name use-text-analyzer --tsconfig tsconfig.json", 26 | "test": "jest", 27 | "prepare": "husky" 28 | }, 29 | "engines": { 30 | "node": ">= 16.0.0" 31 | }, 32 | "repository": { 33 | "type": "git", 34 | "url": "git+https://github.com/Invulner/use-text-analyzer.git" 35 | }, 36 | "keywords": [ 37 | "react", 38 | "hook", 39 | "react-hook", 40 | "javascript", 41 | "typescript", 42 | "text", 43 | "analyzer", 44 | "text-analysis", 45 | "word-frequency", 46 | "wordcount", 47 | "reading", 48 | "time", 49 | "reading-time" 50 | ], 51 | "author": "invulner ", 52 | "license": "MIT", 53 | "files": [ 54 | "dist" 55 | ], 56 | "bugs": { 57 | "url": "https://github.com/Invulner/use-text-analyzer/issues" 58 | }, 59 | "homepage": "https://github.com/Invulner/use-text-analyzer#readme", 60 | "devDependencies": { 61 | "@testing-library/dom": "^10.4.0", 62 | "@testing-library/react": "^16.0.1", 63 | "@testing-library/react-hooks": "^8.0.1", 64 | "@types/jest": "^29.5.7", 65 | "@types/react": "^18.3.10", 66 | "@types/react-dom": "^18.3.0", 67 | "@typescript-eslint/eslint-plugin": "^7.18.0", 68 | "@typescript-eslint/parser": "^7.18.0", 69 | "eslint": "^8.57.0", 70 | "eslint-config-prettier": "^9.1.0", 71 | "eslint-config-standard": "^17.1.0", 72 | "eslint-plugin-import": "^2.29.1", 73 | "eslint-plugin-prettier": "^5.2.1", 74 | "eslint-plugin-react-hooks": "^4.6.2", 75 | "husky": "^9.1.6", 76 | "jest": "^29.7.0", 77 | "jest-environment-jsdom": "^29.7.0", 78 | "microbundle": "^0.15.1", 79 | "prettier": "^3.3.3", 80 | "react": "18.3.1", 81 | "react-dom": "18.3.1", 82 | "ts-jest": "^29.2.5", 83 | "typescript": "^5.6.2" 84 | }, 85 | "peerDependencies": { 86 | "react": ">=16.8.0" 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /tests/utils/countSearchFrequency.test.ts: -------------------------------------------------------------------------------- 1 | import { countSearchFrequency } from '../../src/utils/countSearchFrequency'; 2 | 3 | describe('countSearchFrequency', () => { 4 | describe('with ignoreCase: true', () => { 5 | it('should return 0 when the text is empty', () => { 6 | expect(countSearchFrequency('', 'searchTerm', true)).toBe(0); 7 | }); 8 | 9 | it('should return 0 when no search term is provided', () => { 10 | expect(countSearchFrequency('This is a test text.', '', true)).toBe(0); 11 | }); 12 | 13 | it('should return 0 when the search term is not found', () => { 14 | expect(countSearchFrequency('This is a test text.', 'missing', true)).toBe(0); 15 | }); 16 | 17 | it('should return the correct frequency of the search term in the text, ignoring case', () => { 18 | expect(countSearchFrequency('This is a test text.', 'test', true)).toBe(1); 19 | expect(countSearchFrequency('This is a test test test text.', 'Test', true)).toBe(3); 20 | }); 21 | 22 | it('should return the correct frequency when the search term contains special characters', () => { 23 | expect(countSearchFrequency('This is a test text.', '$', true)).toBe(0); 24 | expect(countSearchFrequency('This is a test $$$ text.', '$$', true)).toBe(1); 25 | expect(countSearchFrequency('This is a test $$$ text.', '$', true)).toBe(3); 26 | }); 27 | 28 | it('should return the correct frequency when the search term contains emojis', () => { 29 | expect(countSearchFrequency('This is a test text.', '😊', true)).toBe(0); 30 | expect(countSearchFrequency('This is a test text. 😊😊😊', '😊', true)).toBe(3); 31 | expect(countSearchFrequency('This is a test text. 😊😊😊', '😊😊', true)).toBe(1); 32 | }); 33 | }); 34 | 35 | describe('with ignoreCase: false', () => { 36 | it('should return 0 when the text is empty', () => { 37 | expect(countSearchFrequency('', 'searchTerm', false)).toBe(0); 38 | }); 39 | 40 | it('should return 0 when no search term is provided', () => { 41 | expect(countSearchFrequency('This is a test text.', '', false)).toBe(0); 42 | }); 43 | 44 | it('should return 0 when the search term is not found', () => { 45 | expect(countSearchFrequency('This is a test text.', 'missing', false)).toBe(0); 46 | }); 47 | 48 | it('should return the correct frequency of the search term in the text, ignoring case', () => { 49 | expect(countSearchFrequency('This is a test text.', 'test', false)).toBe(1); 50 | expect(countSearchFrequency('This is a test test test text.', 'Test', false)).toBe(0); 51 | }); 52 | 53 | it('should return the correct frequency when the search term contains special characters', () => { 54 | expect(countSearchFrequency('This is a test text.', '$', false)).toBe(0); 55 | expect(countSearchFrequency('This is a test $$$ text.', '$$', true)).toBe(1); 56 | expect(countSearchFrequency('This is a test $$$ text.', '$', true)).toBe(3); 57 | }); 58 | 59 | it('should return the correct frequency when the search term contains emojis', () => { 60 | expect(countSearchFrequency('This is a test text.', '😊', false)).toBe(0); 61 | expect(countSearchFrequency('This is a test text. 😊😊😊', '😊', false)).toBe(3); 62 | expect(countSearchFrequency('This is a test text. 😊😊😊', '😊😊', false)).toBe(1); 63 | }); 64 | }); 65 | }); 66 | -------------------------------------------------------------------------------- /tests/utils/calculateCharFrequencies.test.ts: -------------------------------------------------------------------------------- 1 | import { calculateCharFrequencies } from '../../src/utils/calculateCharFrequencies'; 2 | 3 | describe('calculateCharFrequencies', () => { 4 | it('should return an empty map when the input text is empty', () => { 5 | const result = calculateCharFrequencies('', true); 6 | expect(result.size).toBe(0); 7 | }); 8 | 9 | it('should calculate character frequencies correctly with case sensitivity', () => { 10 | const text = 'Hello, WOrld!'; 11 | const result = calculateCharFrequencies(text, false); 12 | expect(result.get('H')).toBe(1); 13 | expect(result.get('e')).toBe(1); 14 | expect(result.get('l')).toBe(3); 15 | expect(result.get('o')).toBe(1); 16 | expect(result.get('O')).toBe(1); 17 | expect(result.get(',')).toBe(1); 18 | expect(result.get('W')).toBe(1); 19 | expect(result.get('r')).toBe(1); 20 | expect(result.get('d')).toBe(1); 21 | expect(result.get('!')).toBe(1); 22 | expect(result.size).toBe(10); 23 | }); 24 | 25 | it('should calculate character frequencies correctly without case sensitivity', () => { 26 | const text = 'Hello, WOrld!'; 27 | const result = calculateCharFrequencies(text, true); 28 | expect(result.get('h')).toBe(1); 29 | expect(result.get('e')).toBe(1); 30 | expect(result.get('l')).toBe(3); 31 | expect(result.get('o')).toBe(2); 32 | expect(result.get(',')).toBe(1); 33 | expect(result.get('w')).toBe(1); 34 | expect(result.get('r')).toBe(1); 35 | expect(result.get('d')).toBe(1); 36 | expect(result.get('!')).toBe(1); 37 | expect(result.size).toBe(9); 38 | }); 39 | 40 | it('should correctly count special characters', () => { 41 | const text = '!@#$%^&*()_+-=[]{}|;:\'",.<>/?`~'; 42 | const result = calculateCharFrequencies(text, true); 43 | expect(result.size).toBe(31); 44 | expect(result.get('!')).toBe(1); 45 | expect(result.get('@')).toBe(1); 46 | expect(result.get('#')).toBe(1); 47 | expect(result.get('$')).toBe(1); 48 | expect(result.get('%')).toBe(1); 49 | expect(result.get('^')).toBe(1); 50 | expect(result.get('&')).toBe(1); 51 | expect(result.get('*')).toBe(1); 52 | expect(result.get('(')).toBe(1); 53 | expect(result.get(')')).toBe(1); 54 | expect(result.get('_')).toBe(1); 55 | expect(result.get('+')).toBe(1); 56 | expect(result.get('-')).toBe(1); 57 | expect(result.get('=')).toBe(1); 58 | expect(result.get('[')).toBe(1); 59 | expect(result.get(']')).toBe(1); 60 | expect(result.get('{')).toBe(1); 61 | expect(result.get('}')).toBe(1); 62 | expect(result.get('|')).toBe(1); 63 | expect(result.get(';')).toBe(1); 64 | expect(result.get(':')).toBe(1); 65 | expect(result.get("'")).toBe(1); 66 | expect(result.get('"')).toBe(1); 67 | expect(result.get(',')).toBe(1); 68 | expect(result.get('.')).toBe(1); 69 | expect(result.get('<')).toBe(1); 70 | expect(result.get('>')).toBe(1); 71 | expect(result.get('/')).toBe(1); 72 | expect(result.get('?')).toBe(1); 73 | expect(result.get('`')).toBe(1); 74 | expect(result.get('~')).toBe(1); 75 | }); 76 | 77 | it('should correctly count emojis and other Unicode characters', () => { 78 | const text = '😀🚀🌟éñö'; 79 | const result = calculateCharFrequencies(text, true); 80 | expect(result.size).toBe(6); 81 | expect(result.get('😀')).toBe(1); 82 | expect(result.get('🚀')).toBe(1); 83 | expect(result.get('🌟')).toBe(1); 84 | expect(result.get('é')).toBe(1); 85 | expect(result.get('ñ')).toBe(1); 86 | expect(result.get('ö')).toBe(1); 87 | }); 88 | 89 | it('should handle different whitespace characters correctly', () => { 90 | const text = ' \t\n\r'; 91 | const result = calculateCharFrequencies(text, true); 92 | expect(result.size).toBe(0); 93 | }); 94 | 95 | it('should handle a mix of alphanumeric and non-alphanumeric characters correctly', () => { 96 | const text = 'Mix of characters: abc123!@#$'; 97 | const result = calculateCharFrequencies(text, true); 98 | expect(result.size).toBe(21); 99 | expect(result.get('m')).toBe(1); 100 | expect(result.get('i')).toBe(1); 101 | expect(result.get('x')).toBe(1); 102 | expect(result.get('o')).toBe(1); 103 | expect(result.get('f')).toBe(1); 104 | expect(result.get('h')).toBe(1); 105 | expect(result.get('r')).toBe(2); 106 | expect(result.get('e')).toBe(1); 107 | expect(result.get('a')).toBe(3); 108 | expect(result.get('b')).toBe(1); 109 | expect(result.get('c')).toBe(3); 110 | expect(result.get('t')).toBe(1); 111 | expect(result.get('s')).toBe(1); 112 | expect(result.get('1')).toBe(1); 113 | expect(result.get('2')).toBe(1); 114 | expect(result.get('3')).toBe(1); 115 | expect(result.get(':')).toBe(1); 116 | expect(result.get('!')).toBe(1); 117 | expect(result.get('@')).toBe(1); 118 | expect(result.get('#')).toBe(1); 119 | expect(result.get('$')).toBe(1); 120 | }); 121 | }); 122 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | use-text-analyzer 3 |
4 | 5 |

6 | A lightweight React hook for analyzing text and providing various statistics such as estimated reading time, search frequency, word count, and more. 7 |

8 | 9 |
10 | NPM Version 11 | npm bundle size 12 | GitHub top language 13 | GitHub License 14 |
15 | 16 | ## Installation 🛠️ 17 | 18 | ### npm 19 | 20 | ```bash 21 | npm install use-text-analyzer 22 | ``` 23 | 24 | ### yarn 25 | 26 | ```bash 27 | yarn add use-text-analyzer 28 | ``` 29 | 30 | ## Features ✨ 31 | 32 | - **Lightweight 🪶:** 1KB in size, making it lightweight and efficient. 33 | - **TypeScript Support 🇹🇸:** Works seamlessly with both JavaScript and TypeScript projects. 34 | - **Advanced Reading Time Estimation 🕒:** Provides detailed reading time estimates in minutes and seconds, along with a human-readable format for immediate use in interfaces. 35 | - **Efficient Algorithms ⚡:** Utilizes efficient algorithms for finding the most and least frequent words/characters. 36 | - **CJK Support 📜:** Enhanced support for texts in Chinese, Japanese, and Korean, ensuring accurate analysis across these languages. 37 | - **SSR Compatible 🌐:** Fully compatible with server-side rendering, ensuring reliable performance across server and client environments. 38 | 39 | ## Usage 📝 40 | 41 | ```jsx 42 | import React from 'react'; 43 | import useTextAnalyzer from 'use-text-analyzer'; 44 | 45 | function TextAnalyzerExample() { 46 | const text = 47 | 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.'; 48 | const searchTerm = 'Lorem'; 49 | 50 | const { 51 | wordCount, 52 | charCount, 53 | sentenceCount, 54 | paragraphCount, 55 | searchFrequency, 56 | readingTime, 57 | mostFrequentWord, 58 | leastFrequentWord, 59 | mostFrequentCharacter, 60 | leastFrequentCharacter, 61 | } = useTextAnalyzer({ text, searchTerm }); 62 | 63 | return ( 64 |
65 |

Text Analysis Result

66 |

67 | Estimated reading time: {readingTime.minutes}m {readingTime.seconds}s 68 |

69 |

Total reading time in seconds: {readingTime.total}

70 |

Human-readable reading time: {readingTime.text}

71 |

72 | Search frequency of '{searchTerm}': {searchFrequency} 73 |

74 |

Word count: {wordCount}

75 |

Character count: {charCount}

76 |

Sentence count: {sentenceCount}

77 |

Paragraph count: {paragraphCount}

78 |

Most frequent word: {mostFrequentWord}

79 |

Least frequent word: {leastFrequentWord}

80 |

Most frequent character: {mostFrequentCharacter}

81 |

Least frequent character: {leastFrequentCharacter}

82 |
83 | ); 84 | } 85 | 86 | export default TextAnalyzerExample; 87 | ``` 88 | 89 | ## API ⚙️ 90 | 91 | ### `useTextAnalyzer(options: TextAnalyzerOptions): TextAnalysisResult` 92 | 93 | A React hook that analyzes the given text and returns various statistics about it. 94 | 95 | #### Parameters 96 | 97 | - `options` (`TextAnalyzerOptions`, required): An object containing options for text analysis. 98 | - `text` (string, required): The text to analyze. 99 | - `searchTerm` (string, optional): The term to search for in the text. 100 | - `ignoreCase` (boolean, optional, default: true): Whether to ignore case when searching for the term and calculating word and character frequencies. 101 | - `trimText` (boolean, optional, default: true): Whether to trim the text before analysis. 102 | - `wordsPerMinute` (number, optional, default: 250 or 500 for CJK): Custom reading speed in words per minute, automatically adjusted for CJK text if not specified. 103 | 104 | #### Returns 105 | 106 | A `TextAnalysisResult` object containing various statistics about the text. 107 | 108 | - `readingTime` (object): 109 | - `minutes` (number): The total estimated reading time expressed in whole minutes. 110 | - `seconds` (number): Remaining seconds beyond the counted minutes. 111 | - `total` (number): The total estimated reading time expressed in total seconds. 112 | - `text` (string): A human-readable summary of the reading time. 113 | - `searchFrequency` (number): The frequency of the search term in the text. 114 | - `wordCount` (number): The number of words in the text. 115 | - `charCount` (number): The number of characters in the text. 116 | - `sentenceCount` (number): The number of sentences in the text. 117 | - `paragraphCount` (number): The number of paragraphs in the text. 118 | - `mostFrequentWord` (string): The most frequent word in the text. 119 | - `leastFrequentWord` (string): The least frequent word in the text. 120 | - `mostFrequentCharacter` (string): The most frequent character in the text. 121 | - `leastFrequentCharacter` (string): The least frequent character in the text. 122 | -------------------------------------------------------------------------------- /tests/hooks/useTextAnalyzer.test.ts: -------------------------------------------------------------------------------- 1 | import { renderHook } from '@testing-library/react'; 2 | 3 | import useTextAnalyzer from '../../src/useTextAnalyzer'; 4 | 5 | describe('useTextAnalyzer', () => { 6 | const text = ` 7 | This is the first paragraph. It's SeCoNd contains multiple words. 8 | 9 | This is the second paragraph! it Contains numbers like 123 and special characters like @#$. 10 | 11 | This iS the third paragraph? It includes emojis like 😀 and unicode characters like éñö. 12 | 13 | This is the fourth paragraph. It's followed by a paragraph. 14 | 15 | This iS tHe fifth Paragraph. It contains whitespace characters like \t and \r. 16 | 17 | This is the sixth Paragraph. It also includes non-alphanumeric characters: %^&*()_+-=[]{}|;:'",<>/\`~. 18 | 19 | This is the seventh paragraph. It's shorter. 20 | `; 21 | 22 | it('should analyze text correctly with searchTerm and when ignoreCase is true', () => { 23 | const { result } = renderHook(() => useTextAnalyzer({ text: text.trim(), searchTerm: 'SeCoNd', ignoreCase: true })); 24 | 25 | expect(result.current).toEqual({ 26 | wordCount: 78, 27 | charCount: 560, 28 | sentenceCount: 14, 29 | paragraphCount: 7, 30 | searchFrequency: 2, 31 | readingTime: { 32 | minutes: 0, 33 | seconds: 19, 34 | total: 19, 35 | text: 'less than a minute read', 36 | }, 37 | mostFrequentWord: 'paragraph', 38 | leastFrequentWord: 'first', 39 | mostFrequentCharacter: 'a', 40 | leastFrequentCharacter: '!', 41 | }); 42 | }); 43 | 44 | it('should analyze text correctly with searchTerm and when ignoreCase is false', () => { 45 | const { result } = renderHook(() => 46 | useTextAnalyzer({ 47 | text: text.trim(), 48 | searchTerm: 'SeCoNd', 49 | ignoreCase: false, 50 | }), 51 | ); 52 | 53 | expect(result.current).toEqual({ 54 | wordCount: 78, 55 | charCount: 560, 56 | sentenceCount: 14, 57 | paragraphCount: 7, 58 | searchFrequency: 1, 59 | readingTime: { 60 | minutes: 0, 61 | seconds: 19, 62 | total: 19, 63 | text: 'less than a minute read', 64 | }, 65 | mostFrequentWord: 'This', 66 | leastFrequentWord: 'first', 67 | mostFrequentCharacter: 'a', 68 | leastFrequentCharacter: 'N', 69 | }); 70 | }); 71 | 72 | it('should analyze text correctly with trimText', () => { 73 | const { result } = renderHook(() => useTextAnalyzer({ text, trimText: false })); 74 | 75 | expect(result.current).toEqual({ 76 | wordCount: 78, 77 | charCount: 566, 78 | sentenceCount: 14, 79 | paragraphCount: 7, 80 | searchFrequency: 0, 81 | readingTime: { 82 | minutes: 0, 83 | seconds: 19, 84 | total: 19, 85 | text: 'less than a minute read', 86 | }, 87 | mostFrequentWord: 'paragraph', 88 | leastFrequentWord: 'first', 89 | mostFrequentCharacter: 'a', 90 | leastFrequentCharacter: '!', 91 | }); 92 | }); 93 | 94 | it('should analyze text correctly with searchTerm containing special characters', () => { 95 | const { result } = renderHook(() => 96 | useTextAnalyzer({ 97 | text: text.trim(), 98 | searchTerm: '@#$', 99 | ignoreCase: true, 100 | }), 101 | ); 102 | 103 | expect(result.current).toEqual({ 104 | wordCount: 78, 105 | charCount: 560, 106 | sentenceCount: 14, 107 | paragraphCount: 7, 108 | searchFrequency: 1, 109 | readingTime: { 110 | minutes: 0, 111 | seconds: 19, 112 | total: 19, 113 | text: 'less than a minute read', 114 | }, 115 | mostFrequentWord: 'paragraph', 116 | leastFrequentWord: 'first', 117 | mostFrequentCharacter: 'a', 118 | leastFrequentCharacter: '!', 119 | }); 120 | }); 121 | 122 | it('should analyze text correctly with searchTerm and when wordsPerMinute is string', () => { 123 | const { result } = renderHook(() => 124 | // @ts-expect-error edge case when wordsPerMinute is a string 125 | useTextAnalyzer({ text: text.trim(), searchTerm: 'SeCoNd', ignoreCase: true, wordsPerMinute: '250' }), 126 | ); 127 | 128 | expect(result.current).toEqual({ 129 | wordCount: 78, 130 | charCount: 560, 131 | sentenceCount: 14, 132 | paragraphCount: 7, 133 | searchFrequency: 2, 134 | readingTime: { 135 | minutes: 0, 136 | seconds: 19, 137 | total: 19, 138 | text: 'less than a minute read', 139 | }, 140 | mostFrequentWord: 'paragraph', 141 | leastFrequentWord: 'first', 142 | mostFrequentCharacter: 'a', 143 | leastFrequentCharacter: '!', 144 | }); 145 | }); 146 | 147 | it('should analyze text correctly with searchTerm and when wordsPerMinute is empty string', () => { 148 | const { result } = renderHook(() => 149 | // @ts-expect-error edge case when wordsPerMinute is a string 150 | useTextAnalyzer({ text: text.trim(), searchTerm: 'SeCoNd', ignoreCase: true, wordsPerMinute: '' }), 151 | ); 152 | 153 | expect(result.current).toEqual({ 154 | wordCount: 78, 155 | charCount: 560, 156 | sentenceCount: 14, 157 | paragraphCount: 7, 158 | searchFrequency: 2, 159 | readingTime: { 160 | minutes: 0, 161 | seconds: 19, 162 | total: 19, 163 | text: 'less than a minute read', 164 | }, 165 | mostFrequentWord: 'paragraph', 166 | leastFrequentWord: 'first', 167 | mostFrequentCharacter: 'a', 168 | leastFrequentCharacter: '!', 169 | }); 170 | }); 171 | }); 172 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 2.1.6 2 | 3 | ### Fixes 4 | 5 | - Resolved deprecation warnings in the test suite by installing additional testing libraries: 6 | - Added `@testing-library/dom` version `^10.4.0`. 7 | - Added `@testing-library/react` version `^16.0.1`. 8 | 9 | ## 2.1.5 10 | 11 | ### Dependency Updates 12 | 13 | - Updated `@types/react` from `18.3.3` to `18.3.10`. 14 | - Updated `eslint-plugin-prettier` from `5.1.3` to `5.2.1`. 15 | - Updated `husky` from `9.1.4` to `9.1.6`. 16 | - Updated `ts-jest` from `29.2.3` to `29.2.5`. 17 | - Updated `typescript` from `5.5.3` to `5.6.2`. 18 | 19 | ## 2.1.4 20 | 21 | ### Dependency Updates 22 | 23 | - Updated `husky` from `9.0.11` to `9.1.4`. 24 | - Updated `prettier` from `3.3.0` to `3.3.3`. 25 | - Updated `@typescript-eslint/parser` from `7.12.0` to `7.18.0`. 26 | - Updated `@typescript-eslint/eslint-plugin` from `7.12.0` to `7.18.0`. 27 | - Updated `ts-jest` from `29.1.4` to `29.2.3`. 28 | - Updated `typescript` from `5.4.5` to `5.5.3`. 29 | 30 | ## 2.1.3 31 | 32 | ### Dependency Updates 33 | 34 | - Updated `react-dom` from `18.2.0` to `18.3.1`. 35 | - Updated `@types/react-dom` from `18.2.25` to `18.3.0`. 36 | - Updated `@typescript-eslint/parser` from `7.8.0` to `7.12.0`. 37 | - Updated `@types/react` from `18.3.1` to `18.3.3`. 38 | - Updated `@typescript-eslint/eslint-plugin` from `7.8.0` to `7.12.0`. 39 | - Updated `ts-jest` from `29.1.2` to `29.1.4`. 40 | - Updated `prettier` from `3.2.5` to `3.3.0`. 41 | 42 | ## 2.1.2 43 | 44 | ### Dependency Updates 45 | 46 | - Bump `ejs` from version `3.1.9` to `3.1.10` in the npm_and_yarn group. 47 | - Bump `@typescript-eslint/parser` from version `7.7.1` to `7.8.0`. 48 | - Bump `eslint-plugin-react-hooks` from version `4.6.0` to `4.6.2`. 49 | - Bump `@typescript-eslint/eslint-plugin` from version `7.7.1` to `7.8.0`. 50 | - Updates `react` from version `18.2.0` to `18.3.1`. 51 | - Updates `@types/react` from version `18.2.79` to `18.3.1`. 52 | 53 | ## 2.1.1 54 | 55 | ### Enhancements 56 | 57 | - **Optimizations:** Enhanced the performance of the `useTextAnalyzer` hook by wrapping several calculations in `useMemo` to avoid unnecessary re-computations. 58 | 59 | ### Documentation 60 | 61 | - Updated README to clarify features. 62 | 63 | ### Dependency Updates 64 | 65 | - Updated `@typescript-eslint/eslint-plugin` from `7.7.0` to `7.7.1`. 66 | - Updated `@typescript-eslint/parser` from `7.7.0` to `7.7.1`. 67 | 68 | ## 2.1.0 69 | 70 | ### Enhancements 71 | 72 | - **CJK Support Enhancements:** Improved support for CJK text in various utilities: 73 | - Added automatic detection of CJK text to adjust reading time calculations based on the typical reading speed for CJK languages. 74 | - Updated `countWords` utility to correctly treat each CJK character as a separate word, enhancing accuracy in word counting for Chinese, Japanese, and Korean texts. 75 | - Enhanced `countSentences` to recognize CJK-specific punctuation, improving sentence detection accuracy in CJK languages. 76 | 77 | ### Dependency Updates 78 | 79 | - Updated `typescript` from `5.2.2` to `5.4.5`. 80 | - Updated `@types/react` from `18.2.73` to `18.2.79`. 81 | - Updated `@types/react-dom` from `18.2.23` to `18.2.25`. 82 | - Updated `@typescript-eslint/eslint-plugin` from `7.4.0` to `7.7.0`. 83 | - Updated `@typescript-eslint/parser` from `7.4.0` to `7.7.0`. 84 | 85 | ## 2.0.1 86 | 87 | ### Fixes 88 | 89 | - process cases when `wordsPerMinute` is a string 90 | 91 | ## 2.0.0 92 | 93 | ### Breaking Changes 94 | 95 | - **Reading Time Format:** The format of the `readingTime` output in the `useTextAnalyzer` hook has been modified. Previously, `readingTime` was returned as a single integer representing the total number of seconds. Now, it is an object containing detailed aspects of the estimated reading time: 96 | - `minutes`: Total estimated reading time expressed in whole minutes. 97 | - `seconds`: Remaining seconds beyond the counted minutes. 98 | - `total`: Total estimated reading time expressed in seconds. 99 | - `text`: Human-readable summary of the reading time. 100 | 101 | ### Added 102 | 103 | - **Custom Reading Speed:** Added a new optional parameter `wordsPerMinute` to the `useTextAnalyzer` hook. This parameter allows users to specify their reading speed, which the hook uses to calculate the estimated reading time. The default value is set to 250 words per minute. 104 | 105 | ## 1.1.15 106 | 107 | - fix word splitter to count contractions like "don't" as one word. 108 | 109 | ## 1.1.14 110 | 111 | - store default value for reading speed in constants 112 | 113 | ## 1.1.13 114 | 115 | - update keywords 116 | 117 | ## 1.1.12 118 | 119 | - update `README.md` styling 120 | 121 | ## 1.1.11 122 | 123 | - use `useMemo` to calculate all stats 124 | 125 | ## 1.1.10 126 | 127 | - fix `calculateCharFrequencies` logic 128 | 129 | ## 1.1.9 130 | 131 | - update `calculateCharFrequencies` to correctly calc emoji 132 | 133 | ## 1.1.8 134 | 135 | - update word splitting mechanism in `countWordFrequencies` 136 | 137 | ## 1.1.7 138 | 139 | - update `countParagraphs` to ignore trailing and leading whitespaces 140 | 141 | ## 1.1.6 142 | 143 | - update `countWords` to exclude non alphanumeric characters 144 | 145 | ## 1.1.5 146 | 147 | - fix `calculateWordFrequencies` logic to omit multiple whitespaces 148 | 149 | ## 1.1.4 150 | 151 | - fix `countSentences` logic to omit multiple whitespaces 152 | 153 | ## 1.1.3 154 | 155 | - update code structure, move utils to separate files 156 | 157 | ## 1.1.2 158 | 159 | - fix `calculateWordFrequencies` for empty text 160 | 161 | ## 1.1.1 162 | 163 | - fix bug in `countParagraphs` 164 | - update `countSearchFrequency` logic 165 | 166 | ## 1.1.0 167 | 168 | - add `trimText` param 169 | 170 | ## 1.0.0 171 | 172 | - add basic functionality: 173 | - calculate number of words 174 | - calculate number of characters 175 | - calculate number of sentences 176 | - calculate number of paragraphs 177 | - calculate frequency of the search term 178 | - calculate estimated reading time 179 | - find most frequent word 180 | - find least frequent word 181 | - find most frequent character 182 | - find least frequent character 183 | -------------------------------------------------------------------------------- /tests/utils/calculateWordFrequencies.test.ts: -------------------------------------------------------------------------------- 1 | import { calculateWordFrequencies } from '../../src/utils/calculateWordFrequencies'; 2 | 3 | describe('calculateWordFrequencies', () => { 4 | it('should return an empty map when the input text is empty', () => { 5 | const wordFrequencies = calculateWordFrequencies('', true); 6 | expect(wordFrequencies.size).toBe(0); 7 | }); 8 | 9 | it('should return the correct word frequencies for the input text', () => { 10 | const text = 'This is a Test sentence. This Sentence contains repeated Words, repeated words.'; 11 | const wordFrequencies = calculateWordFrequencies(text, true); 12 | 13 | expect(wordFrequencies.size).toBe(8); 14 | expect(wordFrequencies.get('this')).toBe(2); 15 | expect(wordFrequencies.get('is')).toBe(1); 16 | expect(wordFrequencies.get('a')).toBe(1); 17 | expect(wordFrequencies.get('test')).toBe(1); 18 | expect(wordFrequencies.get('sentence')).toBe(2); 19 | expect(wordFrequencies.get('contains')).toBe(1); 20 | expect(wordFrequencies.get('repeated')).toBe(2); 21 | expect(wordFrequencies.get('words')).toBe(2); 22 | }); 23 | 24 | it('should not ignore case when counting word frequencies if ignoreCase is false', () => { 25 | const text = 'This is a Test sentence. This Sentence contains repeated Words, repeated words.'; 26 | const wordFrequencies = calculateWordFrequencies(text, false); 27 | 28 | expect(wordFrequencies.size).toBe(10); 29 | expect(wordFrequencies.get('This')).toBe(2); 30 | expect(wordFrequencies.get('is')).toBe(1); 31 | expect(wordFrequencies.get('a')).toBe(1); 32 | expect(wordFrequencies.get('Test')).toBe(1); 33 | expect(wordFrequencies.get('sentence')).toBe(1); 34 | expect(wordFrequencies.get('contains')).toBe(1); 35 | expect(wordFrequencies.get('repeated')).toBe(2); 36 | expect(wordFrequencies.get('words')).toBe(1); 37 | }); 38 | 39 | it('should handle leading and trailing whitespace if ignoreCase is true', () => { 40 | const text = ' This is a Test test sentence. '; 41 | const wordFrequencies = calculateWordFrequencies(text, true); 42 | 43 | expect(wordFrequencies.size).toBe(5); 44 | expect(wordFrequencies.get('this')).toBe(1); 45 | expect(wordFrequencies.get('is')).toBe(1); 46 | expect(wordFrequencies.get('a')).toBe(1); 47 | expect(wordFrequencies.get('test')).toBe(2); 48 | expect(wordFrequencies.get('sentence')).toBe(1); 49 | }); 50 | 51 | it('should handle leading and trailing whitespace if ignoreCase is false', () => { 52 | const text = ' This is a Test test sentence. '; 53 | const wordFrequencies = calculateWordFrequencies(text, false); 54 | 55 | expect(wordFrequencies.size).toBe(6); 56 | expect(wordFrequencies.get('this')).toBe(undefined); 57 | expect(wordFrequencies.get('is')).toBe(1); 58 | expect(wordFrequencies.get('a')).toBe(1); 59 | expect(wordFrequencies.get('Test')).toBe(1); 60 | expect(wordFrequencies.get('test')).toBe(1); 61 | expect(wordFrequencies.get('sentence')).toBe(1); 62 | }); 63 | 64 | it('should handle mixed whitespace characters if ignoreCase is true', () => { 65 | const text = 'This\tis\na\nTest\ttest sentence.'; 66 | const wordFrequencies = calculateWordFrequencies(text, true); 67 | 68 | expect(wordFrequencies.size).toBe(5); 69 | expect(wordFrequencies.get('this')).toBe(1); 70 | expect(wordFrequencies.get('is')).toBe(1); 71 | expect(wordFrequencies.get('a')).toBe(1); 72 | expect(wordFrequencies.get('test')).toBe(2); 73 | expect(wordFrequencies.get('Test')).toBe(undefined); 74 | expect(wordFrequencies.get('sentence')).toBe(1); 75 | }); 76 | 77 | it('should handle mixed whitespace characters if ignoreCase is false', () => { 78 | const text = 'This\tis\na\nTest\ttest sentence.'; 79 | const wordFrequencies = calculateWordFrequencies(text, false); 80 | 81 | expect(wordFrequencies.size).toBe(6); 82 | expect(wordFrequencies.get('this')).toBe(undefined); 83 | expect(wordFrequencies.get('This')).toBe(1); 84 | expect(wordFrequencies.get('is')).toBe(1); 85 | expect(wordFrequencies.get('a')).toBe(1); 86 | expect(wordFrequencies.get('test')).toBe(1); 87 | expect(wordFrequencies.get('Test')).toBe(1); 88 | expect(wordFrequencies.get('sentence')).toBe(1); 89 | }); 90 | 91 | it('should handle non-alphanumeric characters, including emojis if ignoreCase is true', () => { 92 | const text = 'This:sentence,contains?special-Test+test;characters!@#$%^&* 😊😊😊'; 93 | const wordFrequencies = calculateWordFrequencies(text, true); 94 | 95 | expect(wordFrequencies.size).toBe(6); 96 | expect(wordFrequencies.get('this')).toBe(1); 97 | expect(wordFrequencies.get('sentence')).toBe(1); 98 | expect(wordFrequencies.get('contains')).toBe(1); 99 | expect(wordFrequencies.get('special')).toBe(1); 100 | expect(wordFrequencies.get('characters')).toBe(1); 101 | expect(wordFrequencies.get('test')).toBe(2); 102 | }); 103 | 104 | it('should handle non-alphanumeric characters, including emojis if ignoreCase is false', () => { 105 | const text = 'This:sentence,contains?special-Test+test;characters!@#$%^&* 😊😊😊'; 106 | const wordFrequencies = calculateWordFrequencies(text, false); 107 | 108 | expect(wordFrequencies.size).toBe(7); 109 | expect(wordFrequencies.get('this')).toBe(undefined); 110 | expect(wordFrequencies.get('sentence')).toBe(1); 111 | expect(wordFrequencies.get('contains')).toBe(1); 112 | expect(wordFrequencies.get('special')).toBe(1); 113 | expect(wordFrequencies.get('characters')).toBe(1); 114 | expect(wordFrequencies.get('test')).toBe(1); 115 | expect(wordFrequencies.get('Test')).toBe(1); 116 | }); 117 | 118 | it('should handle text containing digits if ignoreCase is true', () => { 119 | const text = 'This sentence contains numbers like 123 and 456.'; 120 | const wordFrequencies = calculateWordFrequencies(text, true); 121 | 122 | expect(wordFrequencies.size).toBe(8); 123 | expect(wordFrequencies.get('this')).toBe(1); 124 | expect(wordFrequencies.get('sentence')).toBe(1); 125 | expect(wordFrequencies.get('contains')).toBe(1); 126 | expect(wordFrequencies.get('numbers')).toBe(1); 127 | expect(wordFrequencies.get('like')).toBe(1); 128 | expect(wordFrequencies.get('123')).toBe(1); 129 | expect(wordFrequencies.get('and')).toBe(1); 130 | expect(wordFrequencies.get('456')).toBe(1); 131 | }); 132 | 133 | it('should handle text containing digits if ignoreCase is false', () => { 134 | const text = 'This sentence contains numbers like 123 and 456.'; 135 | const wordFrequencies = calculateWordFrequencies(text, false); 136 | 137 | expect(wordFrequencies.size).toBe(8); 138 | expect(wordFrequencies.get('this')).toBe(undefined); 139 | expect(wordFrequencies.get('sentence')).toBe(1); 140 | expect(wordFrequencies.get('contains')).toBe(1); 141 | expect(wordFrequencies.get('numbers')).toBe(1); 142 | expect(wordFrequencies.get('like')).toBe(1); 143 | expect(wordFrequencies.get('123')).toBe(1); 144 | expect(wordFrequencies.get('and')).toBe(1); 145 | expect(wordFrequencies.get('456')).toBe(1); 146 | }); 147 | }); 148 | --------------------------------------------------------------------------------