├── src ├── types │ ├── index.ts │ └── options.ts ├── constants │ ├── index.ts │ ├── defaultOptions.ts │ └── defaultChars.ts ├── utils │ ├── preservation.ts │ └── charLevelReplace.ts ├── plugins │ ├── eol │ │ └── index.ts │ ├── lineBreaks │ │ └── index.ts │ ├── quotes │ │ └── index.ts │ ├── rlm │ │ └── index.ts │ ├── dashes │ │ └── index.ts │ ├── standardChars │ │ └── index.ts │ ├── diacritics │ │ ├── index.spec.ts │ │ └── index.ts │ ├── ellipsis │ │ ├── index.spec.ts │ │ └── index.ts │ ├── kashidas │ │ └── index.ts │ ├── dates │ │ └── index.ts │ ├── trim │ │ └── index.ts │ ├── numeralSymbols │ │ └── index.ts │ ├── html │ │ └── index.ts │ ├── nbsp │ │ └── index.ts │ ├── numbers │ │ └── index.ts │ ├── brace │ │ └── index.ts │ ├── entity │ │ └── index.ts │ ├── comment │ │ └── index.ts │ ├── bracket │ │ └── index.ts │ ├── frontmatter │ │ └── index.ts │ ├── marks │ │ └── index.ts │ ├── spaces │ │ └── index.ts │ ├── punctuations │ │ └── index.ts │ ├── braces │ │ └── index.ts │ ├── uri │ │ └── index.ts │ ├── chars │ │ └── index.ts │ └── zwnj │ │ └── index.ts └── steps │ ├── postfix.ts │ ├── restore.ts │ ├── preserve.ts │ ├── prefix.ts │ ├── fix.ts │ └── wordLevelFix.ts ├── images └── banner.png ├── .eslintignore ├── .prettierignore ├── .prettierrc ├── babel.config.js ├── index.spec.ts ├── .github └── workflows │ ├── greetings.yml │ ├── continuous-integration.yml │ └── publish.yml ├── jest.config.json ├── .eslintrc.js ├── LICENSE ├── .gitignore ├── package.json ├── index.ts ├── README.md └── tsconfig.json /src/types/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./options"; 2 | -------------------------------------------------------------------------------- /images/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Firastar/core/HEAD/images/banner.png -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- 1 | babel.config.js 2 | node_modules/* 3 | README.md 4 | jest.config.json 5 | dist/ -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | babel.config.js 2 | node_modules/* 3 | README.md 4 | jest.config.json 5 | dist/ -------------------------------------------------------------------------------- /src/constants/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./defaultOptions"; 2 | export * from "./defaultChars"; 3 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": false, 3 | "trailingComma": "all", 4 | "semi": true 5 | } -------------------------------------------------------------------------------- /src/utils/preservation.ts: -------------------------------------------------------------------------------- 1 | export function preservation(text: string): string { 2 | return ` __FIRAST__${text.toUpperCase()}__PRESERVATION__ `; 3 | } 4 | -------------------------------------------------------------------------------- /src/plugins/eol/index.ts: -------------------------------------------------------------------------------- 1 | /* 2 | Replaces windows end of lines with unix eol 3 | */ 4 | 5 | export function fixEOL(text: string): string { 6 | return text.replace(/(\r?\n)|(\r\n?)/g, "\n"); 7 | } 8 | -------------------------------------------------------------------------------- /src/plugins/lineBreaks/index.ts: -------------------------------------------------------------------------------- 1 | export function fixLineBreaks(text: string): string { 2 | return ( 3 | text 4 | 5 | // cleans more than two contiguous line-breaks 6 | .replace(/\n{2,}/g, "\n\n") 7 | ); 8 | } 9 | -------------------------------------------------------------------------------- /src/plugins/quotes/index.ts: -------------------------------------------------------------------------------- 1 | export function fixEnglishQuotes(text: string): string { 2 | return ( 3 | text 4 | // replaces english quote marks with their persian equivalent 5 | .replace(/([“"'`]+)(.+?)(\1)/g, "«$2»") 6 | ); 7 | } 8 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | ['@babel/preset-env', { 4 | targets: { 5 | node: 'current' 6 | } 7 | }], 8 | +'@babel/preset-typescript', 9 | ], 10 | }; -------------------------------------------------------------------------------- /src/plugins/rlm/index.ts: -------------------------------------------------------------------------------- 1 | export function removeRLM(text: string): string { 2 | return ( 3 | text 4 | /* converts Right-to-left marks followed by persian characters to zero-width non-joiners (ZWNJ) */ 5 | .replace(/([^a-zA-Z\-_])(\u200F)/g, "$1\u200c") 6 | ); 7 | } 8 | -------------------------------------------------------------------------------- /src/plugins/dashes/index.ts: -------------------------------------------------------------------------------- 1 | export function fixDashes(text: string): string { 2 | return ( 3 | text 4 | // replaces triple dash to mdash 5 | .replace(/-{3}/g, "—") 6 | 7 | // replaces double dash to ndash 8 | .replace(/-{2}/g, "–") 9 | ); 10 | } 11 | -------------------------------------------------------------------------------- /src/plugins/standardChars/index.ts: -------------------------------------------------------------------------------- 1 | export function fixStandardChars(text: string, persianGlyphs: any): string { 2 | for (const i in persianGlyphs) { 3 | if (Object.prototype.hasOwnProperty.call(persianGlyphs, i)) { 4 | text = text.replace(new RegExp("[" + persianGlyphs[i] + "]", "g"), i); 5 | } 6 | } 7 | return text; 8 | } 9 | -------------------------------------------------------------------------------- /src/plugins/diacritics/index.spec.ts: -------------------------------------------------------------------------------- 1 | import { fixDiacritics, removeDiacritics } from "."; 2 | 3 | describe("Diacritics", () => { 4 | test("removeDiacritics", () => { 5 | expect(removeDiacritics("سَلام اُمیدِ مَن")).toBe("سلام امید من"); 6 | }); 7 | 8 | test("fixDiacritics", () => { 9 | expect(fixDiacritics("سَِلام ِ")).toBe("سِلامِ"); 10 | }); 11 | }); 12 | -------------------------------------------------------------------------------- /src/plugins/ellipsis/index.spec.ts: -------------------------------------------------------------------------------- 1 | import { fixEllipsis } from "."; 2 | 3 | describe("Ellipsis", () => { 4 | test("fixEllipsis", () => { 5 | expect(fixEllipsis("سلام . . .")).toBe("سلام… "); 6 | expect(fixEllipsis("سلام....")).toBe("سلام… "); 7 | expect(fixEllipsis("سلام....")).toBe("سلام… "); 8 | expect(fixEllipsis("سلام……")).toBe("سلام… "); 9 | }); 10 | }); 11 | -------------------------------------------------------------------------------- /index.spec.ts: -------------------------------------------------------------------------------- 1 | import { firast } from "./index"; 2 | 3 | describe("Firast", () => { 4 | test("cleanup simple sentences", () => { 5 | expect( 6 | firast( 7 | "فيراستار به شما كمك مي كند تا متون فارسي زيبا تر و درست تري بنويسيد .", 8 | ), 9 | ).toBe( 10 | "فیراستار به شما کمک می‌کند تا متون فارسی زیباتر و درست‌تری بنویسید.", 11 | ); 12 | }); 13 | }); 14 | -------------------------------------------------------------------------------- /src/plugins/kashidas/index.ts: -------------------------------------------------------------------------------- 1 | export function removeKashidas(text: string): string { 2 | return ( 3 | text 4 | // converts kashida between numbers to ndash 5 | .replace(/([0-9۰-۹]+)ـ+([0-9۰-۹]+)/g, "$1–$2") 6 | 7 | // removes all kashidas between non-whitespace characters 8 | // MAYBE: more punctuations 9 | .replace(/([^\s.])\u0640+(?![\s.])/g, "$1") 10 | ); 11 | } 12 | -------------------------------------------------------------------------------- /src/steps/postfix.ts: -------------------------------------------------------------------------------- 1 | import { trim } from "../plugins/trim"; 2 | import { IOptions } from "../types"; 3 | 4 | export default function postfix(options: IOptions, text: string): string { 5 | if (options.trim) { 6 | text = trim(text); 7 | } else { 8 | // removes single space paddings around the string 9 | text = text.replace(/^[ ]/g, "").replace(/[ ]$/g, ""); 10 | } 11 | return text; 12 | } 13 | -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | issue-message: 'Message that will be displayed on users'' first issue' 13 | pr-message: 'Message that will be displayed on users'' first pr' 14 | -------------------------------------------------------------------------------- /jest.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "transform": { 3 | "^.+\\.tsx?$": "/node_modules/ts-jest/preprocessor.js" 4 | }, 5 | "testEnvironment": "node", 6 | "collectCoverage": true, 7 | "collectCoverageFrom": [ 8 | "!**/*.d.ts", 9 | "!**/node_modules/**", 10 | "src/**/*.ts" 11 | ], 12 | "verbose": true, 13 | "moduleFileExtensions": [ 14 | "ts", 15 | "js" 16 | ] 17 | } -------------------------------------------------------------------------------- /src/utils/charLevelReplace.ts: -------------------------------------------------------------------------------- 1 | export function charLevelReplace( 2 | text: string, 3 | from: string, 4 | to: string, 5 | ): string { 6 | const fromChars = from.split(""); 7 | const toChars = to.split(""); 8 | for (const i in fromChars) { 9 | if (Object.prototype.hasOwnProperty.call(fromChars, i)) { 10 | text = text.replace(new RegExp(fromChars[i], "g"), toChars[i]); 11 | } 12 | } 13 | return text; 14 | } 15 | -------------------------------------------------------------------------------- /src/plugins/dates/index.ts: -------------------------------------------------------------------------------- 1 | export function fixDates(text: string): string { 2 | return ( 3 | text 4 | 5 | // re-orders date parts with slash as delimiter 6 | .replace(/([0-9۰-۹]{1,2})([/-])([0-9۰-۹]{1,2})\2([0-9۰-۹]{4})/g, function( 7 | matched, 8 | day, 9 | delimiter, 10 | month, 11 | year, 12 | ) { 13 | return year + "/" + month + "/" + day; 14 | }) 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /src/plugins/trim/index.ts: -------------------------------------------------------------------------------- 1 | export function trim(text: string): string { 2 | return ( 3 | text 4 | 5 | // removes space/tab/zwnj/nbsp from the beginning of the new-lines 6 | .replace(/([\n]+)[ \t\u200c\u00a0]*/g, "$1") 7 | 8 | // removes spaces, tabs, zwnj, direction marks and new lines from 9 | // the beginning and end of text 10 | // @REF: http://stackoverflow.com/a/38490203 11 | .replace(/^[\s\u200c\u200e\u200f]+|[\s\u200c\u200e\u200f]+$/g, "") 12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /src/plugins/ellipsis/index.ts: -------------------------------------------------------------------------------- 1 | export function fixEllipsis(text: string): string { 2 | return ( 3 | text 4 | // removes spaces between dots 5 | .replace(/\.([ ]+)(?=[.])/g, ".") 6 | 7 | // replaces three dots with ellipsis character 8 | .replace(/[ \t]*\.{3,}/g, "…") 9 | 10 | // replaces more than one ellipsis with one 11 | .replace(/(…){2,}/g, "…") 12 | 13 | // replaces (space|tab|zwnj) after ellipsis with one space 14 | .replace(/([ ]{1,})*…[ \t\u200c]*/g, "$1… ") 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /src/plugins/numeralSymbols/index.ts: -------------------------------------------------------------------------------- 1 | export function fixNumeralSymbols(text: string): string { 2 | return ( 3 | text 4 | 5 | // replaces english percent signs (U+066A) 6 | // @Ref ebraminio/persiantools 7 | .replace(/([۰-۹]) ?%/g, "$1٪") 8 | 9 | // replaces dots between numbers into decimal separator (U+066B) 10 | // @Ref ebraminio/persiantools 11 | .replace(/([۰-۹])\.(?=[۰-۹])/g, "$1٫") 12 | 13 | // replaces commas between numbers into thousands separator (U+066C) 14 | // @Ref languagetool-org 15 | .replace(/([۰-۹]),(?=[۰-۹])/g, "$1٬") 16 | ); 17 | } 18 | -------------------------------------------------------------------------------- /src/plugins/html/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const html: string[] = []; 4 | 5 | // preserves all html tags in the text 6 | export function preserveHtml(text: string): string { 7 | return text.replace(/<\/?[a-z][^>]*?>/gi, function(matched) { 8 | html.push(matched); 9 | return preservation("html"); 10 | }); 11 | } 12 | 13 | // bringing back HTML tags 14 | export function restoreHtml(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("html").trim()}[ ]?`, "g"), 17 | () => html.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/nbsp/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const nbsp: string[] = []; 4 | 5 | // preserves all no-break space entities in the text 6 | export function preserveNbsp(text: string): string { 7 | return text.replace(/(\[.*?\])/g, function(matched) { 8 | nbsp.push(matched); 9 | return preservation("nbsp"); 10 | }); 11 | } 12 | 13 | // bringing back nbsp 14 | export function restoreNbsp(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("nbsp").trim()}[ ]?`, "g"), 17 | () => nbsp.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/numbers/index.ts: -------------------------------------------------------------------------------- 1 | import { arabicDigits, englishDigits, persianDigits } from "../../constants"; 2 | import { charLevelReplace } from "../../utils/charLevelReplace"; 3 | 4 | export function fixArabicNumbers(text: string): string { 5 | return ( 6 | // replaces arabic numbers with their persian equivalent 7 | charLevelReplace(text, arabicDigits, persianDigits) 8 | ); 9 | } 10 | 11 | export function fixEnglishNumbers(text: string): string { 12 | return ( 13 | // replaces english numbers with their persian equivalent 14 | charLevelReplace(text, englishDigits, persianDigits) 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /src/plugins/brace/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const brace: string[] = []; 4 | 5 | // preserves strings inside curly braces (`{}`) 6 | export function preserveBrace(text: string): string { 7 | return text.replace(/(\[.*?\])/g, function(matched) { 8 | brace.push(matched); 9 | return preservation("brace"); 10 | }); 11 | } 12 | 13 | // bringing back braces 14 | export function restoreBrace(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("brace").trim()}[ ]?`, "g"), 17 | () => brace.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/entity/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const entity: string[] = []; 4 | 5 | // preserves all html entities in the text 6 | export function preserveEntity(text: string): string { 7 | return text.replace(/&(#?[^;\W]+;?)/g, function(matched) { 8 | entity.push(matched); 9 | return preservation("entity"); 10 | }); 11 | } 12 | 13 | // bringing back entities 14 | export function restoreEntity(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("entity").trim()}[ ]?`, "g"), 17 | () => entity.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/comment/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const comment: string[] = []; 4 | 5 | // preserves all html comments in the text 6 | export function preserveComment(text: string): string { 7 | return text.replace(//g, function(matched) { 8 | comment.push(matched); 9 | return preservation("comment"); 10 | }); 11 | } 12 | 13 | // bringing back HTML comments 14 | export function restoreComment(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("comment").trim()}[ ]?`, "g"), 17 | () => comment.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/bracket/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const bracket: string[] = []; 4 | 5 | // preserves strings inside square brackets (`[]`) 6 | export function preserveBracket(text: string): string { 7 | return text.replace(//g, function(matched) { 8 | bracket.push(matched); 9 | return preservation("bracket"); 10 | }); 11 | } 12 | 13 | // bringing back brackets 14 | export function restoreBracket(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("bracket").trim()}[ ]?`, "g"), 17 | () => bracket.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/frontmatter/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | export const frontmatter: string[] = []; 4 | 5 | // preserves frontmatter data in the markdown 6 | export function preserveFrontmatter(text: string): string { 7 | return text.replace(/^ ---[\S\s]*?---\n/g, function(matched) { 8 | frontmatter.push(matched); 9 | return preservation("frontmatter"); 10 | }); 11 | } 12 | 13 | // bringing back frontmatter 14 | export function restoreFrontmatter(text: string): string { 15 | return text.replace( 16 | new RegExp(`[ ]?${preservation("frontmatter").trim()}[ ]?`, "g"), 17 | () => frontmatter.shift() as string, 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/marks/index.ts: -------------------------------------------------------------------------------- 1 | export function fixQuestionMarks(text: string): string { 2 | return text.replace(/(\?)/g, "؟"); 3 | } 4 | 5 | export function removeExtraMarks(text: string): string { 6 | return ( 7 | text 8 | 9 | // removes space between different/same marks (combining for cleanup) 10 | .replace(/([؟?!])([ ]+)(?=[؟?!])/g, "$1") 11 | 12 | // replaces more than one exclamation mark with just one 13 | .replace(/(!){2,}/g, "$1") 14 | // replaces more than one english or persian question mark with just one 15 | .replace(/(\u061F|\?){2,}/g, "$1") // \u061F = `؟` 16 | // re-orders consecutive marks 17 | .replace(/(!)([ \t]*)([\u061F?])/g, "$3$1") // `?!` --> `!?` 18 | ); 19 | } 20 | -------------------------------------------------------------------------------- /src/plugins/spaces/index.ts: -------------------------------------------------------------------------------- 1 | export function removeSpaces(text: string): string { 2 | return ( 3 | text 4 | 5 | // replaces more than one space with just a single one 6 | // except before/after preservers and before new-lines 7 | .replace(/([^_])([ ]{2,})(?![_]{2}|\n)/g, "$1 ") 8 | 9 | // cleans whitespace/zwnj between new-lines 10 | // @REF: https://stackoverflow.com/a/10965543/ 11 | .replace(/\n[\s\u200c]*\n/g, "\n\n") 12 | ); 13 | } 14 | 15 | export function fixMiscSpacing(text: string): string { 16 | return ( 17 | text 18 | 19 | // removes space before parentheses on misc cases 20 | .replace(/ \((ص|عج|س|ع|ره)\)/g, "($1)") 21 | 22 | // removes space before braces containing numbers 23 | .replace(/ \[([0-9۰-۹]+)\]/g, "[$1]") 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | env: { 3 | browser: true, 4 | commonjs: false, 5 | es6: true, 6 | node: true, 7 | jest: true 8 | }, 9 | extends: [ 10 | "eslint:recommended", 11 | "plugin:@typescript-eslint/eslint-recommended", 12 | "plugin:@typescript-eslint/recommended" 13 | ], 14 | parser: '@typescript-eslint/parser', 15 | parserOptions: { 16 | ecmaVersion: 2018, 17 | sourceType: 'module' 18 | }, 19 | rules: { 20 | "arrow-body-style": 2, 21 | "semi": ["error", "always"], 22 | "quotes": ["error", "double"], 23 | "prefer-const": 1, 24 | "@typescript-eslint/no-unused-vars": "off", 25 | "@typescript-eslint/no-explicit-any": "off", 26 | "@typescript-eslint/explicit-module-boundary-types": "off" 27 | } 28 | }; -------------------------------------------------------------------------------- /src/plugins/diacritics/index.ts: -------------------------------------------------------------------------------- 1 | import { persianDiacritic } from "../../constants"; 2 | 3 | export function fixDiacritics(text: string): string { 4 | return ( 5 | text 6 | // cleans zwnj before diacritic characters 7 | .replace(new RegExp("\u200c([" + persianDiacritic + "])", "g"), "$1") 8 | 9 | // cleans more than one diacritic characters 10 | .replace( 11 | new RegExp("(.*)([" + persianDiacritic + "]){2,}(.*)", "g"), 12 | "$1$2$3", 13 | ) 14 | 15 | // cleans spaces before diacritic characters 16 | .replace(new RegExp("(\\S)[ ]+([" + persianDiacritic + "])", "g"), "$1$2") 17 | ); 18 | } 19 | 20 | export function removeDiacritics(text: string): string { 21 | return ( 22 | text 23 | 24 | // removes all diacritic characters 25 | .replace(new RegExp("[" + persianDiacritic + "]+", "g"), "") 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /src/plugins/punctuations/index.ts: -------------------------------------------------------------------------------- 1 | import { charLevelReplace } from "../../utils/charLevelReplace"; 2 | 3 | export function fixPunctuations(text: string): string { 4 | return charLevelReplace(text, ",;", "،؛"); 5 | } 6 | 7 | export function fixPunctuationSpacing(text: string): string { 8 | return ( 9 | text 10 | // removes space before punctuations 11 | .replace(/[ \t\u200c]*([:;,؛،.؟?!]{1})/g, "$1") 12 | 13 | // removes more than one space after punctuations 14 | // except followed by new-lines (or preservers) 15 | .replace(/([:;,؛،.؟?!]{1})[ \t\u200c]*(?!\n|_{2})/g, "$1 ") 16 | 17 | // removes space after colon that separates time parts 18 | .replace(/([0-9۰-۹]+):\s+([0-9۰-۹]+)/g, "$1:$2") 19 | 20 | // removes space after dots in numbers 21 | .replace(/([0-9۰-۹]+)\. ([0-9۰-۹]+)/g, "$1.$2") 22 | 23 | // removes space before common domain tlds 24 | .replace( 25 | /([\w\-_]+)\. (ir|com|org|net|info|edu|me)([\s/\\\])»:;.])/g, 26 | "$1.$2$3", 27 | ) 28 | 29 | // removes space between different/same marks (double-check) 30 | .replace(/([؟?!])([ ]+)(?=[؟?!])/g, "$1") 31 | ); 32 | } 33 | -------------------------------------------------------------------------------- /src/constants/defaultOptions.ts: -------------------------------------------------------------------------------- 1 | import { IOptions } from "../types"; 2 | 3 | export const defaultOptions: IOptions = { 4 | preserveFrontmatter: true, 5 | preserveHtml: true, 6 | preserveComment: true, 7 | preserveBrace: true, 8 | preserveBracket: true, 9 | preserveUri: true, 10 | preserveNbsp: true, 11 | preserveEntity: true, 12 | fixEOL: true, 13 | fixStandardChars: true, 14 | fixDashes: true, 15 | fixEllipsis: true, 16 | fixEnglishQuotes: true, 17 | removeRLM: true, 18 | fixZWNJ: true, 19 | fixArabicNumbers: true, 20 | skipMarkdownOrderedListsNumbersConversion: true, 21 | fixEnglishNumbers: true, 22 | fixNumeralSymbols: true, 23 | fixPunctuations: true, 24 | fixNonPersianChars: true, 25 | fixQuestionMarks: true, 26 | fixDates: true, 27 | 28 | fixHamzeh: true, 29 | fixArabicHamzeh: true, 30 | fixSuffixZwnj: true, 31 | fixPrefixZwnj: true, 32 | 33 | fixBracesSpacing: true, 34 | fixBracesSpacingInside: true, 35 | removeExtraMarks: true, 36 | fixPunctuationSpacing: true, 37 | removeKashidas: true, 38 | fixMiscSpacing: true, 39 | removeDiacritics: true, 40 | fixDiacritics: true, 41 | removeSpaces: true, 42 | 43 | fixLineBreaks: true, 44 | trim: true, 45 | }; 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Allen A. Bargi 2 | Copyright (c) 2019 Brothers in Code 3 | Copyright (c) 2020 Firast 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /src/types/options.ts: -------------------------------------------------------------------------------- 1 | export interface IOptions { 2 | preserveFrontmatter: boolean; 3 | preserveHtml: boolean; 4 | preserveComment: boolean; 5 | preserveBrace: boolean; 6 | preserveBracket: boolean; 7 | preserveUri: boolean; 8 | preserveNbsp: boolean; 9 | preserveEntity: boolean; 10 | fixEOL: boolean; 11 | fixStandardChars: boolean; 12 | fixDashes: boolean; 13 | fixEllipsis: boolean; 14 | fixEnglishQuotes: boolean; 15 | removeRLM: boolean; 16 | fixZWNJ: boolean; 17 | fixArabicNumbers: boolean; 18 | skipMarkdownOrderedListsNumbersConversion: boolean; 19 | fixEnglishNumbers: boolean; 20 | fixNumeralSymbols: boolean; 21 | fixPunctuations: boolean; 22 | fixNonPersianChars: boolean; 23 | fixQuestionMarks: boolean; 24 | fixDates: boolean; 25 | 26 | fixHamzeh: boolean; 27 | fixArabicHamzeh: boolean; 28 | fixSuffixZwnj: boolean; 29 | fixPrefixZwnj: boolean; 30 | 31 | fixBracesSpacing: boolean; 32 | fixBracesSpacingInside: boolean; 33 | removeExtraMarks: boolean; 34 | fixPunctuationSpacing: boolean; 35 | removeKashidas: boolean; 36 | fixMiscSpacing: boolean; 37 | removeDiacritics: boolean; 38 | fixDiacritics: boolean; 39 | removeSpaces: boolean; 40 | 41 | fixLineBreaks: boolean; 42 | trim: boolean; 43 | } 44 | -------------------------------------------------------------------------------- /src/steps/restore.ts: -------------------------------------------------------------------------------- 1 | import { restoreBrace } from "../plugins/brace"; 2 | import { restoreBracket } from "../plugins/bracket"; 3 | import { restoreComment } from "../plugins/comment"; 4 | import { restoreEntity } from "../plugins/entity"; 5 | import { restoreFrontmatter } from "../plugins/frontmatter"; 6 | import { restoreHtml } from "../plugins/html"; 7 | import { restoreNbsp } from "../plugins/nbsp"; 8 | import { restoreUri } from "../plugins/uri"; 9 | import { IOptions } from "../types"; 10 | 11 | export default function restore(options: IOptions, text: string): string { 12 | if (options.preserveEntity) { 13 | text = restoreEntity(text); 14 | } 15 | 16 | if (options.preserveNbsp) { 17 | text = restoreNbsp(text); 18 | } 19 | 20 | if (options.preserveUri) { 21 | text = restoreUri(text); 22 | } 23 | 24 | if (options.preserveBrace) { 25 | text = restoreBrace(text); 26 | } 27 | 28 | if (options.preserveBracket) { 29 | text = restoreBracket(text); 30 | } 31 | 32 | if (options.preserveComment) { 33 | text = restoreComment(text); 34 | } 35 | 36 | if (options.preserveHtml) { 37 | text = restoreHtml(text); 38 | } 39 | 40 | if (options.preserveFrontmatter) { 41 | text = restoreFrontmatter(text); 42 | } 43 | return text; 44 | } 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons (https://nodejs.org/api/addons.html) 33 | builds/ 34 | 35 | 36 | # Dependency directories 37 | node_modules/ 38 | jspm_packages/ 39 | dist/ 40 | coverage/ 41 | 42 | # TypeScript v1 declaration files 43 | typings/ 44 | 45 | # Optional npm cache directory 46 | .npm 47 | 48 | # Optional eslint cache 49 | .eslintcache 50 | 51 | # Optional REPL history 52 | .node_repl_history 53 | 54 | # Output of 'npm pack' 55 | *.tgz 56 | 57 | # Yarn Integrity file 58 | .yarn-integrity 59 | 60 | # dotenv environment variables file 61 | .env 62 | 63 | # next.js build output 64 | .next 65 | 66 | # mac store 67 | .DS_Store 68 | 69 | # CI/CD 70 | 71 | 72 | # IDEA 73 | .vscode 74 | 75 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@firastar/firastar-js", 3 | "version": "0.1.2", 4 | "description": "Write your best with Firastar in JS", 5 | "main": "dist/index.js", 6 | "types": "dist/index.d.ts", 7 | "scripts": { 8 | "test": "jest", 9 | "build": "tsc", 10 | "lint": "eslint \"**/*.ts\"", 11 | "prettier:write": "prettier --write \"**/*.ts\"", 12 | "prettier:check": "prettier --check \"**/*.ts\"" 13 | }, 14 | "repository": { 15 | "type": "git", 16 | "url": "https://github.com/Firastar/firastar-js.git" 17 | }, 18 | "homepage": "https://github.com/Firastar/firastar-js#readme", 19 | "keywords": [ 20 | "js", 21 | "firastar", 22 | "edit", 23 | "javascript" 24 | ], 25 | "license": "MIT", 26 | "dependencies": {}, 27 | "devDependencies": { 28 | "@babel/preset-typescript": "^7.10.4", 29 | "@types/jest": "^25.1.0", 30 | "@types/node": "^9.4.6", 31 | "@typescript-eslint/eslint-plugin": "^4.3.0", 32 | "@typescript-eslint/parser": "^2.18.0", 33 | "codecov": "^3.6.5", 34 | "eslint": "^6.7.2", 35 | "eslint-plugin-import": "^2.19.1", 36 | "jest": "^25.1.0", 37 | "prettier": "1.19.1", 38 | "ts-jest": "^22.4.1", 39 | "typescript": "^3.7.5" 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/constants/defaultChars.ts: -------------------------------------------------------------------------------- 1 | /* 2 | @Ref https://github.com/ebraminio/persiantools 3 | */ 4 | 5 | export const persianDiacritic = "ًٌٍَُِّْ"; 6 | export const persianChars = "ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك"; 7 | export const persianDigits = "۱۲۳۴۵۶۷۸۹۰"; 8 | export const arabicDigits = "١٢٣٤٥٦٧٨٩٠"; 9 | export const englishDigits = "1234567890"; 10 | 11 | /* eslint-disable */ 12 | // prettier-ignore 13 | export const persianGlyphs = { 14 | '\u200cه': 'ﻫ', 15 | 'ی\u200c': 'ﻰﻲ', 16 | 'ﺃ': 'ﺄﺃ', 17 | 'ﺁ': 'ﺁﺂ', 18 | 'ﺇ': 'ﺇﺈ', 19 | 'ا': 'ﺎا', 20 | 'ب': 'ﺏﺐﺑﺒ', 21 | 'پ': 'ﭖﭗﭘﭙ', 22 | 'ت': 'ﺕﺖﺗﺘ', 23 | 'ث': 'ﺙﺚﺛﺜ', 24 | 'ج': 'ﺝﺞﺟﺠ', 25 | 'چ': 'ﭺﭻﭼﭽ', 26 | 'ح': 'ﺡﺢﺣﺤ', 27 | 'خ': 'ﺥﺦﺧﺨ', 28 | 'د': 'ﺩﺪ', 29 | 'ذ': 'ﺫﺬ', 30 | 'ر': 'ﺭﺮ', 31 | 'ز': 'ﺯﺰ', 32 | 'ژ': 'ﮊﮋ', 33 | 'س': 'ﺱﺲﺳﺴ', 34 | 'ش': 'ﺵﺶﺷﺸ', 35 | 'ص': 'ﺹﺺﺻﺼ', 36 | 'ض': 'ﺽﺾﺿﻀ', 37 | 'ط': 'ﻁﻂﻃﻄ', 38 | 'ظ': 'ﻅﻆﻇﻈ', 39 | 'ع': 'ﻉﻊﻋﻌ', 40 | 'غ': 'ﻍﻎﻏﻐ', 41 | 'ف': 'ﻑﻒﻓﻔ', 42 | 'ق': 'ﻕﻖﻗﻘ', 43 | 'ک': 'ﮎﮏﮐﮑﻙﻚﻛﻜ', 44 | 'گ': 'ﮒﮓﮔﮕ', 45 | 'ل': 'ﻝﻞﻟﻠ', 46 | 'م': 'ﻡﻢﻣﻤ', 47 | 'ن': 'ﻥﻦﻧﻨ', 48 | 'ه': 'ﻩﻪﻫﻬ', 49 | 'هٔ': 'ﮤﮥ', 50 | 'و': 'ﻭﻮ', 51 | 'ﺅ': 'ﺅﺆ', 52 | 'ی': 'ﯼﯽﯾﯿﻯﻰﻱﻲﻳﻴ', 53 | 'ئ': 'ﺉﺊﺋﺌ', 54 | 'لا': 'ﻼ', 55 | 'ﻹ': 'ﻺ', 56 | 'ﻷ': 'ﻸ', 57 | 'ﻵ': 'ﻶ' 58 | }; 59 | -------------------------------------------------------------------------------- /src/steps/preserve.ts: -------------------------------------------------------------------------------- 1 | import { preserveBrace } from "../plugins/brace"; 2 | import { preserveBracket } from "../plugins/bracket"; 3 | import { preserveComment } from "../plugins/comment"; 4 | import { preserveEntity } from "../plugins/entity"; 5 | import { preserveFrontmatter } from "../plugins/frontmatter"; 6 | import { preserveHtml } from "../plugins/html"; 7 | import { preserveNbsp } from "../plugins/nbsp"; 8 | import { preserveUri } from "../plugins/uri"; 9 | import { IOptions } from "../types"; 10 | 11 | export default function preserve(options: IOptions, text: string): string { 12 | if (options.preserveFrontmatter) { 13 | text = preserveFrontmatter(text); 14 | } 15 | 16 | if (options.preserveHtml) { 17 | text = preserveHtml(text); 18 | } 19 | 20 | if (options.preserveComment) { 21 | text = preserveComment(text); 22 | } 23 | 24 | if (options.preserveBrace) { 25 | text = preserveBrace(text); 26 | } 27 | 28 | if (options.preserveBracket) { 29 | text = preserveBracket(text); 30 | } 31 | 32 | if (options.preserveUri) { 33 | text = preserveUri(text); 34 | } 35 | 36 | if (options.preserveNbsp) { 37 | text = preserveNbsp(text); 38 | } 39 | 40 | if (options.preserveNbsp) { 41 | text = preserveNbsp(text); 42 | } 43 | 44 | if (options.preserveEntity) { 45 | text = preserveEntity(text); 46 | } 47 | 48 | return text; 49 | } 50 | -------------------------------------------------------------------------------- /.github/workflows/continuous-integration.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [12.x] 17 | 18 | steps: 19 | - name: Checkout repository 20 | uses: actions/checkout@v2 21 | 22 | - name: Set up Node.js ${{ matrix.node-version }} 23 | uses: actions/setup-node@v1 24 | with: 25 | node-version: ${{ matrix.node-version }} 26 | 27 | - name: Cache dependencies 28 | uses: actions/cache@v2 29 | with: 30 | path: | 31 | **/node_modules 32 | key: ${{ runner.os }}-${{ hashFiles('**/package-lock.json') }} 33 | 34 | - name: Install dependencies 35 | run: npm install 36 | 37 | - name: Run the lints 38 | run: npm run lint 39 | 40 | - name: Run Prettier 41 | run: npm run prettier:check 42 | 43 | - name: Run the tests 44 | run: npm run test 45 | 46 | - name: Upload coverage to Codecov 47 | uses: codecov/codecov-action@v1 48 | 49 | - name: Build 50 | run: npm run build 51 | 52 | - name: Archive production 53 | uses: actions/upload-artifact@v2 54 | with: 55 | name: dist 56 | path: | 57 | dist 58 | !dist/**/*.md 59 | -------------------------------------------------------------------------------- /src/plugins/braces/index.ts: -------------------------------------------------------------------------------- 1 | export function fixBracesSpacing(text: string): string { 2 | const replacement = " $1$2$3 "; 3 | return ( 4 | text 5 | // removes inside spaces and more than one outside 6 | // for `()`, `[]`, `{}`, `“”` and `«»` 7 | .replace(/[ \t\u200c]*(\()\s*([^)]+?)\s*?(\))[ \t\u200c]*/g, replacement) 8 | .replace(/[ \t\u200c]*(\[)\s*([^\]]+?)\s*?(\])[ \t\u200c]*/g, replacement) 9 | .replace(/[ \t\u200c]*(\{)\s*([^}]+?)\s*?(\})[ \t\u200c]*/g, replacement) 10 | .replace(/[ \t\u200c]*(“)\s*([^”]+?)\s*?(”)[ \t\u200c]*/g, replacement) 11 | .replace(/[ \t\u200c]*(«)\s*([^»]+?)\s*?(»)[ \t\u200c]*/g, replacement) 12 | ); 13 | } 14 | 15 | export function fixBracesSpacingInside(text: string): string { 16 | const replacement = "$1$2$3"; 17 | return ( 18 | text 19 | // removes inside spaces for `()`, `[]`, `{}`, `“”` and `«»` 20 | .replace(/(\()\s*([^)]+?)\s*?(\))/g, replacement) 21 | .replace(/(\[)\s*([^\]]+?)\s*?(\])/g, replacement) 22 | .replace(/(\{)\s*([^}]+?)\s*?(\})/g, replacement) 23 | .replace(/(“)\s*([^”]+?)\s*?(”)/g, replacement) 24 | .replace(/(«)\s*([^»]+?)\s*?(»)/g, replacement) 25 | 26 | // NOTE: must be here, weird not working if on `markdownNormalizeBraces()` 27 | // removes markdown link spaces inside normal () 28 | .replace(/(\(\[.*?\]\(.*?\))\s+(\))/g, "$1$2") 29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- 1 | import { IOptions } from "./src/types"; 2 | import { defaultOptions, persianGlyphs } from "./src/constants"; 3 | import preserve from "./src/steps/preserve"; 4 | import prefix from "./src/steps/prefix"; 5 | import wordLevelFix from "./src/steps/wordLevelFix"; 6 | import fix from "./src/steps/fix"; 7 | import restore from "./src/steps/restore"; 8 | import postfix from "./src/steps/postfix"; 9 | 10 | export function firast( 11 | text: string, 12 | options: Partial = defaultOptions, 13 | ): string { 14 | // Complete options obj 15 | const combinedOptions = { ...defaultOptions, ...options }; 16 | 17 | if (!text.trim()) { 18 | return text; 19 | } 20 | 21 | text = " " + text + " "; 22 | 23 | // --- START OF PRESERVE 24 | text = preserve(combinedOptions, text); 25 | // --- END OF PRESERVE 26 | 27 | // --- START OF PRE-FIX 28 | text = prefix(combinedOptions, text); 29 | // --- END OF PRE-FIX 30 | 31 | // --- START OF WORD-LEVEL-FIX 32 | text = wordLevelFix(combinedOptions, text); 33 | // --- END OF WORD-LEVEL-FIX 34 | 35 | // --- START OF FIX 36 | text = fix(combinedOptions, text); 37 | // --- END OF FIX 38 | 39 | // --- START OF RESTORE 40 | text = restore(combinedOptions, text); 41 | // --- END OF RESTORE 42 | 43 | // --- START OF POST-FIX 44 | text = postfix(combinedOptions, text); 45 | // --- END OF POST-FIX 46 | 47 | return text; 48 | } 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 |

3 | 4 |

5 |

Firastar

6 |

Write your best for JS.

7 | 8 | 9 | ![CI/CD](https://github.com/Firastar/firastar-js/workflows/Continuous%20Integration/badge.svg) 10 | [![codecov](https://codecov.io/gh/Firastar/firastar-js/branch/master/graph/badge.svg)](https://codecov.io/gh/Firastar/firastar-js) 11 | [![CodeFactor](https://www.codefactor.io/repository/github/Firastar/firastar-js/badge)](https://www.codefactor.io/repository/github/Firastar/firastar-js) 12 | [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/Firastar/firastar-js/blob/master/LICENSE) 13 | [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-orange.svg)](https://github.com/Firastar/firastar-js/compare) 14 | 15 |
16 |
17 | 18 | ## Todo 19 | 20 | - [ ] Write a document 21 | - [ ] Increase the test coverage 22 | - [ ] Write more tests 23 | - [ ] Implement isPersian text detector func 24 | 25 | ## Contributing 26 | 27 | Thank you for your interest in contributing! Please feel free to put up a PR for any issue or feature request. 28 | 29 | ## Give me a Star 30 | 31 | If you think this project is helpful just give me a ⭐️ Star is enough because i don't drink coffee 😃 32 | 33 | ## License 34 | 35 | This project is licensed under the MIT License - see the [LICENSE.md](https://github.com/Firastar/firastar-js/blob/master/LICENSE) file for details. 36 | -------------------------------------------------------------------------------- /src/steps/prefix.ts: -------------------------------------------------------------------------------- 1 | import { persianGlyphs } from "../constants"; 2 | import { 3 | fixArabicHamzeh, 4 | fixHamzeh, 5 | fixSuffixSpacingHamzeh, 6 | removeArabicHamzeh, 7 | } from "../plugins/chars"; 8 | import { fixDashes } from "../plugins/dashes"; 9 | import { fixEllipsis } from "../plugins/ellipsis"; 10 | import { fixEOL } from "../plugins/eol"; 11 | import { fixArabicNumbers } from "../plugins/numbers"; 12 | import { fixEnglishQuotes } from "../plugins/quotes"; 13 | import { removeRLM } from "../plugins/rlm"; 14 | import { fixStandardChars } from "../plugins/standardChars"; 15 | import { fixZWNJ } from "../plugins/zwnj"; 16 | import { IOptions } from "../types"; 17 | 18 | export default function prefix(options: IOptions, text: string): string { 19 | if (options.fixEOL) { 20 | text = fixEOL(text); 21 | } 22 | 23 | if (options.fixStandardChars) { 24 | text = fixStandardChars(text, persianGlyphs); 25 | } 26 | 27 | if (options.fixDashes) { 28 | text = fixDashes(text); 29 | } 30 | 31 | if (options.fixEllipsis) { 32 | text = fixEllipsis(text); 33 | } 34 | 35 | if (options.fixEnglishQuotes) { 36 | text = fixEnglishQuotes(text); 37 | } 38 | 39 | if (options.fixHamzeh) { 40 | if (options.fixArabicHamzeh) { 41 | text = fixArabicHamzeh(text); 42 | } 43 | 44 | text = fixHamzeh(text); 45 | } else if (options.fixSuffixZwnj) { 46 | if (options.fixArabicHamzeh) { 47 | text = removeArabicHamzeh(text); 48 | } 49 | 50 | text = fixSuffixSpacingHamzeh(text); 51 | } 52 | 53 | if (options.removeRLM) { 54 | text = removeRLM(text); 55 | } 56 | 57 | if (options.fixZWNJ) { 58 | text = fixZWNJ(text); 59 | } 60 | 61 | if (options.fixArabicNumbers) { 62 | text = fixArabicNumbers(text); 63 | } 64 | return text; 65 | } 66 | -------------------------------------------------------------------------------- /src/plugins/uri/index.ts: -------------------------------------------------------------------------------- 1 | import { preservation } from "../../utils/preservation"; 2 | 3 | // @Ref https://github.com/jhermsmeier/uri.regex 4 | const patternURI = 5 | "([A-Za-z][A-Za-z0-9+\\-.]*):(?:(//)(?:((?:[A-Za-z0-9\\-._~!$&'()*+,;=:]|%[0-9A-Fa-f]{2})*)@)?((?:\\[(?:(?:(?:(?:[0-9A-Fa-f]{1,4}:){6}|::(?:[0-9A-Fa-f]{1,4}:){5}|(?:[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,1}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){3}|(?:(?:[0-9A-Fa-f]{1,4}:){0,2}[0-9A-Fa-f]{1,4})?::(?:[0-9A-Fa-f]{1,4}:){2}|(?:(?:[0-9A-Fa-f]{1,4}:){0,3}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}:|(?:(?:[0-9A-Fa-f]{1,4}:){0,4}[0-9A-Fa-f]{1,4})?::)(?:[0-9A-Fa-f]{1,4}:[0-9A-Fa-f]{1,4}|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(?:(?:[0-9A-Fa-f]{1,4}:){0,5}[0-9A-Fa-f]{1,4})?::[0-9A-Fa-f]{1,4}|(?:(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4})?::)|[Vv][0-9A-Fa-f]+\\.[A-Za-z0-9\\-._~!$&'()*+,;=:]+)\\]|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)|(?:[A-Za-z0-9\\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})*))(?::([0-9]*))?((?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|/((?:(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)?)|((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})+(?:/(?:[A-Za-z0-9\\-._~!$&'()*+,;=:@]|%[0-9A-Fa-f]{2})*)*)|)(?:\\?((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?(?:\\#((?:[A-Za-z0-9\\-._~!$&'()*+,;=:@/?]|%[0-9A-Fa-f]{2})*))?"; 6 | 7 | export const uri: string[] = []; 8 | 9 | // preserves all uri strings in the text 10 | export function preserveUri(text: string): string { 11 | return text.replace(new RegExp(patternURI, "g"), function(matched) { 12 | uri.push(matched); 13 | return preservation("uri"); 14 | }); 15 | } 16 | 17 | // bringing back URIs 18 | export function restoreUri(text: string): string { 19 | return text.replace( 20 | new RegExp(`[ ]?${preservation("uri").trim()}[ ]?`, "g"), 21 | () => uri.shift() as string, 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /src/steps/fix.ts: -------------------------------------------------------------------------------- 1 | import { fixBracesSpacing, fixBracesSpacingInside } from "../plugins/braces"; 2 | import { fixDates } from "../plugins/dates"; 3 | import { fixDiacritics, removeDiacritics } from "../plugins/diacritics"; 4 | import { removeKashidas } from "../plugins/kashidas"; 5 | import { fixLineBreaks } from "../plugins/lineBreaks"; 6 | import { removeExtraMarks } from "../plugins/marks"; 7 | import { fixPunctuationSpacing } from "../plugins/punctuations"; 8 | import { fixMiscSpacing, removeSpaces } from "../plugins/spaces"; 9 | import { fixPrefixZwnj, fixSuffixZwnj, fixZWNJLate } from "../plugins/zwnj"; 10 | import { IOptions } from "../types"; 11 | 12 | export default function fix(options: IOptions, text: string): string { 13 | if (options.fixDates) { 14 | text = fixDates(text); 15 | } 16 | 17 | if (options.fixPrefixZwnj) { 18 | text = fixPrefixZwnj(text); 19 | } 20 | 21 | if (options.fixSuffixZwnj) { 22 | text = fixSuffixZwnj(text); 23 | } 24 | 25 | if (options.fixBracesSpacing) { 26 | text = fixBracesSpacing(text); 27 | } 28 | 29 | if (options.removeExtraMarks) { 30 | text = removeExtraMarks(text); 31 | } 32 | 33 | if (options.fixPunctuationSpacing) { 34 | text = fixPunctuationSpacing(text); 35 | } 36 | 37 | if (options.removeKashidas) { 38 | text = removeKashidas(text); 39 | } 40 | 41 | // doing it again after `fixPunctuationSpacing()` 42 | if (options.fixBracesSpacing) { 43 | text = fixBracesSpacingInside(text); 44 | } 45 | 46 | if (options.fixMiscSpacing) { 47 | text = fixMiscSpacing(text); 48 | } 49 | 50 | if (options.removeDiacritics) { 51 | text = removeDiacritics(text); 52 | } else if (options.fixDiacritics) { 53 | text = fixDiacritics(text); 54 | } 55 | 56 | if (options.removeSpaces) { 57 | text = removeSpaces(text); 58 | } 59 | 60 | if (options.fixZWNJ) { 61 | text = fixZWNJLate(text); 62 | } 63 | 64 | if (options.fixLineBreaks) { 65 | text = fixLineBreaks(text); 66 | } 67 | return text; 68 | } 69 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: NPM publish 2 | 3 | on: 4 | release: 5 | # This specifies that the build will be triggered when we publish a release 6 | types: [published] 7 | 8 | jobs: 9 | build: 10 | 11 | # Run on latest version of ubuntu 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v2 16 | with: 17 | # "ref" specifies the branch to check out. 18 | # "github.event.release.target_commitish" is a global variable and specifies the branch the release targeted 19 | ref: ${{ github.event.release.target_commitish }} 20 | # install Node.js 21 | - name: Use Node.js 12 22 | uses: actions/setup-node@v1 23 | with: 24 | node-version: 12 25 | # Specifies the registry, this field is required! 26 | registry-url: https://registry.npmjs.org/ 27 | # clean install of your projects' deps. We use "npm ci" to avoid package lock changes 28 | - run: npm ci 29 | # set up git since we will later push to the repo 30 | - run: git config --global user.name "GitHub CD bot" 31 | - run: git config --global user.email "github-cd-bot@example.com" 32 | # upgrade npm version in package.json to the tag used in the release. 33 | - run: npm version ${{ github.event.release.tag_name }} || true 34 | 35 | - name: Run the lints 36 | run: npm run lint 37 | 38 | - name: Run Prettier 39 | run: npm run prettier:check 40 | 41 | - name: Run the tests 42 | run: npm run test 43 | 44 | - name: Build 45 | run: npm run build 46 | 47 | # publish to NPM -> there is one caveat, continue reading for the fix 48 | - run: npm publish --tag ${{ github.event.release.target_commitish }} --access public 49 | env: 50 | # Use a token to publish to NPM. See below for how to set it up 51 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 52 | # push the version changes to GitHub 53 | - run: git push 54 | env: 55 | # The secret is passed automatically. Nothing to configure. 56 | github-token: ${{ secrets.GITHUB_TOKEN }} 57 | -------------------------------------------------------------------------------- /src/steps/wordLevelFix.ts: -------------------------------------------------------------------------------- 1 | import { fixNonPersianChars } from "../plugins/chars"; 2 | import { fixQuestionMarks } from "../plugins/marks"; 3 | import { fixEnglishNumbers } from "../plugins/numbers"; 4 | import { fixNumeralSymbols } from "../plugins/numeralSymbols"; 5 | import { fixPunctuations } from "../plugins/punctuations"; 6 | import { IOptions } from "../types"; 7 | 8 | export default function wordLevelFix(options: IOptions, text: string): string { 9 | text = text.replace( 10 | /(^|\s+)([[({"'“«]?)(\S+)([\])}"'”»]?)(?=($|\s+))/g, 11 | (matched, word, trailings, after) => { 12 | // should not replace to persian chars in english phrases 13 | if (word.match(/[a-zA-Z\-_]{2,}/g)) { 14 | return matched; 15 | } 16 | 17 | // should not touch sprintf directives 18 | // @Ref https://stackoverflow.com/a/8915445/ 19 | if ( 20 | word.match( 21 | /%(?:\d+\$)?[+-]?(?:[ 0]|'.{1})?-?\d*(?:\.\d+)?[bcdeEufFgGosxX]/g, 22 | ) 23 | ) { 24 | return matched; 25 | } 26 | 27 | // should not touch numbers in html entities 28 | if (word.match(/&#\d+;/g)) { 29 | return matched; 30 | } 31 | 32 | // skips converting english numbers of ordered lists in markdown 33 | if ( 34 | options.skipMarkdownOrderedListsNumbersConversion && 35 | (matched + trailings + after).match( 36 | /(?:(?:\r?\n)|(?:\r\n?)|(?:^|\n))\d+\.\s/, 37 | ) 38 | ) { 39 | return matched; 40 | } 41 | 42 | if (options.fixEnglishNumbers) { 43 | matched = fixEnglishNumbers(matched); 44 | } 45 | 46 | if (options.fixNumeralSymbols) { 47 | matched = fixNumeralSymbols(matched); 48 | } 49 | 50 | if (options.fixPunctuations) { 51 | matched = fixPunctuations(matched); 52 | } 53 | 54 | if (options.fixNonPersianChars) { 55 | matched = fixNonPersianChars(matched); 56 | } 57 | 58 | if (options.fixQuestionMarks) { 59 | matched = fixQuestionMarks(matched); 60 | } 61 | 62 | return matched; 63 | }, 64 | ); 65 | return text; 66 | } 67 | -------------------------------------------------------------------------------- /src/plugins/chars/index.ts: -------------------------------------------------------------------------------- 1 | import { charLevelReplace } from "../../utils/charLevelReplace"; 2 | 3 | export function fixNonPersianChars(text: string): string { 4 | return charLevelReplace(text, "كڪيىۍېہە", "ککییییههه"); 5 | } 6 | 7 | export function fixArabicHamzeh(text: string): string { 8 | return ( 9 | text 10 | 11 | // converts arabic hamzeh ة to هٔ 12 | .replace(/(\S)ة([\s\u200c\u200e])/g, "$1هٔ$2") 13 | ); 14 | } 15 | 16 | export function removeArabicHamzeh(text: string): string { 17 | return ( 18 | text 19 | // converts arabic hamzeh ة to ه‌ی 20 | .replace(/(\S)ة([\s\u200c\u200e])/g, "$1ه‌ی$2") 21 | ); 22 | } 23 | 24 | export function fixHamzeh(text: string): string { 25 | const replacement = "$1هٔ$3"; 26 | return ( 27 | text 28 | 29 | // replaces ه followed by (space|ZWNJ|lrm) follow by ی with هٔ 30 | .replace(/(\S)(ه[\s\u200c\u200e]+[یي])([\s\u200c\u200e])/g, replacement) // heh + ye 31 | 32 | // replaces ه followed by (space|ZWNJ|lrm|nothing) follow by ء with هٔ 33 | .replace(/(\S)(ه[\s\u200c\u200e]?\u0621)([\s\u200c\u200e])/g, replacement) // heh + standalone hamza 34 | 35 | // replaces هٓ or single-character ۀ with the standard هٔ 36 | // @Ref ebraminio/persiantools 37 | .replace(/(ۀ|هٓ)/g, "هٔ") 38 | ); 39 | } 40 | 41 | export function fixSuffixSpacingHamzeh(text: string): string { 42 | const replacement = "$1\u0647\u200c\u06cc$3"; 43 | return ( 44 | text 45 | 46 | // heh + ye 47 | .replace(/(\S)(ه[\s\u200c]+[یي])([\s\u200c])/g, replacement) 48 | 49 | // heh + standalone hamza 50 | .replace(/(\S)(ه[\s\u200c]?\u0621)([\s\u200c])/g, replacement) 51 | 52 | // heh + hamza above 53 | .replace(/(\S)(ه[\s\u200c]?\u0654)([\s\u200c])/g, replacement) 54 | ); 55 | } 56 | 57 | export function fixSuffixMisc(text: string): string { 58 | return ( 59 | text 60 | // replaces ه followed by ئ or ی, and then by ی, with ه\u200cای, 61 | // EXAMPLE: خانه‌ئی becomes خانه‌ای 62 | // @Ref ebraminio/persiantools 63 | .replace(/(\S)ه[\u200c\u200e][ئی]ی([\s\u200c\u200e])/g, "$1ه\u200cای$2") 64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /src/plugins/zwnj/index.ts: -------------------------------------------------------------------------------- 1 | import { persianChars, persianDiacritic } from "../../constants"; 2 | 3 | // @Ref https://github.com/ebraminio/persiantools 4 | export function fixZWNJ(text: string): string { 5 | return ( 6 | text 7 | // converts all soft hyphens (­) into zwnj 8 | .replace(/\u00ad/g, "\u200c") 9 | 10 | // removes more than one zwnj 11 | .replace(/\u200c{2,}/g, "\u200c") 12 | 13 | // cleans zwnj before and after numbers, english words, spaces and punctuations 14 | .replace( 15 | /\u200c([\w\s0-9۰-۹[\](){}«»“”.…,:;?!$%@#*=+\-/\\،؛٫٬×٪؟ـ])/g, 16 | "$1", 17 | ) 18 | .replace( 19 | /([\w\s0-9۰-۹[\](){}«»“”.…,:;?!$%@#*=+\-/\\،؛٫٬×٪؟ـ])\u200c/g, 20 | "$1", 21 | ) 22 | 23 | // removes unnecessary zwnj on start/end of each line 24 | .replace(/(^\u200c|\u200c$)/gm, "") 25 | ); 26 | } 27 | 28 | // puts zwnj between the word and the prefix: 29 | // - mi* nemi* bi* 30 | // NOTE: there's a possible bug here: prefixes could be separate nouns 31 | export function fixPrefixZwnj(text: string): string { 32 | const replacement = "$1\u200c$3"; 33 | return text 34 | .replace(/((\s|^)ن?می) ([^ ])/g, replacement) 35 | .replace(/((\s|^)بی) ([^ ])/g, replacement); // @Ref zoghal 36 | } 37 | 38 | const suffixPattern = "\\s.,;،؛!؟?\"'()[\\]{}“”«»"; 39 | 40 | // puts zwnj between the word and the suffix 41 | // NOTE: possible bug: suffixes could be nouns 42 | export function fixSuffixZwnj(text: string): string { 43 | const replacement = "$1\u200c$2"; 44 | return ( 45 | text 46 | 47 | // must done before others 48 | // *ha *haye 49 | .replace( 50 | new RegExp( 51 | "([" + 52 | persianChars + 53 | persianDiacritic + 54 | "]) (ها(ی)?[" + 55 | suffixPattern + 56 | "])", 57 | "g", 58 | ), 59 | replacement, 60 | ) 61 | 62 | // *am *at *ash *ei *eid *eem *and *man *tan *shan 63 | .replace( 64 | new RegExp( 65 | "([" + 66 | persianChars + 67 | persianDiacritic + 68 | "]) ((ام|ات|اش|ای|اید|ایم|اند|مان|تان|شان)[" + 69 | suffixPattern + 70 | "])", 71 | "g", 72 | ), 73 | replacement, 74 | ) 75 | 76 | // *tar *tari *tarin 77 | .replace( 78 | new RegExp( 79 | "([" + 80 | persianChars + 81 | persianDiacritic + 82 | "]) (تر((ی)|(ین))?[" + 83 | suffixPattern + 84 | "])", 85 | "g", 86 | ), 87 | replacement, 88 | ) 89 | 90 | // *hayee *hayam *hayat *hayash *hayetan *hayeman *hayeshan 91 | .replace( 92 | new RegExp( 93 | "([" + 94 | persianChars + 95 | persianDiacritic + 96 | "]) ((هایی|هایم|هایت|هایش|هایمان|هایتان|هایشان)[" + 97 | suffixPattern + 98 | "])", 99 | "g", 100 | ), 101 | replacement, 102 | ) 103 | 104 | // replaces ه followed by ئ or ی, and then by ی, with ه\u200cای, 105 | // EXAMPLE: خانه‌ئی becomes خانه‌ای 106 | // @Ref ebraminio/persiantools 107 | .replace(/(\S)ه[\u200c\u200e][ئی]ی([\s\u200c\u200e])/g, "$1ه\u200cای$2") 108 | ); 109 | } 110 | 111 | // late checks for zwnj 112 | export function fixZWNJLate(text: string): string { 113 | return ( 114 | text 115 | 116 | // cleans zwnj after characters that don't connect to the next 117 | .replace(/([إأةؤورزژاآدذ،؛,:«»\\/@#$٪×*()ـ\-=|])\u200c/g, "$1") 118 | ); 119 | } 120 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Basic Options */ 4 | // "incremental": true, /* Enable incremental compilation */ 5 | "target": "es5", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019' or 'ESNEXT'. */ 6 | "module": "commonjs", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', or 'ESNext'. */ 7 | "lib": [ 8 | "es2017", 9 | "es7", 10 | "es6", 11 | "dom" 12 | ], /* Specify library files to be included in the compilation. */ 13 | // "allowJs": true, /* Allow javascript files to be compiled. */ 14 | // "checkJs": true, /* Report errors in .js files. */ 15 | // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', or 'react'. */ 16 | "declaration": true, /* Generates corresponding '.d.ts' file. */ 17 | // "declarationMap": true, /* Generates a sourcemap for each corresponding '.d.ts' file. */ 18 | // "sourceMap": true, /* Generates corresponding '.map' file. */ 19 | // "outFile": "./", /* Concatenate and emit output to single file. */ 20 | "outDir": "./dist", /* Redirect output structure to the directory. */ 21 | // "rootDir": "./", /* Specify the root directory of input files. Use to control the output directory structure with --outDir. */ 22 | // "composite": true, /* Enable project compilation */ 23 | // "tsBuildInfoFile": "./", /* Specify file to store incremental compilation information */ 24 | // "removeComments": true, /* Do not emit comments to output. */ 25 | // "noEmit": true, /* Do not emit outputs. */ 26 | // "importHelpers": true, /* Import emit helpers from 'tslib'. */ 27 | // "downlevelIteration": true, /* Provide full support for iterables in 'for-of', spread, and destructuring when targeting 'ES5' or 'ES3'. */ 28 | // "isolatedModules": true, /* Transpile each file as a separate module (similar to 'ts.transpileModule'). */ 29 | /* Strict Type-Checking Options */ 30 | "strict": true, /* Enable all strict type-checking options. */ 31 | // "noImplicitAny": true, /* Raise error on expressions and declarations with an implied 'any' type. */ 32 | // "strictNullChecks": true, /* Enable strict null checks. */ 33 | // "strictFunctionTypes": true, /* Enable strict checking of function types. */ 34 | // "strictBindCallApply": true, /* Enable strict 'bind', 'call', and 'apply' methods on functions. */ 35 | // "strictPropertyInitialization": true, /* Enable strict checking of property initialization in classes. */ 36 | // "noImplicitThis": true, /* Raise error on 'this' expressions with an implied 'any' type. */ 37 | // "alwaysStrict": true, /* Parse in strict mode and emit "use strict" for each source file. */ 38 | /* Additional Checks */ 39 | // "noUnusedLocals": true, /* Report errors on unused locals. */ 40 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 41 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. */ 42 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 43 | /* Module Resolution Options */ 44 | // "moduleResolution": "node", /* Specify module resolution strategy: 'node' (Node.js) or 'classic' (TypeScript pre-1.6). */ 45 | // "baseUrl": "./", /* Base directory to resolve non-absolute module names. */ 46 | // "paths": {}, /* A series of entries which re-map imports to lookup locations relative to the 'baseUrl'. */ 47 | // "rootDirs": [], /* List of root folders whose combined content represents the structure of the project at runtime. */ 48 | // "typeRoots": [], /* List of folders to include type definitions from. */ 49 | // "types": [], /* Type declaration files to be included in compilation. */ 50 | // "allowSyntheticDefaultImports": true, /* Allow default imports from modules with no default export. This does not affect code emit, just typechecking. */ 51 | "esModuleInterop": true, /* Enables emit interoperability between CommonJS and ES Modules via creation of namespace objects for all imports. Implies 'allowSyntheticDefaultImports'. */ 52 | // "preserveSymlinks": true, /* Do not resolve the real path of symlinks. */ 53 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ 54 | /* Source Map Options */ 55 | // "sourceRoot": "", /* Specify the location where debugger should locate TypeScript files instead of source locations. */ 56 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ 57 | // "inlineSourceMap": true, /* Emit a single file with source maps instead of having a separate file. */ 58 | // "inlineSources": true, /* Emit the source alongside the sourcemaps within a single file; requires '--inlineSourceMap' or '--sourceMap' to be set. */ 59 | /* Experimental Options */ 60 | // "experimentalDecorators": true, /* Enables experimental support for ES7 decorators. */ 61 | // "emitDecoratorMetadata": true, /* Enables experimental support for emitting type metadata for decorators. */ 62 | /* Advanced Options */ 63 | //"forceConsistentCasingInFileNames": true /* Disallow inconsistently-cased references to the same file. */ 64 | }, 65 | "include": ["**/*"], 66 | "exclude": [ 67 | "node_modules", 68 | "dist", 69 | "test", 70 | "**/*.spec.ts" 71 | ] 72 | } --------------------------------------------------------------------------------