├── .gitattributes ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ └── CI.yml ├── .gitignore ├── .gitmodules ├── .prettierignore ├── .prettierrc.js ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bindings ├── java │ ├── .cargo │ │ └── config.toml │ ├── Cargo.toml │ ├── bin │ │ └── bindings.zip │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── rustfmt.toml │ └── src │ │ ├── lib.rs │ │ ├── main │ │ └── java │ │ │ └── io │ │ │ └── github │ │ │ └── null8626 │ │ │ └── decancer │ │ │ ├── CuredString.java │ │ │ ├── Match.java │ │ │ └── Options.java │ │ └── test │ │ └── java │ │ └── io │ │ └── github │ │ └── null8626 │ │ └── decancer │ │ └── DecancerTest.java ├── native │ ├── .cargo │ │ └── config.toml │ ├── .clang-format │ ├── Cargo.toml │ ├── README.md │ ├── build.rs │ ├── decancer.h │ ├── decancer.rc │ ├── docs │ │ ├── Doxyfile │ │ ├── docgen.mjs │ │ ├── header.html │ │ ├── script.js │ │ ├── search.css │ │ └── style.css │ ├── rustfmt.toml │ ├── src │ │ ├── lib.rs │ │ ├── ptr.rs │ │ ├── utf16.rs │ │ └── utf8.rs │ └── tests │ │ ├── CMakeLists.txt │ │ └── test.mjs ├── node │ ├── .cargo │ │ └── config.toml │ ├── .npmignore │ ├── Cargo.toml │ ├── README.md │ ├── build.rs │ ├── npm │ │ ├── android-arm-eabi │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── android-arm64 │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── darwin-arm64 │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── darwin-x64 │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── freebsd-x64 │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── linux-arm-gnueabihf │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── linux-arm64-gnu │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── linux-arm64-musl │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── linux-x64-gnu │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── linux-x64-musl │ │ │ ├── README.md │ │ │ └── package.json │ │ ├── win32-arm64-msvc │ │ │ 
├── README.md │ │ │ └── package.json │ │ ├── win32-ia32-msvc │ │ │ ├── README.md │ │ │ └── package.json │ │ └── win32-x64-msvc │ │ │ ├── README.md │ │ │ └── package.json │ ├── package.json │ ├── rustfmt.toml │ ├── src │ │ ├── lib.js │ │ └── lib.rs │ ├── test.cjs │ └── typings.d.ts └── wasm │ ├── Cargo.toml │ ├── bin │ ├── decancer.min.js │ └── decancer.wasm │ ├── example.html │ ├── rustfmt.toml │ ├── src │ └── lib.rs │ └── tests │ ├── index.html │ ├── index.mjs │ ├── package.json │ └── server.mjs ├── core ├── Cargo.toml ├── README.md ├── benches │ └── decancer_bench.rs ├── bin │ ├── bidi.bin │ └── codepoints.bin ├── build.rs ├── rustfmt.toml └── src │ ├── bidi │ ├── brackets.rs │ ├── class.rs │ ├── level.rs │ ├── mod.rs │ └── paragraph.rs │ ├── codepoints.rs │ ├── leetspeak.rs │ ├── lib.rs │ ├── options.rs │ ├── similar.rs │ ├── string.rs │ ├── tests.rs │ ├── translation.rs │ └── util.rs ├── scripts ├── ci_artifacts.mjs ├── ci_native_artifacts.mjs ├── ci_readme.mjs ├── ci_setup.mjs ├── ci_setup_pages.mjs ├── ci_validate_java_artifacts.mjs ├── pretty.mjs ├── read.mjs ├── update_unicode.mjs ├── util.mjs ├── version.mjs └── write.mjs └── yarn.lock /.gitattributes: -------------------------------------------------------------------------------- 1 | .github/** eol=lf 2 | .gitignore eol=lf 3 | .gitmodules eol=lf 4 | **/*.bin binary 5 | **/*.wasm binary 6 | **/*.jar binary 7 | **/*.zip binary 8 | **/*.h eol=lf 9 | .prettierrc.js linguist-vendored=true eol=lf 10 | **/CMakeLists.txt linguist-vendored=true eol=lf 11 | **/Doxyfile linguist-vendored=true eol=lf 12 | **/*.css linguist-vendored=true eol=lf 13 | **/*.ts linguist-vendored=true eol=lf 14 | **/*.mjs linguist-vendored=true eol=lf 15 | **/*.cjs linguist-vendored=true eol=lf 16 | **/*.html linguist-vendored=true eol=lf 17 | **/*.rc linguist-vendored=true eol=lf 18 | bindings/native/docs/** linguist-vendored=true eol=lf 19 | **/*.java eol=lf 20 | **/*.rs eol=lf 21 | bindings/node/src/lib.js eol=lf 22 | **/*.md eol=lf 23 
| **/*.bat eol=lf 24 | **/*.toml eol=lf 25 | **/*.properties eol=lf 26 | **/*.json eol=lf 27 | bindings/java/gradlew eol=lf -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: null8626 2 | ko_fi: null8626 -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | directory: "/core" 5 | commit-message: 6 | prefix: "deps[core]: " 7 | schedule: 8 | day: "saturday" 9 | interval: "weekly" 10 | time: "06:00" 11 | - package-ecosystem: "cargo" 12 | directory: "/bindings/node" 13 | commit-message: 14 | prefix: "deps[node]: " 15 | schedule: 16 | day: "saturday" 17 | interval: "weekly" 18 | time: "06:05" 19 | - package-ecosystem: "npm" 20 | directory: "/bindings/node" 21 | commit-message: 22 | prefix: "deps[node]: " 23 | schedule: 24 | day: "saturday" 25 | interval: "weekly" 26 | time: "06:10" 27 | - package-ecosystem: "cargo" 28 | directory: "/bindings/wasm" 29 | commit-message: 30 | prefix: "deps[wasm]: " 31 | schedule: 32 | day: "saturday" 33 | interval: "weekly" 34 | time: "06:15" 35 | - package-ecosystem: "npm" 36 | directory: "/bindings/wasm/tests" 37 | commit-message: 38 | prefix: "deps[wasm-tests]: " 39 | schedule: 40 | day: "saturday" 41 | interval: "weekly" 42 | time: "06:20" 43 | - package-ecosystem: "cargo" 44 | directory: "/bindings/java" 45 | commit-message: 46 | prefix: "deps[java]: " 47 | schedule: 48 | day: "saturday" 49 | interval: "weekly" 50 | time: "06:25" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/node_modules/ 2 | **/package-lock.json 3 | **/Cargo.lock 4 | **/*.node 5 | **/*.dll 6 | 
**/*.dylib 7 | **/*.so 8 | **/*.exe 9 | **/*.jar 10 | **/*.lib 11 | **/*.obj 12 | **/target/ 13 | **/artifacts/ 14 | **/java-artifacts/ 15 | 16 | .yarn/ 17 | .yarnrc.yml 18 | wasm_example.html 19 | native_docs/ 20 | bindings/java/.gradle/ 21 | bindings/java/build/ 22 | bindings/java/gradle.properties 23 | bindings/native/docs/package.json 24 | bindings/native/docs/html/ 25 | bindings/native/docs/xml/ 26 | bindings/native/tests/*.c 27 | bindings/native/tests/build/ 28 | bindings/native/build/ 29 | bindings/wasm/pkg/ 30 | core/proptest-regressions/ 31 | 32 | /*.json 33 | /*.bin 34 | /*.txt 35 | /*.sh -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "bindings/native/docs/doxygen-awesome-css"] 2 | path = bindings/native/docs/doxygen-awesome-css 3 | url = https://github.com/jothepro/doxygen-awesome-css.git 4 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | **/target/** 2 | **/node_modules/** 3 | **/package-lock.json 4 | /*.json 5 | bindings/wasm/bin/** 6 | bindings/wasm/pkg/** -------------------------------------------------------------------------------- /.prettierrc.js: -------------------------------------------------------------------------------- 1 | const SETUP_OUTPUTS = process.env.DECANCER_SETUP_OUTPUTS 2 | ? 
JSON.parse(process.env.DECANCER_SETUP_OUTPUTS) 3 | : {} 4 | const plugins = [] 5 | 6 | if ( 7 | SETUP_OUTPUTS.release !== 'null' || 8 | SETUP_OUTPUTS.java_affected === 'true' 9 | ) { 10 | plugins.push(require.resolve('prettier-plugin-java')) 11 | } 12 | 13 | module.exports = { 14 | semi: false, 15 | singleQuote: true, 16 | trailingComma: 'none', 17 | arrowParens: 'avoid', 18 | htmlWhitespaceSensitivity: 'ignore', 19 | plugins 20 | } 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing guide 2 | 3 | If you want to contribute to the source code, feel free to do so! ❤️ 4 | 5 | Don't forget to state what should be changed and why. 6 | 7 | If you want to view or modify the supported codepoints and/or their respective translation(s), feel free to do so! Here is how to do it: 8 | 9 | - **To convert the binary into a readable text file, do the following:** 10 | 11 | ```console 12 | node scripts/read.mjs path/to/output.txt 13 | ``` 14 | 15 | - **To convert the binary into a readable and modifiable JSON, do the following:** 16 | 17 | ```console 18 | node scripts/read.mjs [path/to/output.json] 19 | ``` 20 | 21 | If the output file name is not supplied, it will default to `output.json`. 22 | 23 | - **And to validate, optimize, and convert the JSON back into a binary, do the following:** 24 | 25 | ```console 26 | node scripts/write.mjs path/to/input.json 27 | ``` 28 | 29 | - **The structure of the JSON is as follows:** 30 | 31 | The optional fields here are only optional when writing and encoding them back into a binary. 
32 | 33 | ```ts 34 | interface Codepoint { 35 | codepoint: number 36 | translation: string 37 | } 38 | 39 | interface JsonContents { 40 | codepoints: Codepoint[] 41 | similar: string[][] 42 | } 43 | ``` 44 | 45 | - **Information regarding the `Codepoint` structure:** 46 | 47 | - The `JsonContents#codepoints` array **must NOT be empty.** 48 | - `Codepoint#codepoint` is the Unicode codepoint. It must be in the range `\u80` to `\ue00ff` and must NOT be a [surrogate](https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates). 49 | - `Codepoint#translation` is the translation string: its length must not exceed `15`, it must be in lowercase, and it must be in ASCII. 50 | 51 | - **Information regarding the `JsonContents#similar` field:** 52 | 53 | - The `string[][]` two-dimensional array **must NOT be empty** and its length **must NOT exceed `127`.** 54 | - The `string[]` arrays **must NOT be empty** and their lengths **must NOT exceed `255`.** 55 | - Each `string` **must ONLY be one character long.** 56 | - Each `string` **must be ASCII.** 57 | 58 | ## Other scripts 59 | 60 | Other utility scripts are provided to help with maintaining `decancer`. 
61 | 62 | ### Pretty source code files 63 | 64 | Prerequisites: 65 | 66 | - [clang-format v18 or later](https://clang.llvm.org) 67 | - [Rust v1.65 or later](https://www.rust-lang.org) 68 | 69 | ```console 70 | node scripts/pretty.mjs 71 | ``` 72 | 73 | ### Bump version number 74 | 75 | ```console 76 | node scripts/version.mjs 1.2.3 77 | ``` 78 | 79 | ### Update cache if a new unicode version is released 80 | 81 | ```console 82 | node scripts/update_unicode.mjs 83 | ``` 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2025 null8626 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /bindings/java/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.aarch64-unknown-linux-musl] 2 | linker = "aarch64-linux-musl-gcc" -------------------------------------------------------------------------------- /bindings/java/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decancer_java" 3 | version = "2.0.2" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "decancer" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | decancer = { path = "../../core" } 13 | jni = "0.21" -------------------------------------------------------------------------------- /bindings/java/bin/bindings.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/null8626/decancer/2c6679bd15877c28f7796b078209f3caabf5c1b3/bindings/java/bin/bindings.zip -------------------------------------------------------------------------------- /bindings/java/build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java-library' 3 | id 'maven-publish' 4 | id 'org.jreleaser' version '1.18.0' 5 | } 6 | 7 | group = 'io.github.null8626' 8 | version = '3.3.0' 9 | description = 'A library that removes common unicode confusables/homoglyphs from strings.' 10 | 11 | repositories { 12 | mavenCentral() 13 | maven { url = 'https://jitpack.io' } 14 | } 15 | 16 | publishing { 17 | publications { 18 | maven(MavenPublication) { 19 | artifactId = 'decancer' 20 | groupId = 'io.github.null8626' 21 | version = '3.3.0' 22 | 23 | from components.java 24 | 25 | pom { 26 | name = 'decancer' 27 | description = 'A library that removes common unicode confusables/homoglyphs from strings.' 
28 | url = 'https://github.com/null8626/decancer' 29 | inceptionYear = '2021' 30 | 31 | licenses { 32 | license { 33 | name = 'MIT License' 34 | distribution = 'repo' 35 | url = 'https://github.com/null8626/decancer/blob/v3.3.0/LICENSE' 36 | } 37 | } 38 | 39 | developers { 40 | developer { 41 | id = 'null8626' 42 | name = 'null8626' 43 | url = 'https://github.com/null8626' 44 | timezone = 'Asia/Jakarta' 45 | } 46 | } 47 | 48 | scm { 49 | url = 'https://github.com/null8626/decancer' 50 | connection = 'scm:git:git://github.com/null8626/decancer' 51 | developerConnection = 'scm:git:git://github.com/null8626/decancer' 52 | } 53 | 54 | issueManagement { 55 | system = 'GitHub' 56 | url = 'https://github.com/null8626/decancer/issues' 57 | } 58 | 59 | ciManagement { 60 | system = 'Github Actions' 61 | url = 'https://github.com/null8626/decancer/actions' 62 | } 63 | } 64 | } 65 | } 66 | 67 | repositories { 68 | maven { 69 | url = layout.buildDirectory.dir('staging-deploy') 70 | } 71 | } 72 | } 73 | 74 | jreleaser { 75 | project { 76 | name = 'decancer' 77 | version = '3.3.0' 78 | description = 'A library that removes common unicode confusables/homoglyphs from strings.' 79 | longDescription = 'A library that removes common unicode confusables/homoglyphs from strings. By default, the library supports filtering over 220,000 unicode codepoints and reordering right-to-left text as it were to be rendered graphically.' 
80 | license = 'MIT' 81 | inceptionYear = '2021' 82 | copyright = 'Copyright (c) 2021-2025 null8626' 83 | } 84 | 85 | signing { 86 | active = 'ALWAYS' 87 | armored = true 88 | } 89 | 90 | deploy { 91 | maven { 92 | mavenCentral { 93 | sonatype { 94 | active = 'ALWAYS' 95 | url = 'https://central.sonatype.com/api/v1/publisher' 96 | gitRootSearch = true 97 | maxRetries = 250 98 | stagingRepository(layout.buildDirectory.dir('staging-deploy').get().toString()) 99 | } 100 | } 101 | } 102 | } 103 | 104 | release { 105 | github { 106 | skipTag = true 107 | skipRelease = true 108 | } 109 | } 110 | } 111 | 112 | java { 113 | withJavadocJar() 114 | withSourcesJar() 115 | 116 | jar { 117 | archiveBaseName = 'decancer' 118 | archiveVersion = '3.3.0' 119 | 120 | from('./bin') { 121 | include '*.dll' 122 | include '*.dylib' 123 | include '*.so' 124 | } 125 | } 126 | 127 | javadoc { 128 | if (JavaVersion.current().isJava9Compatible()) { 129 | options.addBooleanOption('html5', true) 130 | } 131 | } 132 | } 133 | 134 | test { 135 | useJUnitPlatform() 136 | 137 | systemProperty 'DECANCER_TESTING', System.getProperty('DECANCER_TESTING') 138 | systemProperty 'java.library.path', "${System.getProperty('java.library.path')}${File.pathSeparatorChar}$rootDir/bin" 139 | } 140 | 141 | dependencies { 142 | implementation 'com.github.adamheinrich:native-utils:e6a3948966' 143 | implementation 'com.fizzed:jne:4.3.0' 144 | 145 | testImplementation 'org.junit.jupiter:junit-jupiter-api:5.12.2' 146 | testRuntimeOnly "org.junit.jupiter:junit-jupiter-engine:5.12.2" 147 | testRuntimeOnly "org.junit.platform:junit-platform-launcher:1.12.2" 148 | } 149 | 150 | tasks.withType(JavaCompile) { 151 | options.compilerArgs << "-Xlint:deprecation" 152 | } -------------------------------------------------------------------------------- /bindings/java/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/null8626/decancer/2c6679bd15877c28f7796b078209f3caabf5c1b3/bindings/java/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /bindings/java/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.1-bin.zip 4 | networkTimeout=10000 5 | validateDistributionUrl=true 6 | zipStoreBase=GRADLE_USER_HOME 7 | zipStorePath=wrapper/dists 8 | -------------------------------------------------------------------------------- /bindings/java/gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # SPDX-License-Identifier: Apache-2.0 19 | # 20 | 21 | ############################################################################## 22 | # 23 | # Gradle start up script for POSIX generated by Gradle. 24 | # 25 | # Important for running: 26 | # 27 | # (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is 28 | # noncompliant, but you have some other compliant shell such as ksh or 29 | # bash, then to run this script, type that shell name before the whole 30 | # command line, like: 31 | # 32 | # ksh Gradle 33 | # 34 | # Busybox and similar reduced shells will NOT work, because this script 35 | # requires all of these POSIX shell features: 36 | # * functions; 37 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 38 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 39 | # * compound commands having a testable exit status, especially «case»; 40 | # * various built-in commands including «command», «set», and «ulimit». 41 | # 42 | # Important for patching: 43 | # 44 | # (2) This script targets any POSIX shell, so it avoids extensions provided 45 | # by Bash, Ksh, etc; in particular arrays are avoided. 46 | # 47 | # The "traditional" practice of packing multiple parameters into a 48 | # space-separated string is a well documented source of bugs and security 49 | # problems, so this is (mostly) avoided, by progressively accumulating 50 | # options in "$@", and eventually passing that to Java. 51 | # 52 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 53 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 54 | # see the in-line comments for details. 55 | # 56 | # There are tweaks for specific operating systems such as AIX, CygWin, 57 | # Darwin, MinGW, and NonStop. 58 | # 59 | # (3) This script is generated from the Groovy template 60 | # https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 61 | # within the Gradle project. 62 | # 63 | # You can find Gradle at https://github.com/gradle/gradle/. 
64 | # 65 | ############################################################################## 66 | 67 | # Attempt to set APP_HOME 68 | 69 | # Resolve links: $0 may be a link 70 | app_path=$0 71 | 72 | # Need this for daisy-chained symlinks. 73 | while 74 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 75 | [ -h "$app_path" ] 76 | do 77 | ls=$( ls -ld "$app_path" ) 78 | link=${ls#*' -> '} 79 | case $link in #( 80 | /*) app_path=$link ;; #( 81 | *) app_path=$APP_HOME$link ;; 82 | esac 83 | done 84 | 85 | # This is normally unused 86 | # shellcheck disable=SC2034 87 | APP_BASE_NAME=${0##*/} 88 | # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) 89 | APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s\n' "$PWD" ) || exit 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH="\\\"\\\"" 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | if ! 
command -v java >/dev/null 2>&1 137 | then 138 | die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 139 | 140 | Please set the JAVA_HOME variable in your environment to match the 141 | location of your Java installation." 142 | fi 143 | fi 144 | 145 | # Increase the maximum file descriptors if we can. 146 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 147 | case $MAX_FD in #( 148 | max*) 149 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 150 | # shellcheck disable=SC2039,SC3045 151 | MAX_FD=$( ulimit -H -n ) || 152 | warn "Could not query maximum file descriptor limit" 153 | esac 154 | case $MAX_FD in #( 155 | '' | soft) :;; #( 156 | *) 157 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 158 | # shellcheck disable=SC2039,SC3045 159 | ulimit -n "$MAX_FD" || 160 | warn "Could not set maximum file descriptor limit to $MAX_FD" 161 | esac 162 | fi 163 | 164 | # Collect all arguments for the java command, stacking in reverse order: 165 | # * args from the command line 166 | # * the main class name 167 | # * -classpath 168 | # * -D...appname settings 169 | # * --module-path (only if needed) 170 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
171 | 172 | # For Cygwin or MSYS, switch paths to Windows format before running java 173 | if "$cygwin" || "$msys" ; then 174 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 175 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 176 | 177 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 178 | 179 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 180 | for arg do 181 | if 182 | case $arg in #( 183 | -*) false ;; # don't mess with options #( 184 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 185 | [ -e "$t" ] ;; #( 186 | *) false ;; 187 | esac 188 | then 189 | arg=$( cygpath --path --ignore --mixed "$arg" ) 190 | fi 191 | # Roll the args list around exactly as many times as the number of 192 | # args, so each arg winds up back in the position where it started, but 193 | # possibly modified. 194 | # 195 | # NB: a `for` loop captures its iteration list before it begins, so 196 | # changing the positional parameters here affects neither the number of 197 | # iterations, nor the values presented in `arg`. 198 | shift # remove old arg 199 | set -- "$@" "$arg" # push replacement arg 200 | done 201 | fi 202 | 203 | 204 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 205 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 206 | 207 | # Collect all arguments for the java command: 208 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, 209 | # and any embedded shellness will be escaped. 210 | # * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be 211 | # treated as '${Hostname}' itself on the command line. 212 | 213 | set -- \ 214 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 215 | -classpath "$CLASSPATH" \ 216 | -jar "$APP_HOME/gradle/wrapper/gradle-wrapper.jar" \ 217 | "$@" 218 | 219 | # Stop when "xargs" is not available. 220 | if ! 
command -v xargs >/dev/null 2>&1 221 | then 222 | die "xargs is not available" 223 | fi 224 | 225 | # Use "xargs" to parse quoted args. 226 | # 227 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 228 | # 229 | # In Bash we could simply go: 230 | # 231 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 232 | # set -- "${ARGS[@]}" "$@" 233 | # 234 | # but POSIX shell has neither arrays nor command substitution, so instead we 235 | # post-process each arg (as a line of input to sed) to backslash-escape any 236 | # character that might be a shell metacharacter, then use eval to reverse 237 | # that process (while maintaining the separation between arguments), and wrap 238 | # the whole thing up as a single "set" statement. 239 | # 240 | # This will of course break if any of these variables contains a newline or 241 | # an unmatched quote. 242 | # 243 | 244 | eval "set -- $( 245 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 246 | xargs -n1 | 247 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 248 | tr '\n' ' ' 249 | )" '"$@"' 250 | 251 | exec "$JAVACMD" "$@" 252 | -------------------------------------------------------------------------------- /bindings/java/gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | @rem SPDX-License-Identifier: Apache-2.0 17 | @rem 18 | 19 | @if "%DEBUG%"=="" @echo off 20 | @rem ########################################################################## 21 | @rem 22 | @rem Gradle startup script for Windows 23 | @rem 24 | @rem ########################################################################## 25 | 26 | @rem Set local scope for the variables with windows NT shell 27 | if "%OS%"=="Windows_NT" setlocal 28 | 29 | set DIRNAME=%~dp0 30 | if "%DIRNAME%"=="" set DIRNAME=. 31 | @rem This is normally unused 32 | set APP_BASE_NAME=%~n0 33 | set APP_HOME=%DIRNAME% 34 | 35 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 36 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 37 | 38 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 39 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 40 | 41 | @rem Find java.exe 42 | if defined JAVA_HOME goto findJavaFromJavaHome 43 | 44 | set JAVA_EXE=java.exe 45 | %JAVA_EXE% -version >NUL 2>&1 46 | if %ERRORLEVEL% equ 0 goto execute 47 | 48 | echo. 1>&2 49 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 50 | echo. 1>&2 51 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 52 | echo location of your Java installation. 1>&2 53 | 54 | goto fail 55 | 56 | :findJavaFromJavaHome 57 | set JAVA_HOME=%JAVA_HOME:"=% 58 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 59 | 60 | if exist "%JAVA_EXE%" goto execute 61 | 62 | echo. 1>&2 63 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 64 | echo. 1>&2 65 | echo Please set the JAVA_HOME variable in your environment to match the 1>&2 66 | echo location of your Java installation. 
1>&2 67 | 68 | goto fail 69 | 70 | :execute 71 | @rem Setup the command line 72 | 73 | set CLASSPATH= 74 | 75 | 76 | @rem Execute Gradle 77 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" -jar "%APP_HOME%\gradle\wrapper\gradle-wrapper.jar" %* 78 | 79 | :end 80 | @rem End local scope for the variables with windows NT shell 81 | if %ERRORLEVEL% equ 0 goto mainEnd 82 | 83 | :fail 84 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 85 | rem the _cmd.exe /c_ return code! 86 | set EXIT_CODE=%ERRORLEVEL% 87 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 88 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 89 | exit /b %EXIT_CODE% 90 | 91 | :mainEnd 92 | if "%OS%"=="Windows_NT" endlocal 93 | 94 | :omega 95 | -------------------------------------------------------------------------------- /bindings/java/rustfmt.toml: -------------------------------------------------------------------------------- 1 | ../../core/rustfmt.toml -------------------------------------------------------------------------------- /bindings/java/src/main/java/io/github/null8626/decancer/Match.java: -------------------------------------------------------------------------------- 1 | package io.github.null8626.decancer; 2 | 3 | /** 4 | * A match yielded by the CuredString.find() method. 5 | * 6 | * @author null8626 7 | * @version 3.3.0 8 | * @since 3.1.0 9 | */ 10 | public class Match { 11 | 12 | /** 13 | * The UTF-8 byte offset to the beginning of the match. 14 | * 15 | * @since 3.1.0 16 | */ 17 | public final long start; 18 | 19 | /** 20 | * The UTF-8 byte offset to the end of the match (non-inclusive). 
21 | * 22 | * @since 3.1.0 23 | */ 24 | public final long end; 25 | 26 | private final String matched; 27 | 28 | private Match(final long start, final long end, final String matched) { 29 | this.start = start; 30 | this.end = end; 31 | this.matched = matched; 32 | } 33 | 34 | /** 35 | * @return String The matched portion of the original String. 36 | * @since 3.1.0 37 | */ 38 | @Override 39 | public String toString() { 40 | return this.matched; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /bindings/java/src/test/java/io/github/null8626/decancer/DecancerTest.java: -------------------------------------------------------------------------------- 1 | package io.github.null8626.decancer; 2 | 3 | import org.junit.jupiter.api.AfterEach; 4 | import org.junit.jupiter.api.Assertions; 5 | import org.junit.jupiter.api.BeforeEach; 6 | import org.junit.jupiter.api.DisplayName; 7 | import org.junit.jupiter.api.Test; 8 | 9 | public class DecancerTest { 10 | 11 | private CuredString cured; 12 | 13 | @BeforeEach 14 | public void cure() { 15 | this.cured = new CuredString("vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣"); 16 | } 17 | 18 | @Test 19 | public void censor() throws Exception { 20 | try ( 21 | final CuredString string = new CuredString("wow heellllo wow hello wow!") 22 | ) { 23 | string.censor("hello", '*'); 24 | 25 | Assertions.assertEquals("wow ******** wow ***** wow!", string.toString()); 26 | } 27 | } 28 | 29 | @Test 30 | public void censorMultiple() throws Exception { 31 | try (final CuredString string = new CuredString("helloh yeah")) { 32 | final String[] keywords = { "hello", "oh yeah" }; 33 | string.censorMultiple(keywords, '*'); 34 | 35 | Assertions.assertEquals("***********", string.toString()); 36 | } 37 | } 38 | 39 | @Test 40 | public void replace() throws Exception { 41 | try ( 42 | final CuredString string = new CuredString("wow hello wow heellllo!") 43 | ) { 44 | string.replace("hello", "world"); 45 | 46 | Assertions.assertEquals("wow 
world wow world!", string.toString()); 47 | } 48 | } 49 | 50 | @Test 51 | public void replaceMultiple() throws Exception { 52 | try (final CuredString string = new CuredString("helloh yeah")) { 53 | final String[] keywords = { "hello", "oh yeah" }; 54 | string.replaceMultiple(keywords, "world"); 55 | 56 | Assertions.assertEquals("world", string.toString()); 57 | } 58 | } 59 | 60 | @Test 61 | public void find() { 62 | final Match[] match = this.cured.find("funny"); 63 | /* Compare exactly: a third numeric argument to assertEquals is a floating-point tolerance (delta), not a message, and would let wrong counts/offsets pass. */ 64 | Assertions.assertEquals(1, match.length); 65 | Assertions.assertEquals(5L, match[0].start); 66 | Assertions.assertEquals(10L, match[0].end); 67 | Assertions.assertEquals("funny", match[0].toString()); 68 | } 69 | 70 | @Test 71 | public void equals() { 72 | Assertions.assertTrue(this.cured.equals("very funny text")); 73 | } 74 | 75 | @Test 76 | public void startsWith() { 77 | Assertions.assertTrue(this.cured.startsWith("very")); 78 | } 79 | 80 | @Test 81 | public void endsWith() { 82 | Assertions.assertTrue(this.cured.endsWith("text")); 83 | } 84 | 85 | @Test 86 | public void contains() { 87 | Assertions.assertTrue(this.cured.contains("funny")); 88 | } 89 | 90 | @Test 91 | @DisplayName("toString()") 92 | public void toStringTest() { 93 | Assertions.assertEquals("very funny text", this.cured.toString()); 94 | } 95 | 96 | @AfterEach 97 | public void cleanup() { 98 | this.cured.close(); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /bindings/native/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.aarch64-unknown-linux-gnu] 2 | linker = "aarch64-linux-gnu-gcc" 3 | [target.aarch64-unknown-linux-musl] 4 | linker = "aarch64-linux-musl-gcc" 5 | [target.armv7-unknown-linux-gnueabihf] 6 | linker = "arm-linux-gnueabihf-gcc" -------------------------------------------------------------------------------- /bindings/native/.clang-format: 
-------------------------------------------------------------------------------- 1 | AttributeMacros: ['DECANCER_CXX_EXPORT', 'DECANCER_EXPORT'] 2 | TypenameMacros: ['DECANCER_EXPORT_NAME'] 3 | 4 | AccessModifierOffset: -2 5 | AlignAfterOpenBracket: BlockIndent 6 | AlignArrayOfStructures: Left 7 | AlignConsecutiveAssignments: None 8 | AlignConsecutiveBitFields: None 9 | AlignConsecutiveDeclarations: None 10 | AlignConsecutiveMacros: Consecutive 11 | AlignConsecutiveShortCaseStatements: 12 | Enabled: false 13 | AlignEscapedNewlines: Right 14 | AlignOperands: Align 15 | AlignTrailingComments: 16 | Kind: Always 17 | AllowAllArgumentsOnNextLine: false 18 | AllowAllParametersOfDeclarationOnNextLine: false 19 | AllowBreakBeforeNoexceptSpecifier: OnlyWithParen 20 | AllowShortBlocksOnASingleLine: Empty 21 | AllowShortCaseLabelsOnASingleLine: false 22 | AllowShortCompoundRequirementOnASingleLine: true 23 | AllowShortEnumsOnASingleLine: true 24 | AllowShortFunctionsOnASingleLine: Empty 25 | AllowShortIfStatementsOnASingleLine: Never 26 | AllowShortLoopsOnASingleLine: true 27 | AlwaysBreakBeforeMultilineStrings: false 28 | BinPackArguments: false 29 | BinPackParameters: false 30 | BitFieldColonSpacing: After 31 | BraceWrapping: 32 | AfterCaseLabel: false 33 | AfterClass: false 34 | AfterControlStatement: Never 35 | AfterEnum: false 36 | AfterFunction: false 37 | AfterNamespace: false 38 | AfterObjCDeclaration: false 39 | AfterStruct: false 40 | AfterUnion: false 41 | AfterExternBlock: false 42 | BeforeCatch: false 43 | BeforeElse: false 44 | BeforeLambdaBody: false 45 | BeforeWhile: false 46 | IndentBraces: false 47 | SplitEmptyFunction: false 48 | SplitEmptyRecord: false 49 | SplitEmptyNamespace: false 50 | BracedInitializerIndentWidth: 2 51 | BreakAdjacentStringLiterals: true 52 | BreakAfterAttributes: Leave 53 | BreakBeforeBinaryOperators: None 54 | BreakBeforeBraces: Custom 55 | BreakBeforeConceptDeclarations: Always 56 | BreakBeforeInlineASMColon: OnlyMultiline 57 | 
BreakBeforeTernaryOperators: true 58 | BreakStringLiterals: false 59 | ColumnLimit: 0 60 | CompactNamespaces: false 61 | ConstructorInitializerIndentWidth: 2 62 | ContinuationIndentWidth: 2 63 | Cpp11BracedListStyle: true 64 | DerivePointerAlignment: false 65 | EmptyLineAfterAccessModifier: Never 66 | EmptyLineBeforeAccessModifier: LogicalBlock 67 | FixNamespaceComments: true 68 | IncludeBlocks: Regroup 69 | IndentAccessModifiers: false 70 | IndentCaseBlocks: false 71 | IndentCaseLabels: false 72 | IndentExternBlock: Indent 73 | IndentGotoLabels: false 74 | IndentPPDirectives: None 75 | IndentRequiresClause: false 76 | IndentWidth: 2 77 | IndentWrappedFunctionNames: false 78 | InsertBraces: true 79 | InsertNewlineAtEOF: false 80 | InsertTrailingCommas: Wrapped 81 | IntegerLiteralSeparator: 82 | Binary: -1 83 | BinaryMinDigits: -1 84 | Decimal: -1 85 | DecimalMinDigits: -1 86 | Hex: -1 87 | HexMinDigits: -1 88 | KeepEmptyLinesAtEOF: false 89 | KeepEmptyLinesAtTheStartOfBlocks: false 90 | Language: Cpp 91 | LineEnding: LF 92 | MaxEmptyLinesToKeep: 1 93 | NamespaceIndentation: All 94 | PackConstructorInitializers: NextLine 95 | PointerAlignment: Left 96 | QualifierAlignment: Left 97 | ReferenceAlignment: Left 98 | RemoveParentheses: MultipleParentheses 99 | RemoveSemicolon: true 100 | RequiresClausePosition: OwnLine 101 | SeparateDefinitionBlocks: Always 102 | ShortNamespaceLines: 0 103 | SkipMacroDefinitionBody: true 104 | SortIncludes: Never 105 | SortUsingDeclarations: Lexicographic 106 | SpaceAfterCStyleCast: false 107 | SpaceAfterLogicalNot: false 108 | SpaceAfterTemplateKeyword: false 109 | SpaceBeforeAssignmentOperators: true 110 | SpaceBeforeCaseColon: false 111 | SpaceBeforeCpp11BracedList: false 112 | SpaceBeforeCtorInitializerColon: false 113 | SpaceBeforeInheritanceColon: false 114 | SpaceBeforeParens: Custom 115 | SpaceBeforeParensOptions: 116 | AfterControlStatements: true 117 | AfterForeachMacros: false 118 | AfterFunctionDeclarationName: false 119 | 
AfterFunctionDefinitionName: false 120 | AfterIfMacros: false 121 | AfterOverloadedOperator: false 122 | AfterRequiresInClause: false 123 | AfterRequiresInExpression: false 124 | BeforeNonEmptyParentheses: false 125 | SpaceBeforeRangeBasedForLoopColon: false 126 | SpaceBeforeSquareBrackets: false 127 | SpaceInEmptyBlock: false 128 | SpacesBeforeTrailingComments: 1 129 | SpacesInAngles: Never 130 | SpacesInLineCommentPrefix: 131 | Minimum: 1 132 | Maximum: -1 133 | SpacesInParens: Custom 134 | SpacesInParensOptions: 135 | InConditionalStatements: false 136 | InCStyleCasts: false 137 | InEmptyParentheses: false 138 | Other: false 139 | SpacesInSquareBrackets: false 140 | Standard: c++17 141 | UseTab: Never -------------------------------------------------------------------------------- /bindings/native/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decancer_native" 3 | version = "3.3.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | name = "decancer" 9 | crate-type = ["cdylib"] 10 | 11 | [lints.clippy] 12 | all = { level = "warn", priority = -1 } 13 | pedantic = { level = "warn", priority = -1 } 14 | cast-lossless = "allow" 15 | cast-possible-truncation = "allow" 16 | cast-possible-wrap = "allow" 17 | missing-panics-doc = "allow" 18 | missing-safety-doc = "allow" 19 | missing-transmute-annotations = "allow" 20 | single-match-else = "allow" 21 | 22 | [dependencies] 23 | decancer = { path = "../../core", default-features = false } 24 | 25 | [target."cfg(windows)".build-dependencies] 26 | embed-resource = "2" 27 | 28 | [features] 29 | default = ["options", "leetspeak"] 30 | options = ["decancer/options"] 31 | leetspeak = ["decancer/leetspeak"] -------------------------------------------------------------------------------- /bindings/native/README.md: -------------------------------------------------------------------------------- 1 | 3 | 4 | # decancer [![npm][npm-image]][npm-url] 
[![crates.io][crates-io-image]][crates-io-url] [![npm downloads][npm-downloads-image]][npm-url] [![crates.io downloads][crates-io-downloads-image]][crates-io-url] [![codacy][codacy-image]][codacy-url] [![ko-fi][ko-fi-brief-image]][ko-fi-url] 5 | 6 | [crates-io-image]: https://img.shields.io/crates/v/decancer?style=flat-square 7 | [crates-io-downloads-image]: https://img.shields.io/crates/d/decancer?style=flat-square 8 | [crates-io-url]: https://crates.io/crates/decancer 9 | [npm-image]: https://img.shields.io/npm/v/decancer.svg?style=flat-square 10 | [npm-url]: https://npmjs.org/package/decancer 11 | [npm-downloads-image]: https://img.shields.io/npm/dt/decancer.svg?style=flat-square 12 | [codacy-image]: https://app.codacy.com/project/badge/Grade/d740b1aa867d42f2b37eb992ad73784a 13 | [codacy-url]: https://app.codacy.com/gh/null8626/decancer/dashboard 14 | [ko-fi-brief-image]: https://img.shields.io/badge/donations-ko--fi-red?color=ff5e5b&style=flat-square 15 | [ko-fi-image]: https://ko-fi.com/img/githubbutton_sm.svg 16 | [ko-fi-url]: https://ko-fi.com/null8626 17 | 18 | A library that removes common unicode confusables/homoglyphs from strings. 19 | 20 | - Its core is written in [Rust](https://www.rust-lang.org) and utilizes a form of [**Binary Search**](https://en.wikipedia.org/wiki/Binary_search_algorithm) to ensure speed! 
21 | - By default, it's capable of filtering **221,529 (19.88%) different unicode codepoints** like: 22 | - All [whitespace characters](https://en.wikipedia.org/wiki/Whitespace_character) 23 | - All [diacritics](https://en.wikipedia.org/wiki/Diacritic); this also eliminates all forms of [Zalgo text](https://en.wikipedia.org/wiki/Zalgo_text) 24 | - Most [leetspeak characters](https://en.wikipedia.org/wiki/Leet) 25 | - Most [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) 26 | - Several emojis 27 | - Unlike other packages, this package is **[unicode bidi-aware](https://en.wikipedia.org/wiki/Bidirectional_text)**: it interprets right-to-left characters in the same way as they would be rendered by an application! 28 | - Its behavior is also highly customizable to your liking! 29 | 30 | ## Installation 31 | ### Download 32 | 33 | - [Header file](https://raw.githubusercontent.com/null8626/decancer/v3.3.0/bindings/native/decancer.h) 34 | - [Download for ARM64 macOS (11.0+, Big Sur+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-apple-darwin.zip) 35 | - [Download for ARM64 iOS](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-apple-ios.zip) 36 | - [Download for Apple iOS Simulator on ARM64](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-apple-ios-sim.zip) 37 | - [Download for ARM64 Android](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-linux-android.zip) 38 | - [Download for ARM64 Windows MSVC](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-pc-windows-msvc.zip) 39 | - [Download for ARM64 Linux (kernel 4.1, glibc 2.17+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-unknown-linux-gnu.zip) 40 | - [Download for ARM64 Linux with MUSL](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-aarch64-unknown-linux-musl.zip) 41 | - [Download for ARMv6 Linux 
(kernel 3.2, glibc 2.17)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-arm-unknown-linux-gnueabi.zip) 42 | - [Download for ARMv5TE Linux (kernel 4.4, glibc 2.23)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-armv5te-unknown-linux-gnueabi.zip) 43 | - [Download for ARMv7-A Android](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-armv7-linux-androideabi.zip) 44 | - [Download for ARMv7-A Linux (kernel 4.15, glibc 2.27)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-armv7-unknown-linux-gnueabi.zip) 45 | - [Download for ARMv7-A Linux, hardfloat (kernel 3.2, glibc 2.17)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-armv7-unknown-linux-gnueabihf.zip) 46 | - [Download for 32-bit Linux w/o SSE (kernel 3.2, glibc 2.17)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-i586-unknown-linux-gnu.zip) 47 | - [Download for 32-bit MSVC (Windows 7+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-i686-pc-windows-msvc.zip) 48 | - [Download for 32-bit FreeBSD](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-i686-unknown-freebsd.zip) 49 | - [Download for 32-bit Linux (kernel 3.2+, glibc 2.17+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-i686-unknown-linux-gnu.zip) 50 | - [Download for PPC64LE Linux (kernel 3.10, glibc 2.17)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-powerpc64le-unknown-linux-gnu.zip) 51 | - [Download for RISC-V Linux (kernel 4.20, glibc 2.29)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-riscv64gc-unknown-linux-gnu.zip) 52 | - [Download for S390x Linux (kernel 3.2, glibc 2.17)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-s390x-unknown-linux-gnu.zip) 53 | - [Download for SPARC Solaris 11, 
illumos](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-sparcv9-sun-solaris.zip) 54 | - [Download for Thumb2-mode ARMv7-A Linux with NEON (kernel 4.4, glibc 2.23)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-thumbv7neon-unknown-linux-gnueabihf.zip) 55 | - [Download for 64-bit macOS (10.12+, Sierra+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-apple-darwin.zip) 56 | - [Download for 64-bit iOS](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-apple-ios.zip) 57 | - [Download for 64-bit MSVC (Windows 7+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-pc-windows-msvc.zip) 58 | - [Download for 64-bit FreeBSD](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-unknown-freebsd.zip) 59 | - [Download for 64-bit illumos](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-unknown-illumos.zip) 60 | - [Download for 64-bit Linux (kernel 3.2+, glibc 2.17+)](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-unknown-linux-gnu.zip) 61 | - [Download for 64-bit Linux with MUSL](https://github.com/null8626/decancer/releases/download/v3.3.0/decancer-x86_64-unknown-linux-musl.zip) 62 | 63 | ### Building from source 64 | 65 | Building from source requires [Rust v1.65 or later](https://rustup.rs/). 66 | 67 | ```sh 68 | git clone https://github.com/null8626/decancer.git --depth 1 69 | cd decancer/bindings/native 70 | cargo build --release 71 | ``` 72 | 73 | And the binary files should be generated in the `target/release` directory. 74 | ## Examples 75 | For more information, please read the [documentation](https://null8626.github.io/decancer/native_docs). 
76 | 77 | UTF-8 example: 78 | 79 | ```c 80 | #include 81 | 82 | #include 83 | #include 84 | #include 85 | 86 | #define decancer_assert(expr, notes) \ 87 | if (!(expr)) { \ 88 | fprintf(stderr, "assertion failure at " notes "\n"); \ 89 | ret = 1; \ 90 | goto END; \ 91 | } 92 | 93 | int main(void) { 94 | int ret = 0; 95 | 96 | // UTF-8 bytes for "vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣" 97 | uint8_t input[] = {0x76, 0xef, 0xbc, 0xa5, 0xe2, 0x93, 0xa1, 0xf0, 0x9d, 0x94, 0x82, 0x20, 0xf0, 0x9d, 98 | 0x94, 0xbd, 0xf0, 0x9d, 0x95, 0x8c, 0xc5, 0x87, 0xe2, 0x84, 0x95, 0xef, 0xbd, 0x99, 99 | 0x20, 0xc5, 0xa3, 0xe4, 0xb9, 0x87, 0xf0, 0x9d, 0x95, 0x8f, 0xf0, 0x9d, 0x93, 0xa3}; 100 | 101 | decancer_error_t error; 102 | decancer_cured_t cured = decancer_cure(input, sizeof(input), DECANCER_OPTION_DEFAULT, &error); 103 | 104 | if (cured == NULL) { 105 | fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message); 106 | return 1; 107 | } 108 | 109 | decancer_assert(decancer_contains(cured, "funny", 5), "decancer_contains"); 110 | 111 | END: 112 | decancer_cured_free(cured); 113 | return ret; 114 | } 115 | ``` 116 | 117 | UTF-16 example: 118 | 119 | ```c 120 | #include 121 | 122 | #include 123 | #include 124 | #include 125 | 126 | #define decancer_assert(expr, notes) \ 127 | if (!(expr)) { \ 128 | fprintf(stderr, "assertion failure at " notes "\n"); \ 129 | ret = 1; \ 130 | goto END; \ 131 | } 132 | 133 | int main(void) { 134 | int ret = 0; 135 | 136 | // UTF-16 bytes for "vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣" 137 | uint16_t input[] = { 138 | 0x0076, 0xff25, 0x24e1, 139 | 0xd835, 0xdd02, 0x0020, 140 | 0xd835, 0xdd3d, 0xd835, 141 | 0xdd4c, 0x0147, 0x2115, 142 | 0xff59, 0x0020, 0x0163, 143 | 0x4e47, 0xd835, 0xdd4f, 144 | 0xd835, 0xdce3 145 | }; 146 | 147 | // UTF-16 bytes for "funny" 148 | uint16_t funny[] = { 0x66, 0x75, 0x6e, 0x6e, 0x79 }; 149 | 150 | decancer_error_t error; 151 | decancer_cured_t cured = decancer_cure_utf16(input, sizeof(input) / sizeof(uint16_t), DECANCER_OPTION_DEFAULT, &error); 152 | 
153 | if (cured == NULL) { 154 | fprintf(stderr, "curing error: %.*s\n", (int)error.message_length, error.message); 155 | return 1; 156 | } 157 | 158 | decancer_assert(decancer_contains_utf16(cured, funny, sizeof(funny) / sizeof(uint16_t)), "decancer_contains_utf16"); 159 | 160 | END: 161 | decancer_cured_free(cured); 162 | return ret; 163 | } 164 | ``` 165 | ## Donations 166 | 167 | If you want to support my eyes for manually looking at thousands of unicode characters, consider donating! ❤ 168 | 169 | [![ko-fi][ko-fi-image]][ko-fi-url] 170 | -------------------------------------------------------------------------------- /bindings/native/build.rs: -------------------------------------------------------------------------------- 1 | /* Windows only: compiles decancer.rc into the library, passing the crate version to the resource compiler as DECANCER_RC_VERSION* macros. */ #[cfg(windows)] 2 | fn embed() { 3 | println!("cargo:rerun-if-changed=decancer.rc"); 4 | 5 | let version = env!("CARGO_PKG_VERSION"); 6 | /* Cargo exposes each numeric component on its own, so the version string is not split by hand (splitting on '.' would yield "0-beta" for a version such as "1.2.0-beta"). */ 7 | 8 | let rc_version_major = format!("DECANCER_RC_VERSION_MAJOR={}", env!("CARGO_PKG_VERSION_MAJOR")); 9 | let rc_version_minor = format!("DECANCER_RC_VERSION_MINOR={}", env!("CARGO_PKG_VERSION_MINOR")); 10 | let rc_version_patch = format!("DECANCER_RC_VERSION_PATCH={}", env!("CARGO_PKG_VERSION_PATCH")); 11 | let rc_version = format!("DECANCER_RC_VERSION=\"{version}\""); 12 | 13 | embed_resource::compile( 14 | "decancer.rc", 15 | [ 16 | &rc_version_major, 17 | &rc_version_minor, 18 | &rc_version_patch, 19 | &rc_version, 20 | ], 21 | ); 22 | } 23 | 24 | fn main() { 25 | #[cfg(windows)] 26 | embed(); 27 | } 28 | -------------------------------------------------------------------------------- /bindings/native/decancer.rc: -------------------------------------------------------------------------------- 1 | #include <winver.h> 2 | 3 | VS_VERSION_INFO VERSIONINFO 4 | FILEVERSION DECANCER_RC_VERSION_MAJOR,DECANCER_RC_VERSION_MINOR,DECANCER_RC_VERSION_PATCH,0 5 | PRODUCTVERSION DECANCER_RC_VERSION_MAJOR,DECANCER_RC_VERSION_MINOR,DECANCER_RC_VERSION_PATCH,0 6 | FILEFLAGSMASK VS_FFI_FILEFLAGSMASK 7 | 8 | 
FILEFLAGS 0 9 | FILEOS VOS_NT_WINDOWS32 10 | FILETYPE VFT_DLL 11 | FILESUBTYPE VFT2_UNKNOWN 12 | 13 | BEGIN 14 | BLOCK "StringFileInfo" 15 | BEGIN 16 | BLOCK "040904B0" 17 | BEGIN 18 | VALUE "CompanyName", "null8626" 19 | VALUE "FileDescription", "A library that removes common unicode confusables/homoglyphs from strings." 20 | VALUE "FileVersion", DECANCER_RC_VERSION 21 | VALUE "ProductVersion", DECANCER_RC_VERSION 22 | VALUE "ProductName", "decancer" 23 | VALUE "InternalName", "decancer" 24 | VALUE "LegalCopyright", "Copyright (C) 2021-2025 null - Licensed under the MIT License." 25 | VALUE "OriginalFilename", "decancer.dll" 26 | END 27 | END 28 | 29 | BLOCK "VarFileInfo" 30 | BEGIN 31 | VALUE "Translation", 0x409, 1200 32 | END 33 | END -------------------------------------------------------------------------------- /bindings/native/docs/docgen.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readdirSync, readFileSync, writeFileSync } from 'node:fs' 6 | import { execSync } from 'node:child_process' 7 | import { dirname, join } from 'node:path' 8 | import { fileURLToPath } from 'node:url' 9 | 10 | const ROOT_DIR = dirname(fileURLToPath(import.meta.url)) 11 | const HTML_DIR = join(ROOT_DIR, 'html') 12 | let XMLParser 13 | 14 | try { 15 | XMLParser = (await import('fast-xml-parser')).XMLParser 16 | } catch { 17 | try { 18 | execSync('npm init -y && npm i fast-xml-parser --save') 19 | } catch { 20 | process.exit(1) 21 | } 22 | 23 | XMLParser = (await import('fast-xml-parser')).XMLParser 24 | } 25 | 26 | function renderAPIHTML(parts) { 27 | let rendered = 28 | '
No such query exists :(
' 29 | 30 | for (const [name, members] of Object.entries(parts)) { 31 | rendered += `
${name}
` 32 | 33 | for (const member of members) { 34 | rendered += `${member.name}` 35 | } 36 | 37 | rendered += '
' 38 | } 39 | 40 | return `${rendered}
` 41 | } 42 | 43 | try { 44 | execSync('doxygen', { 45 | cwd: ROOT_DIR, 46 | stdio: 'inherit' 47 | }) 48 | } catch { 49 | process.exit(1) 50 | } 51 | 52 | const parser = new XMLParser({ 53 | ignoreAttributes: false 54 | }) 55 | 56 | const index = parser.parse( 57 | readFileSync(join(ROOT_DIR, 'xml', 'index.xml')).toString() 58 | ) 59 | 60 | const typeDefinitions = [] 61 | const macros = [] 62 | const functions = [] 63 | 64 | for (const compound of index.doxygenindex.compound) { 65 | if (compound['@_kind'] === 'struct') { 66 | typeDefinitions.push({ 67 | name: compound.name, 68 | href: `${compound['@_refid']}.html` 69 | }) 70 | } else if (compound.name === 'decancer.h') { 71 | for (const member of compound.member) { 72 | const data = { 73 | name: member.name, 74 | href: member['@_refid'].replace( 75 | /_1([a-f0-9]+)$/, 76 | (_, x) => `.html#${x}` 77 | ) 78 | } 79 | 80 | switch (member['@_kind']) { 81 | case 'define': 82 | macros.push(data) 83 | break 84 | 85 | case 'typedef': 86 | typeDefinitions.push(data) 87 | break 88 | 89 | case 'function': 90 | functions.push(data) 91 | } 92 | } 93 | } 94 | } 95 | 96 | const renderedAPIHTML = renderAPIHTML({ 97 | Functions: functions, 98 | 'Type definitions': typeDefinitions, 99 | Macros: macros 100 | }) 101 | 102 | for (const htmlFile of readdirSync(HTML_DIR).filter(file => 103 | file.endsWith('.html') 104 | )) { 105 | const htmlFilePath = join(HTML_DIR, htmlFile) 106 | const htmlFileContents = readFileSync(htmlFilePath).toString() 107 | 108 | try { 109 | writeFileSync( 110 | htmlFilePath, 111 | htmlFileContents.replace('$apis', renderedAPIHTML) 112 | ) 113 | } catch {} 114 | } 115 | -------------------------------------------------------------------------------- /bindings/native/docs/header.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | decancer 14 | 15 | 16 | 17 | 18 | 19 | $extrastylesheet 20 | 21 | 22 |
23 |
24 |
25 | $projectname 26 | $projectnumber 27 |
28 |
$projectbrief
29 | $searchbox 30 |
31 | $apis 32 |
-------------------------------------------------------------------------------- /bindings/native/docs/script.js: -------------------------------------------------------------------------------- 1 | class ToggleableElement { 2 | constructor(elem) { 3 | this.enabled = true 4 | this.elem = elem 5 | } 6 | 7 | enable() { 8 | this.enabled = true 9 | this.elem.style.display = 'block' 10 | } 11 | 12 | disable() { 13 | this.enabled = false 14 | this.elem.style.display = 'none' 15 | } 16 | 17 | resolve(text) { 18 | if (this.elem.innerHTML.toLowerCase().includes(text)) { 19 | this.enable() 20 | } else { 21 | this.disable() 22 | } 23 | } 24 | } 25 | 26 | class ToggleableGroup { 27 | constructor(elem) { 28 | this.titleElement = new ToggleableElement(elem) 29 | this.elems = [] 30 | this.enabled = true 31 | } 32 | 33 | setGroupElement(elem) { 34 | this.groupElement = new ToggleableElement(elem) 35 | } 36 | 37 | push(elem) { 38 | this.elems.push(new ToggleableElement(elem)) 39 | } 40 | 41 | resolve(search) { 42 | this.elems.forEach(elem => elem.resolve(search)) 43 | 44 | if (this.elems.some(elem => elem.enabled)) { 45 | this.titleElement.enable() 46 | this.groupElement.enable() 47 | this.enabled = true 48 | } else { 49 | this.titleElement.disable() 50 | this.groupElement.disable() 51 | this.enabled = false 52 | } 53 | } 54 | } 55 | 56 | class APIsWrapper extends ToggleableElement { 57 | constructor(elem) { 58 | super(elem) 59 | this.topElem = document.getElementById('top') 60 | } 61 | 62 | enable() { 63 | this.topElem.className = 'top-enabled' 64 | super.enable() 65 | } 66 | 67 | disable() { 68 | this.topElem.removeAttribute('class') 69 | super.disable() 70 | } 71 | } 72 | 73 | let docContent 74 | let APIs 75 | let APINotFound 76 | 77 | let mobile = null 78 | let previousMarkedElem = null 79 | let searchIsEmpty = true 80 | const API = [] 81 | const elementsToScrollInto = [] 82 | 83 | function resolveSearch(event) { 84 | const search = 
document.querySelector('input').value.toLowerCase() 85 | searchIsEmpty = search.length === 0 86 | 87 | if (mobile) { 88 | if (searchIsEmpty) { 89 | docContent.enable() 90 | APIs.disable() 91 | return 92 | } 93 | 94 | docContent.disable() 95 | APIs.enable() 96 | } 97 | 98 | API.forEach(APIGroup => APIGroup.resolve(search)) 99 | 100 | if (API.some(APIGroup => APIGroup.enabled)) { 101 | APINotFound.disable() 102 | } else { 103 | APINotFound.enable() 104 | } 105 | } 106 | 107 | function resolveBoundaries() { 108 | const previous = mobile 109 | 110 | mobile = window.innerWidth < 768 111 | 112 | if (previous !== mobile) { 113 | if (mobile) { 114 | if (searchIsEmpty) { 115 | APIs.disable() 116 | } else { 117 | docContent.disable() 118 | } 119 | } else { 120 | APIs.enable() 121 | docContent.enable() 122 | 123 | if (previousMarkedElem) { 124 | previousMarkedElem.style.fontWeight = 'bold' 125 | elementsToScrollInto.push(previousMarkedElem) 126 | previousMarkedElem.scrollIntoView() 127 | } 128 | } 129 | } 130 | } 131 | 132 | function markAPIAsSelected(name) { 133 | previousMarkedElem = [...document.querySelectorAll('#api')].find( 134 | x => x.innerHTML === name 135 | ) 136 | previousMarkedElem.style.fontWeight = 'bold' 137 | elementsToScrollInto.push(previousMarkedElem) 138 | previousMarkedElem.scrollIntoView() 139 | } 140 | 141 | window.addEventListener('load', () => { 142 | const headerTitle = document.querySelector('div.header .title') 143 | const path = new URL(window.location).pathname 144 | const structMatch = path.match(/\/struct(\w+)(\.html)?\/?$/) 145 | 146 | docContent = new ToggleableElement(document.getElementById('doc-content')) 147 | APIs = new APIsWrapper(document.getElementById('apis')) 148 | APINotFound = new ToggleableElement(document.getElementById('api-not-found')) 149 | APINotFound.disable() 150 | 151 | for (const APIElement of document.getElementById('apis').children) { 152 | switch (APIElement.id) { 153 | case 'apitype': 154 | API.push(new 
ToggleableGroup(APIElement)) 155 | break 156 | case 'apilist': 157 | const currentAPIIndex = API.length - 1 158 | 159 | API[currentAPIIndex].setGroupElement(APIElement) 160 | 161 | for (const APIElementChild of APIElement.children) { 162 | API[currentAPIIndex].push(APIElementChild) 163 | } 164 | break 165 | } 166 | } 167 | 168 | resolveBoundaries() 169 | window.addEventListener('resize', resolveBoundaries) 170 | 171 | if (/\/decancer_8h(\.html)?$/.test(new URL(window.location).pathname)) { 172 | const hash = (window.location.hash || '').replace(/^#/, '') 173 | 174 | if (hash.length) { 175 | let hashFlag = false 176 | let foundMatchingHash = false 177 | let previousChild 178 | 179 | for (const child of document.querySelector('.contents').children) { 180 | if (child.id === hash) { 181 | previousChild = child 182 | hashFlag = true 183 | } else if (hashFlag) { 184 | foundMatchingHash = true 185 | markAPIAsSelected( 186 | [...child.childNodes] 187 | .find(x => x.nodeType === Node.TEXT_NODE) 188 | .nodeValue.replace(/\(\)?/, '') 189 | ) 190 | elementsToScrollInto.push(previousChild) 191 | break 192 | } 193 | } 194 | 195 | if (!foundMatchingHash) { 196 | const matchingElement = [ 197 | ...document.querySelectorAll('.contents table tbody tr') 198 | ].find(x => x.id === `r_${hash}`) 199 | 200 | if (matchingElement) { 201 | markAPIAsSelected(matchingElement.children[1].children[0].innerHTML) 202 | elementsToScrollInto.push(matchingElement) 203 | } 204 | } 205 | } 206 | 207 | for (const link of document.querySelectorAll('.memItemRight a')) { 208 | link.style.fontWeight = 'bold' 209 | } 210 | 211 | for (const returnDoc of document.querySelectorAll('.return dd')) { 212 | returnDoc.innerHTML = returnDoc.innerHTML.replace(/^(const )?\w+\*? 
/, '') 213 | } 214 | 215 | document.querySelector('.contents p').remove() 216 | 217 | for (const textBlock of document.querySelectorAll('.contents .textblock')) { 218 | textBlock.remove() 219 | } 220 | 221 | document.querySelector('.contents p a').remove() 222 | 223 | headerTitle.remove() 224 | } else if (structMatch) { 225 | const p = [...document.querySelectorAll('.contents p')] 226 | const structName = structMatch[1].replaceAll('__', '_') 227 | 228 | markAPIAsSelected(structName) 229 | 230 | p[0].remove() 231 | p[1].remove() 232 | 233 | const stfu = [...document.querySelector('.contents').childNodes].find( 234 | n => 235 | n.nodeType === Node.TEXT_NODE && 236 | n.nodeValue === 237 | 'The documentation for this struct was generated from the following file:' 238 | ) 239 | 240 | stfu.remove() 241 | 242 | document.querySelector('.contents ul:last-child').remove() 243 | 244 | let childFlag = false 245 | 246 | for (const child of [...document.querySelector('.contents').children]) { 247 | if ( 248 | child.className === 'groupheader' && 249 | child.innerHTML === 'Detailed Description' 250 | ) { 251 | childFlag = true 252 | child.remove() 253 | } else if (childFlag && child.className === 'textblock') { 254 | const currentContents = document.querySelector('.contents') 255 | 256 | currentContents.insertBefore(child, currentContents.firstChild) 257 | break 258 | } 259 | } 260 | 261 | headerTitle.innerHTML = structName 262 | headerTitle.style.visibility = 'visible' 263 | elementsToScrollInto.push(headerTitle) 264 | } else { 265 | headerTitle.remove() 266 | } 267 | 268 | for (const since of document.querySelectorAll('.since dd')) { 269 | const version = since.innerHTML.trim() 270 | 271 | since.innerHTML = `v${version}` 272 | } 273 | 274 | try { 275 | document.querySelector('#doc-content #MSearchSelectWindow').remove() 276 | } catch {} 277 | 278 | try { 279 | document.querySelector('#doc-content #MSearchResultsWindow').remove() 280 | } catch {} 281 | 282 | for (const stfu of 
[...document.querySelectorAll('.summary')]) { 283 | stfu.remove() 284 | } 285 | 286 | document.addEventListener('click', event => { 287 | const elem = event.target || event.srcElement 288 | 289 | if (elem && elem.id === 'api') { 290 | if (previousMarkedElem) { 291 | previousMarkedElem.style.fontWeight = 'normal' 292 | } 293 | 294 | previousMarkedElem = elem 295 | elem.style.fontWeight = 'bold' 296 | elem.scrollIntoView() 297 | 298 | if (mobile) { 299 | APIs.disable() 300 | docContent.enable() 301 | } 302 | } 303 | }) 304 | 305 | for (const memname of document.querySelectorAll('td.memname')) { 306 | memname.innerHTML = memname.innerHTML.replace(/^DECANCER_EXPORT /, '') 307 | } 308 | 309 | const input = document.querySelector('input') 310 | 311 | window.addEventListener('keydown', event => { 312 | if (event.ctrlKey && event.key.toLowerCase() === 'f') { 313 | input.focus() 314 | event.preventDefault() 315 | } 316 | }) 317 | 318 | const search = document.querySelector('input') 319 | 320 | search.removeAttribute('onfocus') 321 | search.removeAttribute('onblur') 322 | search.setAttribute('onkeyup', 'resolveSearch(event)') 323 | 324 | document.querySelector('html').style.visibility = 'visible' 325 | 326 | while (true) { 327 | const elem = elementsToScrollInto.shift() 328 | 329 | if (!elem) { 330 | break 331 | } 332 | 333 | elem.scrollIntoView() 334 | } 335 | }) 336 | -------------------------------------------------------------------------------- /bindings/native/docs/search.css: -------------------------------------------------------------------------------- 1 | /* copied from doxygen's search/search.css source file */ 2 | 3 | /*---------------- Search Box */ 4 | 5 | #MSearchBox { 6 | position: absolute; 7 | right: 5px; 8 | } 9 | /*---------------- Search box styling */ 10 | 11 | .SRPage * { 12 | font-weight: normal; 13 | line-height: normal; 14 | } 15 | 16 | dark-mode-toggle { 17 | margin-left: 5px; 18 | display: flex; 19 | float: right; 20 | } 21 | 22 | #MSearchBox { 
23 | display: inline-block; 24 | white-space: nowrap; 25 | background: black; 26 | border-radius: 0.65em; 27 | box-shadow: inset 0.5px 0.5px 3px 0px #2f436c; 28 | z-index: 102; 29 | } 30 | 31 | #MSearchBox .left { 32 | display: inline-block; 33 | vertical-align: middle; 34 | height: 1.4em; 35 | } 36 | 37 | #MSearchSelect { 38 | display: inline-block; 39 | vertical-align: middle; 40 | width: 20px; 41 | height: 19px; 42 | background-image: url('mag_seld.svg'); 43 | margin: 0 0 0 0.3em; 44 | padding: 0; 45 | } 46 | 47 | #MSearchSelectExt { 48 | display: inline-block; 49 | vertical-align: middle; 50 | width: 10px; 51 | height: 19px; 52 | background-image: url('mag_d.svg'); 53 | margin: 0 0 0 0.5em; 54 | padding: 0; 55 | } 56 | 57 | #MSearchField { 58 | display: inline-block; 59 | vertical-align: middle; 60 | width: 7.5em; 61 | height: 19px; 62 | margin: 0 0.15em; 63 | padding: 0; 64 | line-height: 1em; 65 | border: none; 66 | color: #c5c5c5; 67 | outline: none; 68 | font-family: Arial, Verdana, sans-serif; 69 | -webkit-border-radius: 0px; 70 | border-radius: 0px; 71 | background: none; 72 | } 73 | 74 | @media (hover: none) { 75 | /* to avoid zooming on iOS */ 76 | #MSearchField { 77 | font-size: 16px; 78 | } 79 | } 80 | 81 | #MSearchBox .right { 82 | display: inline-block; 83 | vertical-align: middle; 84 | width: 1.4em; 85 | height: 1.4em; 86 | } 87 | 88 | #MSearchClose { 89 | display: none; 90 | font-size: inherit; 91 | background: none; 92 | border: none; 93 | margin: 0; 94 | padding: 0; 95 | outline: none; 96 | } 97 | 98 | #MSearchCloseImg { 99 | padding: 0.3em; 100 | margin: 0; 101 | } 102 | 103 | .MSearchBoxActive #MSearchField { 104 | color: #c5c5c5; 105 | } 106 | 107 | /*---------------- Search filter selection */ 108 | 109 | #MSearchSelectWindow { 110 | display: none; 111 | position: absolute; 112 | left: 0; 113 | top: 0; 114 | border: 1px solid #7c95c6; 115 | background-color: #101826; 116 | z-index: 10001; 117 | padding-top: 4px; 118 | padding-bottom: 4px; 
119 | -moz-border-radius: 4px; 120 | -webkit-border-top-left-radius: 4px; 121 | -webkit-border-top-right-radius: 4px; 122 | -webkit-border-bottom-left-radius: 4px; 123 | -webkit-border-bottom-right-radius: 4px; 124 | -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); 125 | } 126 | 127 | .SelectItem { 128 | font: 129 | 8pt Arial, 130 | Verdana, 131 | sans-serif; 132 | padding-left: 2px; 133 | padding-right: 12px; 134 | border: 0px; 135 | } 136 | 137 | span.SelectionMark { 138 | margin-right: 4px; 139 | font-family: 140 | 'JetBrains Mono', Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace, 141 | fixed; 142 | outline-style: none; 143 | text-decoration: none; 144 | } 145 | 146 | a.SelectItem { 147 | display: block; 148 | outline-style: none; 149 | color: #90a5ce; 150 | text-decoration: none; 151 | padding-left: 6px; 152 | padding-right: 12px; 153 | } 154 | 155 | a.SelectItem:focus, 156 | a.SelectItem:active { 157 | color: #90a5ce; 158 | outline-style: none; 159 | text-decoration: none; 160 | } 161 | 162 | a.SelectItem:hover { 163 | color: #bcc9e2; 164 | background-color: #283a5d; 165 | outline-style: none; 166 | text-decoration: none; 167 | cursor: pointer; 168 | display: block; 169 | } 170 | 171 | /*---------------- Search results window */ 172 | 173 | iframe#MSearchResults { 174 | /*width: 60ex;*/ 175 | height: 15em; 176 | } 177 | 178 | #MSearchResultsWindow { 179 | display: none; 180 | position: absolute; 181 | left: 0; 182 | top: 0; 183 | border: 1px solid #7c95c6; 184 | background-color: #101826; 185 | z-index: 10000; 186 | width: 300px; 187 | height: 400px; 188 | overflow: auto; 189 | } 190 | 191 | /* ----------------------------------- */ 192 | 193 | #SRIndex { 194 | clear: both; 195 | } 196 | 197 | .SREntry { 198 | font-size: 10pt; 199 | padding-left: 1ex; 200 | } 201 | 202 | .SRPage .SREntry { 203 | font-size: 8pt; 204 | padding: 1px 5px; 205 | } 206 | 207 | div.SRPage { 208 | margin: 5px 2px; 209 | background-color: #101826; 210 | } 211 | 212 | 
.SRChildren { 213 | padding-left: 3ex; 214 | padding-bottom: 0.5em; 215 | } 216 | 217 | .SRPage .SRChildren { 218 | display: none; 219 | } 220 | 221 | .SRSymbol { 222 | font-weight: bold; 223 | color: #90a5ce; 224 | font-family: Arial, Verdana, sans-serif; 225 | text-decoration: none; 226 | outline: none; 227 | } 228 | 229 | a.SRScope { 230 | display: block; 231 | color: #90a5ce; 232 | font-family: Arial, Verdana, sans-serif; 233 | font-size: 8pt; 234 | text-decoration: none; 235 | outline: none; 236 | } 237 | 238 | a.SRSymbol:focus, 239 | a.SRSymbol:active, 240 | a.SRScope:focus, 241 | a.SRScope:active { 242 | text-decoration: underline; 243 | } 244 | 245 | span.SRScope { 246 | padding-left: 4px; 247 | font-family: Arial, Verdana, sans-serif; 248 | } 249 | 250 | .SRPage .SRStatus { 251 | padding: 2px 5px; 252 | font-size: 8pt; 253 | font-style: italic; 254 | font-family: Arial, Verdana, sans-serif; 255 | } 256 | 257 | .SRResult { 258 | display: none; 259 | } 260 | 261 | div.searchresults { 262 | margin-left: 10px; 263 | margin-right: 10px; 264 | } 265 | 266 | /*---------------- External search page results */ 267 | 268 | .pages b { 269 | color: white; 270 | padding: 5px 5px 3px 5px; 271 | background-image: url('../tab_ad.png'); 272 | background-repeat: repeat-x; 273 | text-shadow: 0 1px 1px #000000; 274 | } 275 | 276 | .pages { 277 | line-height: 17px; 278 | margin-left: 4px; 279 | text-decoration: none; 280 | } 281 | 282 | .hl { 283 | font-weight: bold; 284 | } 285 | 286 | #searchresults { 287 | margin-bottom: 20px; 288 | } 289 | 290 | .searchpages { 291 | margin-top: 10px; 292 | } 293 | -------------------------------------------------------------------------------- /bindings/native/docs/style.css: -------------------------------------------------------------------------------- 1 | html { 2 | --font-family: 'Inter'; 3 | --font-family-monospace: 'Roboto Mono'; 4 | --top-height: 150px; 5 | --searchbar-border-radius: 0px; 6 | } 7 | 8 | #MSearchField { 9 | padding: 
0px 10px; 10 | } 11 | 12 | #projectname a, 13 | div.header .title, 14 | h1, 15 | h2.groupheader { 16 | font-weight: 900; 17 | } 18 | 19 | div.contents { 20 | margin: 0px auto var(--spacing-medium) auto; 21 | padding-bottom: var(--spacing-large); 22 | } 23 | 24 | .see a { 25 | font-family: var(--font-family-monospace); 26 | font-size: var(--memname-font-size); 27 | } 28 | 29 | a:hover { 30 | cursor: pointer; 31 | text-decoration: underline; 32 | } 33 | 34 | #apitype, 35 | h2.groupheader, 36 | #projectbrief { 37 | user-select: none; 38 | } 39 | 40 | h2.groupheader { 41 | box-shadow: none; 42 | } 43 | 44 | #apitype, 45 | #api-not-found, 46 | .see dd a { 47 | font-weight: bold; 48 | } 49 | 50 | #projectbrief { 51 | padding-bottom: 10px; 52 | } 53 | 54 | #top { 55 | border-bottom: none; 56 | } 57 | 58 | #top, 59 | #apis { 60 | overflow-x: hidden; 61 | } 62 | 63 | #apis { 64 | background-color: #191a1c; 65 | overflow-y: auto; 66 | } 67 | 68 | #apis, 69 | #apitype, 70 | #api, 71 | .note dd { 72 | font-size: var(--memname-font-size); 73 | } 74 | 75 | dl.section dd { 76 | margin-bottom: 0px; 77 | } 78 | 79 | #apitype { 80 | color: white; 81 | } 82 | 83 | #apilist { 84 | padding-left: calc(var(--spacing-large) + 10px); 85 | padding-right: var(--spacing-large); 86 | } 87 | 88 | #apilist:last-child { 89 | margin-bottom: 35px; 90 | } 91 | 92 | #api { 93 | font-family: var(--font-family-monospace); 94 | display: block; 95 | color: var(--primary-color); 96 | } 97 | 98 | #apitype { 99 | padding: calc(var(--spacing-medium) - 5px) var(--spacing-large); 100 | } 101 | 102 | #api-not-found { 103 | font-size: var(--page-font-size); 104 | text-align: center; 105 | margin-top: 50px; 106 | } 107 | 108 | #MSearchBox { 109 | right: 0px; 110 | } 111 | 112 | @media screen and (max-width: 1169px) { 113 | div.contents .textblock h1 a { 114 | display: none; 115 | } 116 | } 117 | 118 | @media screen and (min-width: 1170px) { 119 | div.contents .textblock h1 a { 120 | display: inline-block; 121 | } 
122 | } 123 | 124 | @media screen and (max-width: 767px) { 125 | #MSearchBox { 126 | width: calc(100% - var(--spacing-medium) + var(--spacing-small)); 127 | margin-top: 0px; 128 | } 129 | 130 | div.memproto { 131 | padding: calc(var(--spacing-medium) / 2); 132 | } 133 | 134 | div.fragment { 135 | padding: calc((var(--spacing-large) - (var(--spacing-large) / 6)) / 2) 136 | calc(var(--spacing-large) / 2); 137 | } 138 | 139 | #titlearea { 140 | background-color: var(--side-nav-background); 141 | } 142 | 143 | html { 144 | --searchbar-background: var(--page-background-color); 145 | --code-font-size: 13px; 146 | } 147 | 148 | html, 149 | body, 150 | .top-enabled { 151 | position: relative; 152 | height: 100%; 153 | } 154 | 155 | #apis { 156 | position: relative; 157 | height: calc(100% - var(--top-height) + 29px); 158 | } 159 | } 160 | 161 | @media screen and (min-width: 768px) { 162 | html, 163 | body { 164 | overflow: auto; 165 | position: relative; 166 | height: 100%; 167 | } 168 | 169 | #apis { 170 | position: relative; 171 | height: calc(100% - var(--top-height) + var(--spacing-large)); 172 | } 173 | 174 | #top { 175 | position: -webkit-sticky; 176 | position: sticky; 177 | top: 0px; 178 | height: 100%; 179 | } 180 | 181 | #doc-content { 182 | position: relative; 183 | top: calc(var(--top-height) - 17px - 100%); 184 | padding-top: 0px; 185 | height: 100% !important; 186 | } 187 | 188 | #MSearchBox { 189 | width: calc(var(--side-nav-fixed-width) - calc(4 * var(--spacing-medium))); 190 | } 191 | } 192 | 193 | html, 194 | div.header .title { 195 | visibility: hidden; 196 | } 197 | 198 | hr, 199 | .permalink, 200 | .footer, 201 | #MSearchSelect, 202 | #MSearchBox .right { 203 | display: none; 204 | } 205 | -------------------------------------------------------------------------------- /bindings/native/rustfmt.toml: -------------------------------------------------------------------------------- 1 | ../../core/rustfmt.toml 
/// Transcodes a stream of UTF-16 code units into UTF-8 bytes.
///
/// Code units in the Basic Multilingual Plane are encoded directly as one,
/// two or three bytes. A high surrogate (`0xD800..0xDC00`) immediately
/// followed by a low surrogate (`0xDC00..0xE000`) is combined into a single
/// supplementary-plane scalar and encoded as four bytes.
///
/// Unpaired surrogates are dropped from the output. The unit that followed
/// the unpaired surrogate is not lost: it is pushed back and processed on the
/// next loop iteration.
///
/// Returns `None` when the input ends immediately after a surrogate code
/// unit (there is nothing left to pair it with); otherwise returns the
/// encoded bytes, which are always valid UTF-8.
fn get_inner(iter: &mut impl Iterator<Item = u16>) -> Option<Vec<u8>> {
  let mut output: Vec<u8> = Vec::new();
  // One-unit look-ahead buffer: holds a unit that was read while trying to
  // complete a surrogate pair but turned out not to belong to it.
  let mut next: Option<u16> = None;

  loop {
    let c = match next.take() {
      Some(res) => res,
      None => match iter.next() {
        Some(res) => res,
        None => return Some(output),
      },
    };

    if c <= 0x7f {
      output.push(c as _);
    } else if c <= 0x7ff {
      output.extend([((c >> 6) as u8) | 0xc0, ((c & 0x3f) as u8) | 0x80]);
    } else if !(0xd800..0xe000).contains(&c) {
      output.extend([
        ((c >> 12) as u8) | 0xe0,
        (((c >> 6) & 0x3f) as u8) | 0x80,
        ((c & 0x3f) as u8) | 0x80,
      ]);
    } else {
      let n = iter.next()?;

      // Only a *high* surrogate may start a pair. Without the `c` check a
      // stray low surrogate followed by another low surrogate would be
      // combined into a value above U+10FFFF and emitted as invalid UTF-8,
      // which makes `String::from_utf8(..).unwrap()` in `get_array` panic.
      if (0xd800..0xdc00).contains(&c) && (0xdc00..0xe000).contains(&n) {
        let c = 0x10000 + (((c - 0xd800) as u32) << 10) + ((n as u32) - 0xdc00);

        output.extend([
          ((c >> 18) as u8) | 0xf0,
          (((c >> 12) & 0x3f) as u8) | 0x80,
          (((c >> 6) & 0x3f) as u8) | 0x80,
          ((c & 0x3f) as u8) | 0x80,
        ]);
      } else {
        // `c` is an unpaired surrogate: drop it and re-process `n`.
        next.replace(n);
      }
    }
  }
}
# Build script for the native binding's C test suite.
#
# 3.18 is the first CMake release that accepts the REQUIRED option in
# find_file()/find_library(); project(HOMEPAGE_URL ...) additionally
# needs 3.12. Declaring an older minimum would let unsupported CMake
# versions start configuring and then fail with a confusing error.
cmake_minimum_required(VERSION 3.18)

project(
  decancer_native_test
  LANGUAGES C
  HOMEPAGE_URL "https://github.com/null8626/decancer"
  DESCRIPTION "Test suite for the native binding of the decancer library."
)

set(CMAKE_BUILD_TYPE Debug)

# Every .c file in this directory is part of the test executable.
file(GLOB DECANCER_NATIVE_TEST_SOURCE_FILES *.c)

add_executable(decancer_native_test ${DECANCER_NATIVE_TEST_SOURCE_FILES})

if(WIN32)
  # Locate the DLL itself so it can be copied next to the test executable.
  # NOTE: the keyword is NAMES; `NAME` is not a find_file() keyword.
  find_file(
    DECANCER_DLL
    NAMES "decancer.dll"
    HINTS "${CMAKE_SOURCE_DIR}/.." "${CMAKE_SOURCE_DIR}/../target/release" "${CMAKE_SOURCE_DIR}/../target/debug"
    REQUIRED
  )
endif()

# Library to link against: the import library on Windows, the shared
# object / dylib elsewhere.
find_library(
  DECANCER_LIBRARY
  NAMES "decancer.dll.lib" "libdecancer.dylib" "libdecancer.so"
  HINTS "${CMAKE_SOURCE_DIR}/.." "${CMAKE_SOURCE_DIR}/../target/release" "${CMAKE_SOURCE_DIR}/../target/debug"
  REQUIRED
)

# decancer.h lives one directory above the tests.
target_include_directories(decancer_native_test PUBLIC "${CMAKE_SOURCE_DIR}/..")
target_link_libraries(decancer_native_test ${DECANCER_LIBRARY})

if(WIN32)
  # Copy the DLL into the directory that holds the built test executable.
  add_custom_command(
    TARGET decancer_native_test POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy ${DECANCER_DLL} $<TARGET_FILE_DIR:decancer_native_test>
    COMMAND_EXPAND_LISTS
  )
endif()

# Run the test executable as soon as it has been built; a non-zero exit
# status fails the build.
add_custom_command(
  TARGET decancer_native_test POST_BUILD
  COMMAND decancer_native_test
)
if (line.startsWith('/')) { 59 | status = 3 60 | } 61 | 62 | break 63 | } 64 | 65 | default: { 66 | try { 67 | const functionName = line.match(/(decancer_\w+)\(/)[1] 68 | const exampleCode = example 69 | .join('\n') 70 | .replace('int main(', `int ${functionName}_test(`) 71 | functions.push(functionName) 72 | 73 | writeFileSync(join(TESTS_DIR, `${functionName}_test.c`), exampleCode) 74 | } catch {} 75 | 76 | example = [] 77 | status = 0 78 | } 79 | } 80 | } 81 | 82 | let testFile = ` 83 | #include 84 | 85 | #ifdef _WIN32 86 | #pragma comment(lib, "WS2_32") 87 | #pragma comment(lib, "Userenv") 88 | #pragma comment(lib, "ntdll") 89 | #endif 90 | 91 | ${functions.map(f => `int ${f}_test(void);`).join('\n')} 92 | 93 | int main(void) { 94 | ` 95 | 96 | for (const func of functions) { 97 | testFile += ` 98 | printf("testing ${func}...\\n"); 99 | if (${func}_test()) { 100 | fprintf(stderr, "error: tests for ${func} failed.\\n"); 101 | return 1; 102 | } 103 | ` 104 | } 105 | 106 | testFile += '\n return 0;\n}' 107 | 108 | writeFileSync(join(TESTS_DIR, 'test.c'), testFile) 109 | 110 | try { 111 | execSync('cmake -B build .', { 112 | cwd: TESTS_DIR, 113 | stdio: 'inherit' 114 | }) 115 | 116 | execSync('cmake --build build --config Debug', { 117 | cwd: TESTS_DIR, 118 | stdio: 'inherit' 119 | }) 120 | } catch { 121 | process.exit(1) 122 | } 123 | -------------------------------------------------------------------------------- /bindings/node/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.aarch64-unknown-linux-gnu] 2 | linker = "aarch64-linux-gnu-gcc" 3 | [target.aarch64-unknown-linux-musl] 4 | linker = "aarch64-linux-musl-gcc" 5 | rustflags = ["-C", "target-feature=-crt-static"] 6 | [target.armv7-unknown-linux-gnueabihf] 7 | linker = "arm-linux-gnueabihf-gcc" 8 | -------------------------------------------------------------------------------- /bindings/node/.npmignore: 
-------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | .cargo 4 | npm 5 | rustfmt.toml 6 | test.cjs 7 | **/*.node 8 | **/*.rs 9 | Cargo.toml 10 | -------------------------------------------------------------------------------- /bindings/node/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decancer_node" 3 | version = "3.3.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | crate-type = ["cdylib"] 9 | 10 | [dependencies] 11 | napi = { version = "2", default-features = false } 12 | napi-derive = { version = "2", default-features = false } 13 | decancer = { path = "../../core" } 14 | 15 | [build-dependencies] 16 | napi-build = "2" 17 | 18 | [profile.release] 19 | lto = true -------------------------------------------------------------------------------- /bindings/node/README.md: -------------------------------------------------------------------------------- 1 | 3 | 4 | # decancer [![npm][npm-image]][npm-url] [![crates.io][crates-io-image]][crates-io-url] [![npm downloads][npm-downloads-image]][npm-url] [![crates.io downloads][crates-io-downloads-image]][crates-io-url] [![codacy][codacy-image]][codacy-url] [![ko-fi][ko-fi-brief-image]][ko-fi-url] 5 | 6 | [crates-io-image]: https://img.shields.io/crates/v/decancer?style=flat-square 7 | [crates-io-downloads-image]: https://img.shields.io/crates/d/decancer?style=flat-square 8 | [crates-io-url]: https://crates.io/crates/decancer 9 | [npm-image]: https://img.shields.io/npm/v/decancer.svg?style=flat-square 10 | [npm-url]: https://npmjs.org/package/decancer 11 | [npm-downloads-image]: https://img.shields.io/npm/dt/decancer.svg?style=flat-square 12 | [codacy-image]: https://app.codacy.com/project/badge/Grade/d740b1aa867d42f2b37eb992ad73784a 13 | [codacy-url]: https://app.codacy.com/gh/null8626/decancer/dashboard 14 | [ko-fi-brief-image]: 
https://img.shields.io/badge/donations-ko--fi-red?color=ff5e5b&style=flat-square 15 | [ko-fi-image]: https://ko-fi.com/img/githubbutton_sm.svg 16 | [ko-fi-url]: https://ko-fi.com/null8626 17 | 18 | A library that removes common unicode confusables/homoglyphs from strings. 19 | 20 | - Its core is written in [Rust](https://www.rust-lang.org) and utilizes a form of [**Binary Search**](https://en.wikipedia.org/wiki/Binary_search_algorithm) to ensure speed! 21 | - By default, it's capable of filtering **221,529 (19.88%) different unicode codepoints** like: 22 | - All [whitespace characters](https://en.wikipedia.org/wiki/Whitespace_character) 23 | - All [diacritics](https://en.wikipedia.org/wiki/Diacritic), this also eliminates all forms of [Zalgo text](https://en.wikipedia.org/wiki/Zalgo_text) 24 | - Most [leetspeak characters](https://en.wikipedia.org/wiki/Leet) 25 | - Most [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) 26 | - Several emojis 27 | - Unlike other packages, this package is **[unicode bidi-aware](https://en.wikipedia.org/wiki/Bidirectional_text)**: it interprets right-to-left characters the same way an application would render them! 28 | - Its behavior is also highly customizable to your liking! 29 | 30 | ## Installation 31 | In your shell: 32 | 33 | ```sh 34 | npm install decancer 35 | ``` 36 | 37 | In your code (CommonJS): 38 | 39 | ```js 40 | const decancer = require('decancer') 41 | ``` 42 | 43 | In your code (ESM): 44 | 45 | ```js 46 | import decancer from 'decancer' 47 | ``` 48 | ## Examples 49 | ```js 50 | const assert = require('assert') 51 | const cured = decancer('vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣 wWiIiIIttHh l133t5p3/-\\|<') 52 | 53 | assert(cured.equals('very funny text with leetspeak')) 54 | 55 | // WARNING: it's NOT recommended to coerce this output to a JavaScript string 56 | // and process it manually from there, as decancer has its own 57 | // custom comparison measures, including leetspeak matching! 
58 | assert(cured.toString() !== 'very funny text with leetspeak') 59 | console.log(cured.toString()) 60 | // => very funny text wwiiiiitthh l133t5p3/-\|< 61 | 62 | assert(cured.contains('funny')) 63 | 64 | cured.censor('funny', '*') 65 | console.log(cured.toString()) 66 | // => very ***** text wwiiiiitthh l133t5p3/-\|< 67 | 68 | cured.censorMultiple(['very', 'text'], '-') 69 | console.log(cured.toString()) 70 | // => ---- ***** ---- wwiiiiitthh l133t5p3/-\|< 71 | ``` 72 | ## Donations 73 | 74 | If you want to support my eyes for manually looking at thousands of unicode characters, consider donating! ❤ 75 | 76 | [![ko-fi][ko-fi-image]][ko-fi-url] 77 | -------------------------------------------------------------------------------- /bindings/node/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | napi_build::setup(); 3 | } 4 | -------------------------------------------------------------------------------- /bindings/node/npm/android-arm-eabi/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-android-arm-eabi` 2 | 3 | This is the **armv7-linux-androideabi** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/android-arm-eabi/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-android-arm-eabi", 3 | "version": "3.3.0", 4 | "os": [ 5 | "android" 6 | ], 7 | "cpu": [ 8 | "arm" 9 | ], 10 | "main": "decancer.android-arm-eabi.node", 11 | "files": [ 12 | "decancer.android-arm-eabi.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for android-arm-eabi)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/android-arm64/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-android-arm64` 2 | 3 | This is the **aarch64-linux-android** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/android-arm64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-android-arm64", 3 | "version": "3.3.0", 4 | "os": [ 5 | "android" 6 | ], 7 | "cpu": [ 8 | "arm64" 9 | ], 10 | "main": "decancer.android-arm64.node", 11 | "files": [ 12 | "decancer.android-arm64.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for android-arm64)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/darwin-arm64/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-darwin-arm64` 2 | 3 | This is the **aarch64-apple-darwin** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/darwin-arm64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-darwin-arm64", 3 | "version": "3.3.0", 4 | "os": [ 5 | "darwin" 6 | ], 7 | "cpu": [ 8 | "arm64" 9 | ], 10 | "main": "decancer.darwin-arm64.node", 11 | "files": [ 12 | "decancer.darwin-arm64.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for darwin-arm64)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/darwin-x64/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-darwin-x64` 2 | 3 | This is the **x86_64-apple-darwin** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/darwin-x64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-darwin-x64", 3 | "version": "3.3.0", 4 | "os": [ 5 | "darwin" 6 | ], 7 | "cpu": [ 8 | "x64" 9 | ], 10 | "main": "decancer.darwin-x64.node", 11 | "files": [ 12 | "decancer.darwin-x64.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for darwin-x64)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/freebsd-x64/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-freebsd-x64` 2 | 3 | This is the **x86_64-unknown-freebsd** binary for `decancer` -------------------------------------------------------------------------------- /bindings/node/npm/freebsd-x64/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-freebsd-x64", 3 | "version": "3.3.0", 4 | "os": [ 5 | "freebsd" 6 | ], 7 | "cpu": [ 8 | "x64" 9 | ], 10 | "main": "decancer.freebsd-x64.node", 11 | "files": [ 12 | "decancer.freebsd-x64.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for freebsd-x64)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm-gnueabihf/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-linux-arm-gnueabihf` 2 | 3 | This is the **armv7-unknown-linux-gnueabihf** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm-gnueabihf/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-linux-arm-gnueabihf", 3 | "version": "3.3.0", 4 | "os": [ 5 | "linux" 6 | ], 7 | "cpu": [ 8 | "arm" 9 | ], 10 | "main": "decancer.linux-arm-gnueabihf.node", 11 | "files": [ 12 | "decancer.linux-arm-gnueabihf.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for linux-arm-gnueabihf)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm64-gnu/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-linux-arm64-gnu` 2 | 3 | This is the **aarch64-unknown-linux-gnu** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm64-gnu/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-linux-arm64-gnu", 3 | "version": "3.3.0", 4 | "os": [ 5 | "linux" 6 | ], 7 | "cpu": [ 8 | "arm64" 9 | ], 10 | "main": "decancer.linux-arm64-gnu.node", 11 | "files": [ 12 | "decancer.linux-arm64-gnu.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "libc": [ 19 | "glibc" 20 | ], 21 | "author": "null8626", 22 | "homepage": "https://github.com/null8626/decancer#readme", 23 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for linux-arm64-gnu)", 24 | "bugs": { 25 | "url": "https://github.com/null8626/decancer/issues" 26 | }, 27 | "keywords": [ 28 | "security", 29 | "unicode", 30 | "homoglyphs", 31 | "string", 32 | "moderation", 33 | "sanitizer", 34 | "confusables" 35 | ], 36 | "funding": [ 37 | "https://github.com/sponsors/null8626", 38 | "https://ko-fi.com/null8626" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm64-musl/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-linux-arm64-musl` 2 | 3 | This is the **aarch64-unknown-linux-musl** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-arm64-musl/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-linux-arm64-musl", 3 | "version": "3.3.0", 4 | "os": [ 5 | "linux" 6 | ], 7 | "cpu": [ 8 | "arm64" 9 | ], 10 | "main": "decancer.linux-arm64-musl.node", 11 | "files": [ 12 | "decancer.linux-arm64-musl.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "libc": [ 19 | "musl" 20 | ], 21 | "author": "null8626", 22 | "homepage": "https://github.com/null8626/decancer#readme", 23 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for linux-arm64-musl)", 24 | "bugs": { 25 | "url": "https://github.com/null8626/decancer/issues" 26 | }, 27 | "keywords": [ 28 | "security", 29 | "unicode", 30 | "homoglyphs", 31 | "string", 32 | "moderation", 33 | "sanitizer", 34 | "confusables" 35 | ], 36 | "funding": [ 37 | "https://github.com/sponsors/null8626", 38 | "https://ko-fi.com/null8626" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-x64-gnu/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-linux-x64-gnu` 2 | 3 | This is the **x86_64-unknown-linux-gnu** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-x64-gnu/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-linux-x64-gnu", 3 | "version": "3.3.0", 4 | "os": [ 5 | "linux" 6 | ], 7 | "cpu": [ 8 | "x64" 9 | ], 10 | "main": "decancer.linux-x64-gnu.node", 11 | "files": [ 12 | "decancer.linux-x64-gnu.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "libc": [ 19 | "glibc" 20 | ], 21 | "author": "null8626", 22 | "homepage": "https://github.com/null8626/decancer#readme", 23 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for linux-x64-gnu)", 24 | "bugs": { 25 | "url": "https://github.com/null8626/decancer/issues" 26 | }, 27 | "keywords": [ 28 | "security", 29 | "unicode", 30 | "homoglyphs", 31 | "string", 32 | "moderation", 33 | "sanitizer", 34 | "confusables" 35 | ], 36 | "funding": [ 37 | "https://github.com/sponsors/null8626", 38 | "https://ko-fi.com/null8626" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-x64-musl/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-linux-x64-musl` 2 | 3 | This is the **x86_64-unknown-linux-musl** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/linux-x64-musl/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-linux-x64-musl", 3 | "version": "3.3.0", 4 | "os": [ 5 | "linux" 6 | ], 7 | "cpu": [ 8 | "x64" 9 | ], 10 | "main": "decancer.linux-x64-musl.node", 11 | "files": [ 12 | "decancer.linux-x64-musl.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "libc": [ 19 | "musl" 20 | ], 21 | "author": "null8626", 22 | "homepage": "https://github.com/null8626/decancer#readme", 23 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for linux-x64-musl)", 24 | "bugs": { 25 | "url": "https://github.com/null8626/decancer/issues" 26 | }, 27 | "keywords": [ 28 | "security", 29 | "unicode", 30 | "homoglyphs", 31 | "string", 32 | "moderation", 33 | "sanitizer", 34 | "confusables" 35 | ], 36 | "funding": [ 37 | "https://github.com/sponsors/null8626", 38 | "https://ko-fi.com/null8626" 39 | ] 40 | } 41 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-arm64-msvc/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-win32-arm64-msvc` 2 | 3 | This is the **aarch64-pc-windows-msvc** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-arm64-msvc/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-win32-arm64-msvc", 3 | "version": "3.3.0", 4 | "os": [ 5 | "win32" 6 | ], 7 | "cpu": [ 8 | "arm64" 9 | ], 10 | "main": "decancer.win32-arm64-msvc.node", 11 | "files": [ 12 | "decancer.win32-arm64-msvc.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for win32-arm64-msvc)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-ia32-msvc/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-win32-ia32-msvc` 2 | 3 | This is the **i686-pc-windows-msvc** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-ia32-msvc/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-win32-ia32-msvc", 3 | "version": "3.3.0", 4 | "os": [ 5 | "win32" 6 | ], 7 | "cpu": [ 8 | "ia32" 9 | ], 10 | "main": "decancer.win32-ia32-msvc.node", 11 | "files": [ 12 | "decancer.win32-ia32-msvc.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for win32-ia32-msvc)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-x64-msvc/README.md: -------------------------------------------------------------------------------- 1 | # `decancer-win32-x64-msvc` 2 | 3 | This is the **x86_64-pc-windows-msvc** binary for `decancer` 4 | -------------------------------------------------------------------------------- /bindings/node/npm/win32-x64-msvc/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@vierofernando/decancer-win32-x64-msvc", 3 | "version": "3.3.0", 4 | "os": [ 5 | "win32" 6 | ], 7 | "cpu": [ 8 | "x64" 9 | ], 10 | "main": "decancer.win32-x64-msvc.node", 11 | "files": [ 12 | "decancer.win32-x64-msvc.node" 13 | ], 14 | "license": "MIT", 15 | "engines": { 16 | "node": ">= 10" 17 | }, 18 | "author": "null8626", 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "description": "A library that removes common unicode confusables/homoglyphs from strings. 
(Binary port for win32-x64-msvc)", 21 | "bugs": { 22 | "url": "https://github.com/null8626/decancer/issues" 23 | }, 24 | "keywords": [ 25 | "security", 26 | "unicode", 27 | "homoglyphs", 28 | "string", 29 | "moderation", 30 | "sanitizer", 31 | "confusables" 32 | ], 33 | "funding": [ 34 | "https://github.com/sponsors/null8626", 35 | "https://ko-fi.com/null8626" 36 | ] 37 | } 38 | -------------------------------------------------------------------------------- /bindings/node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "decancer", 3 | "version": "3.3.0", 4 | "description": "A library that removes common unicode confusables/homoglyphs from strings.", 5 | "author": "null8626", 6 | "keywords": [ 7 | "security", 8 | "unicode", 9 | "homoglyphs", 10 | "string", 11 | "moderation", 12 | "sanitizer", 13 | "confusables" 14 | ], 15 | "repository": { 16 | "type": "git", 17 | "url": "git+https://github.com/null8626/decancer" 18 | }, 19 | "homepage": "https://github.com/null8626/decancer#readme", 20 | "bugs": { 21 | "url": "https://github.com/null8626/decancer/issues" 22 | }, 23 | "license": "MIT", 24 | "main": "src/lib.js", 25 | "typings": "typings.d.ts", 26 | "devDependencies": { 27 | "@napi-rs/cli": "^2.18.0" 28 | }, 29 | "engines": { 30 | "node": ">= 10" 31 | }, 32 | "scripts": { 33 | "artifacts": "napi artifacts", 34 | "build": "napi build --no-dts-header --platform --release", 35 | "prepublishOnly": "napi prepublish -t npm --skip-gh-release", 36 | "test": "node test.cjs" 37 | }, 38 | "napi": { 39 | "name": "decancer", 40 | "package": { 41 | "name": "@vierofernando/decancer" 42 | }, 43 | "triples": { 44 | "additional": [ 45 | "aarch64-apple-darwin", 46 | "aarch64-linux-android", 47 | "aarch64-unknown-linux-gnu", 48 | "aarch64-unknown-linux-musl", 49 | "aarch64-pc-windows-msvc", 50 | "armv7-unknown-linux-gnueabihf", 51 | "x86_64-unknown-linux-musl", 52 | "i686-pc-windows-msvc", 53 | "armv7-linux-androideabi", 
54 | "x86_64-unknown-freebsd" 55 | ] 56 | } 57 | }, 58 | "funding": [ 59 | "https://github.com/sponsors/null8626", 60 | "https://ko-fi.com/null8626" 61 | ] 62 | } 63 | -------------------------------------------------------------------------------- /bindings/node/rustfmt.toml: -------------------------------------------------------------------------------- 1 | ../../core/rustfmt.toml -------------------------------------------------------------------------------- /bindings/node/src/lib.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | const { existsSync, readFileSync } = require('fs') 6 | const assert = require('assert') 7 | const { join } = require('path') 8 | // Maps process.platform -> process.arch -> binary target name; object entries ({ name, musl: true }) get a '-musl' or '-gnu' suffix picked at load time. 9 | const PLATFORMS = { 10 | win32: { 11 | x64: 'win32-x64-msvc', 12 | ia32: 'win32-ia32-msvc', 13 | arm64: 'win32-arm64-msvc' 14 | }, 15 | darwin: { x64: 'darwin-x64', arm64: 'darwin-arm64' }, 16 | linux: { 17 | x64: { name: 'linux-x64', musl: true }, 18 | arm64: { name: 'linux-arm64', musl: true }, 19 | arm: 'linux-arm-gnueabihf' 20 | }, 21 | android: { 22 | arm64: 'android-arm64', 23 | arm: 'android-arm-eabi' 24 | }, 25 | freebsd: { 26 | x64: 'freebsd-x64' 27 | } 28 | } 29 | // Returns true when the runtime looks like musl: process.report shows no glibcVersionRuntime, or (when process.report is unavailable) /usr/bin/ldd mentions 'musl'. An unreadable ldd also yields true. 30 | function isMusl() { 31 | if ( 32 | process.report == undefined || 33 | typeof process.report.getReport !== 'function' 34 | ) { 35 | try { 36 | return readFileSync('/usr/bin/ldd', 'utf8').includes('musl') 37 | } catch { 38 | return true 39 | } 40 | } else { 41 | const { glibcVersionRuntime } = process.report.getReport().header 42 | 43 | return !glibcVersionRuntime 44 | } 45 | } 46 | // Loads decancer.<name>.node from the directory above this file when that file exists, otherwise requires the @vierofernando/decancer-<name> package. 47 | function getBinding(name) { 48 | const path = join(__dirname, '..', `decancer.${name}.node`) 49 | 50 | return require(existsSync(path) ? 
path : `@vierofernando/decancer-${name}`) 51 | } 52 | 53 | let binding 54 | 55 | try { 56 | const data = PLATFORMS[process.platform][process.arch] 57 | 58 | assert( 59 | data != null, 60 | `This platform (${process.platform} on a ${process.arch}) is not supported.` 61 | ) 62 | 63 | binding = getBinding( 64 | typeof data === 'string' 65 | ? data 66 | : `${data.name}-${data.musl && isMusl() ? 'musl' : 'gnu'}` 67 | ) 68 | 69 | module.exports = Object.assign(binding.cure, { 70 | options: binding.options 71 | }) 72 | } catch (err) { 73 | console.error( 74 | `Error: cannot load module. OS: ${process.platform} Arch: ${process.arch} may not be supported.` 75 | ) 76 | 77 | throw err 78 | } 79 | -------------------------------------------------------------------------------- /bindings/node/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![forbid(unsafe_code)] 2 | 3 | #[macro_use] 4 | extern crate napi_derive; 5 | 6 | use napi::{ 7 | bindgen_prelude::{Error, FromNapiValue}, 8 | Env, JsNumber, JsString, JsUnknown, Result, Status, ValueType, 9 | }; 10 | use std::ops::Range; 11 | 12 | macro_rules! options { 13 | ( 14 | keys { 15 | $($key_idx:literal: $key_name:ident,)* 16 | } 17 | 18 | overrides { 19 | $($override_name:ident: $override_value:expr,)* 20 | } 21 | ) => { 22 | #[napi(object)] 23 | #[derive(Default)] 24 | pub struct Options { 25 | $(pub $key_name: Option,)* 26 | $(pub $override_name: Option,)* 27 | } 28 | 29 | impl From for u32 { 30 | fn from(value: Options) -> u32 { 31 | let mut options = 0; 32 | 33 | $(if value.$key_name.unwrap_or_default() { 34 | options |= (1 << $key_idx); 35 | })* 36 | 37 | $(if value.$override_name.unwrap_or_default() { 38 | options = $override_value; 39 | })* 40 | 41 | options 42 | } 43 | } 44 | }; 45 | } 46 | 47 | options! 
{ 48 | keys { 49 | 0: retain_capitalization, 50 | 1: disable_bidi, 51 | 2: retain_diacritics, 52 | 3: retain_greek, 53 | 4: retain_cyrillic, 54 | 5: retain_hebrew, 55 | 6: retain_arabic, 56 | 7: retain_devanagari, 57 | 8: retain_bengali, 58 | 9: retain_armenian, 59 | 10: retain_gujarati, 60 | 11: retain_tamil, 61 | 12: retain_thai, 62 | 13: retain_lao, 63 | 14: retain_burmese, 64 | 15: retain_khmer, 65 | 16: retain_mongolian, 66 | 17: retain_chinese, 67 | 18: retain_japanese, 68 | 19: retain_korean, 69 | 20: retain_braille, 70 | 21: retain_emojis, 71 | 22: retain_turkish, 72 | 23: ascii_only, 73 | 24: alphanumeric_only, 74 | } 75 | 76 | overrides { 77 | all: 0x1ffffff, 78 | pure_homoglyph: 0x3ffffc, 79 | } 80 | } 81 | 82 | #[napi] 83 | pub struct Match { 84 | range: Range, 85 | portion: String, 86 | } 87 | 88 | #[napi] 89 | impl Match { 90 | #[napi(getter)] 91 | pub fn start(&self, env: Env) -> Result { 92 | env.create_int64(self.range.start as _) 93 | } 94 | 95 | #[napi(getter)] 96 | pub fn end(&self, env: Env) -> Result { 97 | env.create_int64(self.range.end as _) 98 | } 99 | 100 | #[napi] 101 | pub fn to_string(&self, env: Env) -> Result { 102 | env.create_string(&self.portion) 103 | } 104 | } 105 | 106 | #[napi] 107 | pub struct CuredString(decancer::CuredString); 108 | 109 | #[napi] 110 | impl CuredString { 111 | #[inline(always)] 112 | fn new_match(&self, mat: Range) -> Match { 113 | Match { 114 | range: mat.clone(), 115 | portion: String::from(&self.0[mat]), 116 | } 117 | } 118 | 119 | #[napi] 120 | pub fn find(&self, other: String) -> Vec { 121 | self.0.find(&other).map(|mat| self.new_match(mat)).collect() 122 | } 123 | 124 | #[napi] 125 | pub fn find_multiple(&self, other: Vec) -> Vec { 126 | self 127 | .0 128 | .find_multiple(other) 129 | .into_iter() 130 | .map(|mat| self.new_match(mat)) 131 | .collect() 132 | } 133 | 134 | #[napi] 135 | pub fn censor(&mut self, other: String, with: String) -> Result<()> { 136 | match with.chars().next() { 137 | 
Some(with_char) => { 138 | self.0.censor(&other, with_char); 139 | 140 | Ok(()) 141 | }, 142 | 143 | None => Err(Error::new( 144 | Status::InvalidArg, 145 | "Replacement string is empty.", 146 | )), 147 | } 148 | } 149 | // Same contract as censor, applied to several search terms at once. 150 | #[napi] 151 | pub fn censor_multiple(&mut self, other: Vec<String>, with: String) -> Result<()> { 152 | match with.chars().next() { 153 | Some(with_char) => { 154 | self.0.censor_multiple(&other, with_char); 155 | 156 | Ok(()) 157 | }, 158 | 159 | None => Err(Error::new( 160 | Status::InvalidArg, 161 | "Replacement string is empty.", 162 | )), 163 | } 164 | } 165 | 166 | #[napi] 167 | pub fn replace(&mut self, other: String, with: String) { 168 | self.0.replace(&other, &with); 169 | } 170 | 171 | #[napi] 172 | pub fn replace_multiple(&mut self, other: Vec<String>, with: String) { 173 | self.0.replace_multiple(&other, &with); 174 | } 175 | 176 | #[napi] 177 | pub fn starts_with(&self, other: String) -> bool { 178 | self.0.starts_with(&other) 179 | } 180 | 181 | #[napi] 182 | pub fn ends_with(&self, other: String) -> bool { 183 | self.0.ends_with(&other) 184 | } 185 | 186 | #[napi] 187 | pub fn contains(&self, other: String) -> bool { 188 | self.0.contains(&other) 189 | } 190 | 191 | #[napi] 192 | pub fn equals(&self, other: String) -> bool { 193 | self.0 == other 194 | } 195 | 196 | #[napi] 197 | pub fn to_string(&self, env: Env) -> Result<JsString> { 198 | env.create_string(&self.0) 199 | } 200 | } 201 | // Converts an optional Options object into its numeric form; a missing object uses Options::default(). 202 | #[napi] 203 | fn options(options: Option<Options>) -> u32 { 204 | options.unwrap_or_default().into() 205 | } 206 | // Cures `input`. A JS number in `maybe_options` is used directly as the u32 options value; anything else is read as an optional Options object. 207 | #[napi] 208 | fn cure(input: String, maybe_options: JsUnknown) -> Result<CuredString> { 209 | let options = if maybe_options.get_type()? 
== ValueType::Number { 210 | maybe_options 211 | .coerce_to_number() 212 | .and_then(|idx| idx.get_uint32()) 213 | } else { 214 | <Option<Options> as FromNapiValue>::from_unknown(maybe_options).map(options) 215 | }?; 216 | 217 | match decancer::cure(&input, options.into()) { 218 | Ok(output) => Ok(CuredString(output)), 219 | Err(err) => Err(Error::new(Status::InvalidArg, err)), 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /bindings/node/test.cjs: -------------------------------------------------------------------------------- 1 | const { strictEqual } = require('node:assert') 2 | const { describe, it } = require('node:test') 3 | 4 | class TestContext { 5 | #inner 6 | 7 | constructor(result) { 8 | this.#inner = result 9 | } 10 | 11 | test(functionName, expected, ...args) { 12 | it(functionName, () => 13 | strictEqual(this.#inner[functionName](...args), expected) 14 | ) 15 | 16 | return this 17 | } 18 | 19 | testFind() { 20 | it('find', () => { 21 | const match = this.#inner.find('funny') 22 | 23 | strictEqual(match.length, 1) 24 | strictEqual(match[0].start, 5) 25 | strictEqual(match[0].end, 10) 26 | strictEqual(match[0].toString(), 'funny') 27 | }) 28 | 29 | return this 30 | } 31 | } 32 | 33 | describe('decancer', () => { 34 | const decancer = require('./src/lib.js') 35 | 36 | new TestContext(decancer('vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣')) 37 | .test('equals', true, 'very funny text') 38 | .test('startsWith', true, 'very') 39 | .test('endsWith', true, 'text') 40 | .test('contains', true, 'funny') 41 | .test('toString', 'very funny text') 42 | .testFind() 43 | }) 44 | -------------------------------------------------------------------------------- /bindings/node/typings.d.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | export class Match { 4 | readonly start: number 5 | readonly end: number 6 | toString(): string 7 | } 8 | 9 | export class CuredString { 10 | find(other: 
string): Match[] 11 | findMultiple(other: string[]): Match[] 12 | censor(other: string, character: string): void 13 | censorMultiple(other: string[], character: string): void 14 | replace(other: string, withWhat: string): void 15 | replaceMultiple(other: string[], withWhat: string): void 16 | startsWith(other: string): boolean 17 | endsWith(other: string): boolean 18 | contains(other: string): boolean 19 | equals(other: string): boolean 20 | toString(): string 21 | } 22 | 23 | export interface Options { 24 | all?: boolean 25 | retainCapitalization?: boolean 26 | disableBidi?: boolean 27 | retainDiacritics?: boolean 28 | retainGreek?: boolean 29 | retainCyrillic?: boolean 30 | retainHebrew?: boolean 31 | retainArabic?: boolean 32 | retainDevanagari?: boolean 33 | retainBengali?: boolean 34 | retainArmenian?: boolean 35 | retainGujarati?: boolean 36 | retainTamil?: boolean 37 | retainThai?: boolean 38 | retainLao?: boolean 39 | retainBurmese?: boolean 40 | retainKhmer?: boolean 41 | retainMongolian?: boolean 42 | retainChinese?: boolean 43 | retainJapanese?: boolean 44 | retainKorean?: boolean 45 | retainBraille?: boolean 46 | retainEmojis?: boolean 47 | retainTurkish?: boolean 48 | pureHomoglyph?: boolean 49 | asciiOnly?: boolean 50 | alphanumericOnly?: boolean 51 | } 52 | 53 | export default function (input: string, options?: Options | number): CuredString 54 | 55 | export function options(options?: Options): number 56 | -------------------------------------------------------------------------------- /bindings/wasm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decancer_wasm" 3 | version = "3.3.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [lib] 8 | crate-type = ["cdylib"] 9 | 10 | [dependencies] 11 | decancer = { path = "../../core" } 12 | wasm-bindgen = { version = "0.2.9", default-features = false, features = ["std"] } 13 | console_error_panic_hook = "0.1.7" 
-------------------------------------------------------------------------------- /bindings/wasm/bin/decancer.min.js: -------------------------------------------------------------------------------- 1 | let wasm,output;const cachedTextDecoder="undefined"!=typeof TextDecoder?new TextDecoder("utf-8",{ignoreBOM:!0,fatal:!0}):{decode:()=>{throw Error("TextDecoder not available")}};"undefined"!=typeof TextDecoder&&cachedTextDecoder.decode();let cachedUint8ArrayMemory0=null;function getUint8ArrayMemory0(){return null!==cachedUint8ArrayMemory0&&0!==cachedUint8ArrayMemory0.byteLength||(cachedUint8ArrayMemory0=new Uint8Array(wasm.memory.buffer)),cachedUint8ArrayMemory0}function getStringFromWasm0(e,t){return e>>>=0,cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(e,e+t))}let WASM_VECTOR_LEN=0;const cachedTextEncoder="undefined"!=typeof TextEncoder?new TextEncoder("utf-8"):{encode:()=>{throw Error("TextEncoder not available")}},encodeString="function"==typeof cachedTextEncoder.encodeInto?function(e,t){return cachedTextEncoder.encodeInto(e,t)}:function(e,t){const n=cachedTextEncoder.encode(e);return t.set(n),{read:e.length,written:n.length}};function passStringToWasm0(e,t,n){if(void 0===n){const n=cachedTextEncoder.encode(e),r=t(n.length,1)>>>0;return getUint8ArrayMemory0().subarray(r,r+n.length).set(n),WASM_VECTOR_LEN=n.length,r}let r=e.length,a=t(r,1)>>>0;const i=getUint8ArrayMemory0();let _=0;for(;_127)break;i[a+_]=t}if(_!==r){0!==_&&(e=e.slice(_)),a=n(a,r,r=_+3*e.length,1)>>>0;const t=getUint8ArrayMemory0().subarray(a+_,a+r);_+=encodeString(e,t).written,a=n(a,r,_,1)>>>0}return WASM_VECTOR_LEN=_,a}let cachedDataViewMemory0=null;function getDataViewMemory0(){return(null===cachedDataViewMemory0||!0===cachedDataViewMemory0.buffer.detached||void 0===cachedDataViewMemory0.buffer.detached&&cachedDataViewMemory0.buffer!==wasm.memory.buffer)&&(cachedDataViewMemory0=new DataView(wasm.memory.buffer)),cachedDataViewMemory0}function getArrayJsValueFromWasm0(e,t){e>>>=0;const 
n=getDataViewMemory0(),r=[];for(let a=e;a>>0,r=getDataViewMemory0();for(let t=0;t=1114112||e>=55296&&e<57344))throw new Error(`expected a valid Unicode scalar value, found ${e}`)}function takeFromExternrefTable0(e){const t=wasm.__wbindgen_export_3.get(e);return wasm.__externref_table_dealloc(e),t}const OPTIONS={k:["retainCapitalization","disableBidi","retainDiacritics","retainGreek","retainCyrillic","retainHebrew","retainArabic","retainDevanagari","retainBengali","retainArmenian","retainGujarati","retainTamil","retainThai","retainLao","retainBurmese","retainKhmer","retainMongolian","retainChinese","retainJapanese","retainKorean","retainBraille","retainEmojis","retainTurkish","asciiOnly","alphanumericOnly"],o:{all:33554431,pureHomoglyph:4194300}};function options(e){e||(e={});let t=0;for(let n=0;n{},unregister:()=>{}}:new FinalizationRegistry((e=>wasm.__wbg_curedstring_free(e>>>0,1)));export class CuredString{static __wrap(e){e>>>=0;const t=Object.create(CuredString.prototype);return t.__wbg_ptr=e,CuredStringFinalization.register(t,t.__wbg_ptr,t),t}__destroy_into_raw(){const e=this.__wbg_ptr;return this.__wbg_ptr=0,CuredStringFinalization.unregister(this),e}free(){const e=this.__destroy_into_raw();wasm.__wbg_curedstring_free(e,0)}find(e){const t=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),n=WASM_VECTOR_LEN,r=wasm.curedstring_find(this.__wbg_ptr,t,n);var a=getArrayJsValueFromWasm0(r[0],r[1]).slice();return wasm.__wbindgen_free(r[0],4*r[1],4),a}findMultiple(e){const t=passArrayJsValueToWasm0(e,wasm.__wbindgen_malloc),n=WASM_VECTOR_LEN,r=wasm.curedstring_findMultiple(this.__wbg_ptr,t,n);var a=getArrayJsValueFromWasm0(r[0],r[1]).slice();return wasm.__wbindgen_free(r[0],4*r[1],4),a}censor(e,t){const n=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),r=WASM_VECTOR_LEN,a=t.codePointAt(0);_assertChar(a),wasm.curedstring_censor(this.__wbg_ptr,n,r,a)}censorMultiple(e,t){const 
n=passArrayJsValueToWasm0(e,wasm.__wbindgen_malloc),r=WASM_VECTOR_LEN,a=t.codePointAt(0);_assertChar(a),wasm.curedstring_censorMultiple(this.__wbg_ptr,n,r,a)}replace(e,t){const n=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),r=WASM_VECTOR_LEN,a=passStringToWasm0(t,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),i=WASM_VECTOR_LEN;wasm.curedstring_replace(this.__wbg_ptr,n,r,a,i)}replaceMultiple(e,t){const n=passArrayJsValueToWasm0(e,wasm.__wbindgen_malloc),r=WASM_VECTOR_LEN,a=passStringToWasm0(t,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),i=WASM_VECTOR_LEN;wasm.curedstring_replaceMultiple(this.__wbg_ptr,n,r,a,i)}startsWith(e){const t=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),n=WASM_VECTOR_LEN;return 0!==wasm.curedstring_startsWith(this.__wbg_ptr,t,n)}endsWith(e){const t=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),n=WASM_VECTOR_LEN;return 0!==wasm.curedstring_endsWith(this.__wbg_ptr,t,n)}contains(e){const t=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),n=WASM_VECTOR_LEN;return 0!==wasm.curedstring_contains(this.__wbg_ptr,t,n)}equals(e){const t=passStringToWasm0(e,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),n=WASM_VECTOR_LEN;return 0!==wasm.curedstring_equals(this.__wbg_ptr,t,n)}toString(){let e,t;try{const n=wasm.curedstring_toString(this.__wbg_ptr);return e=n[0],t=n[1],getStringFromWasm0(n[0],n[1])}finally{wasm.__wbindgen_free(e,t,1)}}}const MatchFinalization="undefined"==typeof FinalizationRegistry?{register:()=>{},unregister:()=>{}}:new FinalizationRegistry((e=>wasm.__wbg_match_free(e>>>0,1)));export class Match{static __wrap(e){e>>>=0;const t=Object.create(Match.prototype);return t.__wbg_ptr=e,MatchFinalization.register(t,t.__wbg_ptr,t),t}__destroy_into_raw(){const e=this.__wbg_ptr;return this.__wbg_ptr=0,MatchFinalization.unregister(this),e}free(){const e=this.__destroy_into_raw();wasm.__wbg_match_free(e,0)}get start(){return 
wasm.__wbg_get_match_start(this.__wbg_ptr)>>>0}set start(e){wasm.__wbg_set_match_start(this.__wbg_ptr,e)}get end(){return wasm.__wbg_get_match_end(this.__wbg_ptr)>>>0}set end(e){wasm.__wbg_set_match_end(this.__wbg_ptr,e)}toString(){let e,t;try{const n=wasm.match_toString(this.__wbg_ptr);return e=n[0],t=n[1],getStringFromWasm0(n[0],n[1])}finally{wasm.__wbindgen_free(e,t,1)}}}async function __wbg_load(e,t){if("function"==typeof Response&&e instanceof Response){if("function"==typeof WebAssembly.instantiateStreaming)try{return await WebAssembly.instantiateStreaming(e,t)}catch(t){if("application/wasm"==e.headers.get("Content-Type"))throw t;console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n",t)}const n=await e.arrayBuffer();return await WebAssembly.instantiate(n,t)}{const n=await WebAssembly.instantiate(e,t);return n instanceof WebAssembly.Instance?{instance:n,module:e}:n}}function __wbg_get_imports(){const e={wbg:{}};return e.wbg.__wbg_error_7534b8e9a36f1ab4=function(e,t){let n,r;try{n=e,r=t,console.error(getStringFromWasm0(e,t))}finally{wasm.__wbindgen_free(n,r,1)}},e.wbg.__wbg_match_new=function(e){return Match.__wrap(e)},e.wbg.__wbg_new_8a6f238a6ece86ea=function(){return new Error},e.wbg.__wbg_stack_0ed75d68575b0f3c=function(e,t){const n=passStringToWasm0(t.stack,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),r=WASM_VECTOR_LEN;getDataViewMemory0().setInt32(e+4,r,!0),getDataViewMemory0().setInt32(e+0,n,!0)},e.wbg.__wbindgen_error_new=function(e,t){return new Error(getStringFromWasm0(e,t))},e.wbg.__wbindgen_init_externref_table=function(){const e=wasm.__wbindgen_export_3,t=e.grow(4);e.set(0,void 0),e.set(t+0,void 0),e.set(t+1,null),e.set(t+2,!0),e.set(t+3,!1)},e.wbg.__wbindgen_string_get=function(e,t){const n="string"==typeof t?t:void 0;var 
r=null==n?0:passStringToWasm0(n,wasm.__wbindgen_malloc,wasm.__wbindgen_realloc),a=WASM_VECTOR_LEN;getDataViewMemory0().setInt32(e+4,a,!0),getDataViewMemory0().setInt32(e+0,r,!0)},e.wbg.__wbindgen_throw=function(e,t){throw new Error(getStringFromWasm0(e,t))},e}export default async function init({local:e}={}){if(void 0!==wasm)return output;const{instance:t}=await __wbg_load(await fetch(e?new URL("decancer.wasm",import.meta.url):"https://cdn.jsdelivr.net/gh/null8626/decancer@v3.3.0/bindings/wasm/bin/decancer.wasm"),__wbg_get_imports());return wasm=t.exports,cachedDataViewMemory0=null,cachedUint8ArrayMemory0=null,output=Object.assign(cure,{options:options}),wasm.__wbindgen_start(),output} -------------------------------------------------------------------------------- /bindings/wasm/bin/decancer.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/null8626/decancer/2c6679bd15877c28f7796b078209f3caabf5c1b3/bindings/wasm/bin/decancer.wasm -------------------------------------------------------------------------------- /bindings/wasm/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Decancerer!!! (tm) 6 | 16 | 17 | 18 |

Input cancerous text here:

19 | 20 |
21 | 22 | 37 | 38 | -------------------------------------------------------------------------------- /bindings/wasm/rustfmt.toml: -------------------------------------------------------------------------------- 1 | ../../core/rustfmt.toml -------------------------------------------------------------------------------- /bindings/wasm/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | #![forbid(unsafe_code)] 3 | 4 | use std::{convert::AsRef, ops::Range}; 5 | use wasm_bindgen::prelude::*; 6 | 7 | #[wasm_bindgen] 8 | pub struct Match { 9 | pub start: usize, 10 | pub end: usize, 11 | portion: String, 12 | } 13 | 14 | #[wasm_bindgen] 15 | impl Match { 16 | pub fn toString(&self) -> String { 17 | self.portion.clone() 18 | } 19 | } 20 | 21 | #[wasm_bindgen] 22 | pub struct CuredString(decancer::CuredString); 23 | 24 | #[wasm_bindgen] 25 | impl CuredString { 26 | #[inline(always)] 27 | fn new_match(&self, mat: Range) -> Match { 28 | Match { 29 | start: mat.start, 30 | end: mat.end, 31 | portion: String::from(&self.0[mat]), 32 | } 33 | } 34 | 35 | pub fn find(&self, other: &str) -> Vec { 36 | self.0.find(other).map(|mat| self.new_match(mat)).collect() 37 | } 38 | 39 | pub fn findMultiple(&self, other: Vec) -> Vec { 40 | self 41 | .0 42 | .find_multiple(other) 43 | .into_iter() 44 | .map(|mat| self.new_match(mat)) 45 | .collect() 46 | } 47 | 48 | pub fn censor(&mut self, other: &str, with: char) { 49 | self.0.censor(other, with) 50 | } 51 | 52 | pub fn censorMultiple(&mut self, other: Vec, with: char) { 53 | self.0.censor_multiple(other, with) 54 | } 55 | 56 | pub fn replace(&mut self, other: &str, with: &str) { 57 | self.0.replace(other, with) 58 | } 59 | 60 | pub fn replaceMultiple(&mut self, other: Vec, with: &str) { 61 | self.0.replace_multiple(other, with) 62 | } 63 | 64 | pub fn startsWith(&self, other: &str) -> bool { 65 | self.0.starts_with(other) 66 | } 67 | 68 | pub fn endsWith(&self, other: 
&str) -> bool { 69 | self.0.ends_with(other) 70 | } 71 | 72 | pub fn contains(&self, other: &str) -> bool { 73 | self.0.contains(other) 74 | } 75 | 76 | pub fn equals(&self, other: &str) -> bool { 77 | self.0 == other 78 | } 79 | 80 | pub fn toString(&self) -> String { 81 | self.0.clone().into() 82 | } 83 | } 84 | 85 | #[wasm_bindgen] 86 | pub fn cure(input: &str, options: u32) -> Result { 87 | console_error_panic_hook::set_once(); 88 | 89 | match decancer::cure(input, options.into()) { 90 | Ok(output) => Ok(CuredString(output)), 91 | Err(err) => Err(JsError::new(>::as_ref(&err))), 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /bindings/wasm/tests/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | -------------------------------------------------------------------------------- /bindings/wasm/tests/index.mjs: -------------------------------------------------------------------------------- 1 | import { Worker } from 'node:worker_threads' 2 | import { dirname, join } from 'node:path' 3 | import { fileURLToPath } from 'node:url' 4 | import { promisify } from 'node:util' 5 | import puppeteer from 'puppeteer' 6 | 7 | const CURRENT_DIR = join(dirname(fileURLToPath(import.meta.url))) 8 | 9 | function error(message) { 10 | process.exitCode = 1 11 | console.error(message) 12 | } 13 | 14 | console.log('- [client] running worker...') 15 | 16 | const server = new Worker(join(CURRENT_DIR, 'server.mjs')) 17 | 18 | server.on('message', async message => { 19 | switch (message.code) { 20 | case 'ready': 21 | console.log('- [client] launching browser...') 22 | let browser = null 23 | 24 | for (let tries = 0; ; tries++) { 25 | try { 26 | browser = await puppeteer.launch({ 27 | headless: 'new', 28 | timeout: 12500 29 | }) 30 | 31 | break 32 | } catch (err) { 33 | console.log( 34 | `- [client] failed to launch brower after ${tries} tries.` 35 | ) 36 | 37 
| if (tries === 5) { 38 | error( 39 | `- [client] aborting browser launching process due to error:\n${err.stack}` 40 | ) 41 | 42 | return server.postMessage(null) 43 | } 44 | } 45 | } 46 | 47 | console.log('- [client] launching browser page...') 48 | const page = await browser.newPage() 49 | 50 | console.log('- [client] requesting to localhost:8080...') 51 | await page.goto('http://localhost:8080', { 52 | waitFor: 'load' 53 | }) 54 | 55 | console.log('- [client] running tests...') 56 | const err = await page.evaluate(async () => { 57 | class TestContext { 58 | #err 59 | #object 60 | 61 | constructor(object) { 62 | this.#err = null 63 | this.#object = object 64 | } 65 | 66 | #assert(received, expected, functionName) { 67 | if (this.#err === null && received !== expected) { 68 | this.#err = { 69 | expected, 70 | received, 71 | functionName 72 | } 73 | } 74 | } 75 | 76 | test(expected, functionName, ...args) { 77 | if (this.#err === null) { 78 | this.#assert( 79 | this.#object[functionName](...args), 80 | expected, 81 | functionName 82 | ) 83 | } 84 | 85 | return this 86 | } 87 | 88 | testModifications() { 89 | if (this.#err === null) { 90 | this.#object.replace('text', 'other') 91 | this.#assert( 92 | this.#object.toString(), 93 | 'very funny other', 94 | true, 95 | 'replace' 96 | ) 97 | 98 | this.#object.replaceMultiple(['very ', ' funny'], 'asdf') 99 | this.#assert( 100 | this.#object.toString(), 101 | 'asdf other', 102 | true, 103 | 'replaceMultiple' 104 | ) 105 | 106 | this.#object.censor('asdf', '*') 107 | this.#assert( 108 | this.#object.toString(), 109 | '**** other', 110 | true, 111 | 'censor' 112 | ) 113 | 114 | this.#object.censorMultiple(['**** ', ' other'], '*') 115 | this.#assert( 116 | this.#object.toString(), 117 | '**********', 118 | true, 119 | 'censorMultiple' 120 | ) 121 | } 122 | 123 | return this 124 | } 125 | 126 | testFind() { 127 | if (this.#err === null) { 128 | const match = this.#object.find('funny') 129 | 130 | this.#assert(match.length, 
1, 'find:match.length') 131 | this.#assert(match[0].start, 5, 'find:match[0].start') 132 | this.#assert(match[0].end, 10, 'find:match[0].end') 133 | this.#assert( 134 | match[0].toString(), 135 | 'funny', 136 | 'find:match[0].toString()' 137 | ) 138 | 139 | const matches = this.#object.findMultiple(['very ', ' funny']) 140 | 141 | this.#assert(matches.length, 1, 'findMultiple:matches.length') 142 | this.#assert(matches[0].start, 0, 'findMultiple:matches[0].start') 143 | this.#assert(matches[0].end, 10, 'findMultiple:matches[0].end') 144 | this.#assert( 145 | matches[0].toString(), 146 | 'very funny', 147 | 'findMultiple:matches[0].toString()' 148 | ) 149 | } 150 | 151 | return this 152 | } 153 | 154 | finish() { 155 | return this.#err 156 | } 157 | } 158 | 159 | try { 160 | const decancer = await window.init({ 161 | local: true 162 | }) 163 | 164 | return new TestContext(decancer('vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣')) 165 | .test(true, 'equals', 'very funny text') 166 | .test(true, 'startsWith', 'very') 167 | .test(true, 'endsWith', 'text') 168 | .test(true, 'contains', 'funny') 169 | .test('very funny text', 'toString') 170 | .testFind() 171 | .testModifications() 172 | .finish() 173 | } catch (err) { 174 | return err.stack 175 | } 176 | }) 177 | 178 | if (err !== null) { 179 | if (typeof err === 'string') { 180 | error( 181 | `- [client] error while loading wasm binary:\n${decodeURIComponent( 182 | err 183 | )}` 184 | ) 185 | } else { 186 | error( 187 | `- [client] assertion error while calling ${err.functionName}: expected '${err.expected}', got '${err.received}'` 188 | ) 189 | } 190 | } else { 191 | console.log('- [client] tests were successful.') 192 | } 193 | 194 | console.log('- [client] closing browser...') 195 | 196 | await browser.close() 197 | server.postMessage(null) 198 | 199 | break 200 | 201 | case 'error': 202 | error(`- [client] error while starting server:\n${message.stack}`) 203 | 204 | break 205 | 206 | case 'close': 207 | server.terminate() 208 | } 209 | }) 210 | 
-------------------------------------------------------------------------------- /bindings/wasm/tests/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "decancer-wasm-tests", 3 | "private": true, 4 | "description": "Test suite for decancer's WebAssembly binding.", 5 | "author": "null8626", 6 | "keywords": [ 7 | "test", 8 | "wasm", 9 | "webassembly" 10 | ], 11 | "homepage": "https://github.com/null8626/decancer#readme", 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/null8626/decancer" 15 | }, 16 | "bugs": { 17 | "url": "https://github.com/null8626/decancer/issues" 18 | }, 19 | "license": "MIT", 20 | "main": "index.mjs", 21 | "dependencies": { 22 | "@fastify/static": "^8.0.0", 23 | "fastify": "^5.0.0", 24 | "puppeteer": "^24.0.0" 25 | }, 26 | "engines": { 27 | "node": ">= 16.3" 28 | }, 29 | "scripts": { 30 | "test": "node index.mjs" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /bindings/wasm/tests/server.mjs: -------------------------------------------------------------------------------- 1 | import { parentPort } from 'node:worker_threads' 2 | import fastifyStatic from '@fastify/static' 3 | import { createReadStream } from 'node:fs' 4 | import { dirname, join } from 'node:path' 5 | import { fileURLToPath } from 'node:url' 6 | import fastify from 'fastify' 7 | 8 | const CURRENT_DIR = dirname(fileURLToPath(import.meta.url)) 9 | 10 | console.log('- [server] running...') 11 | 12 | const app = fastify() 13 | 14 | app.register(fastifyStatic, { 15 | root: join(CURRENT_DIR, '..', 'bin') 16 | }) 17 | 18 | app.get('/', (req, res) => { 19 | console.log('- [server] received a request.') 20 | res.type('text/html').send(createReadStream(join(CURRENT_DIR, 'index.html'))) 21 | }) 22 | 23 | app.listen( 24 | { 25 | port: 8080 26 | }, 27 | err => { 28 | if (err) { 29 | parentPort.postMessage({ 30 | code: 'error', 31 | stack: err.stack 32 | 
}) 33 | } else { 34 | console.log('- [server] ready.') 35 | parentPort.postMessage({ 36 | code: 'ready' 37 | }) 38 | 39 | parentPort.on('message', () => { 40 | console.log('- [server] closing...') 41 | 42 | app.close().finally(() => 43 | parentPort.postMessage({ 44 | code: 'close' 45 | }) 46 | ) 47 | }) 48 | } 49 | } 50 | ) 51 | -------------------------------------------------------------------------------- /core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "decancer" 3 | version = "3.3.0" 4 | edition = "2021" 5 | authors = ["null (https://github.com/null8626)"] 6 | description = "A library that removes common unicode confusables/homoglyphs from strings." 7 | readme = "README.md" 8 | repository = "https://github.com/null8626/decancer" 9 | license = "MIT" 10 | keywords = ["security", "unicode", "homoglyphs", "moderation", "confusables"] 11 | categories = ["parsing", "text-processing", "value-formatting"] 12 | exclude = ["rustfmt.toml"] 13 | rust-version = "1.65.0" 14 | 15 | [dependencies] 16 | serde = { version = "1", default-features = false, optional = true } 17 | regex = { version = "1", default-features = false, features = ["std", "perf"], optional = true } 18 | lazy_static = { version = "1", optional = true } 19 | 20 | [lints.clippy] 21 | all = { level = "warn", priority = -1 } 22 | pedantic = { level = "warn", priority = -1 } 23 | cast-lossless = "allow" 24 | cast-possible-truncation = "allow" 25 | cast-possible-wrap = "allow" 26 | cast-sign-loss = "allow" 27 | inline-always = "allow" 28 | module-name-repetitions = "allow" 29 | must-use-candidate = "allow" 30 | return-self-not-must-use = "allow" 31 | similar-names = "allow" 32 | single-match-else = "allow" 33 | too-many-lines = "allow" 34 | unreadable-literal = "allow" 35 | 36 | [features] 37 | default = ["options", "separators", "leetspeak"] 38 | options = [] 39 | separators = [] 40 | leetspeak = ["regex", "lazy_static"] 41 | 42 | 
[dev-dependencies] 43 | proptest = { version = "1", default-features = false, features = ["std"] } 44 | serde = { version = "1", features = ["derive"] } 45 | serde_json = "1" 46 | criterion = { version = "0.6", default-features = false } 47 | rand = "0.9" 48 | censor = "0.3" 49 | 50 | [[bench]] 51 | name = "decancer_bench" 52 | harness = false 53 | 54 | [package.metadata.docs.rs] 55 | all-features = true 56 | rustdoc-args = ["--cfg", "docsrs"] 57 | rustc-args = ["--cfg", "docsrs"] -------------------------------------------------------------------------------- /core/README.md: -------------------------------------------------------------------------------- 1 | 3 | 4 | # decancer [![npm][npm-image]][npm-url] [![crates.io][crates-io-image]][crates-io-url] [![npm downloads][npm-downloads-image]][npm-url] [![crates.io downloads][crates-io-downloads-image]][crates-io-url] [![codacy][codacy-image]][codacy-url] [![ko-fi][ko-fi-brief-image]][ko-fi-url] 5 | 6 | [crates-io-image]: https://img.shields.io/crates/v/decancer?style=flat-square 7 | [crates-io-downloads-image]: https://img.shields.io/crates/d/decancer?style=flat-square 8 | [crates-io-url]: https://crates.io/crates/decancer 9 | [npm-image]: https://img.shields.io/npm/v/decancer.svg?style=flat-square 10 | [npm-url]: https://npmjs.org/package/decancer 11 | [npm-downloads-image]: https://img.shields.io/npm/dt/decancer.svg?style=flat-square 12 | [codacy-image]: https://app.codacy.com/project/badge/Grade/d740b1aa867d42f2b37eb992ad73784a 13 | [codacy-url]: https://app.codacy.com/gh/null8626/decancer/dashboard 14 | [ko-fi-brief-image]: https://img.shields.io/badge/donations-ko--fi-red?color=ff5e5b&style=flat-square 15 | [ko-fi-image]: https://ko-fi.com/img/githubbutton_sm.svg 16 | [ko-fi-url]: https://ko-fi.com/null8626 17 | 18 | A library that removes common unicode confusables/homoglyphs from strings. 
19 | 20 | - Its core is written in [Rust](https://www.rust-lang.org) and utilizes a form of [**Binary Search**](https://en.wikipedia.org/wiki/Binary_search_algorithm) to ensure speed! 21 | - By default, it's capable of filtering **221,529 (19.88%) different unicode codepoints** like: 22 | - All [whitespace characters](https://en.wikipedia.org/wiki/Whitespace_character) 23 | - All [diacritics](https://en.wikipedia.org/wiki/Diacritic); this also eliminates all forms of [Zalgo text](https://en.wikipedia.org/wiki/Zalgo_text) 24 | - Most [leetspeak characters](https://en.wikipedia.org/wiki/Leet) 25 | - Most [homoglyphs](https://en.wikipedia.org/wiki/Homoglyph) 26 | - Several emojis 27 | - Unlike other packages, this package is **[unicode bidi-aware](https://en.wikipedia.org/wiki/Bidirectional_text)**: it also interprets right-to-left characters in the same way as they would be rendered by an application! 28 | - Its behavior is also highly customizable to your liking! 29 | 30 | ## Installation 31 | In your `Cargo.toml`: 32 | 33 | ```toml 34 | decancer = "3.3.0" 35 | ``` 36 | ## Examples 37 | For more information, please read the [documentation](https://docs.rs/decancer). 38 | 39 | ```rust 40 | let mut cured = decancer::cure!(r"vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣 wWiIiIIttHh l133t5p3/-\|<").unwrap(); 41 | 42 | assert_eq!(cured, "very funny text with leetspeak"); 43 | 44 | // WARNING: it's NOT recommended to coerce this output to a Rust string 45 | // and process it manually from there, as decancer has its own 46 | // custom comparison measures, including leetspeak matching! 
47 | assert_ne!(cured.as_str(), "very funny text with leetspeak"); 48 | 49 | assert!(cured.contains("funny")); 50 | 51 | cured.censor("funny", '*'); 52 | assert_eq!(cured, "very ***** text with leetspeak"); 53 | 54 | cured.censor_multiple(["very", "text"], '-'); 55 | assert_eq!(cured, "---- ***** ---- with leetspeak"); 56 | ``` 57 | ## Donations 58 | 59 | If you want to support my eyes for manually looking at thousands of unicode characters, consider donating! ❤ 60 | 61 | [![ko-fi][ko-fi-image]][ko-fi-url] 62 | -------------------------------------------------------------------------------- /core/benches/decancer_bench.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; 2 | use rand::random; 3 | 4 | fn cure(c: &mut Criterion) { 5 | c.bench_function("cure", |b| { 6 | b.iter(|| decancer::cure!("vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣").unwrap()); 7 | }); 8 | } 9 | 10 | fn cure_char(c: &mut Criterion) { 11 | c.bench_function("cure_char", |b| { 12 | b.iter_batched( 13 | random::, 14 | |character| decancer::cure_char!(character), 15 | BatchSize::SmallInput, 16 | ); 17 | }); 18 | } 19 | 20 | #[cfg(feature = "leetspeak")] 21 | fn leetspeak(c: &mut Criterion) { 22 | c.bench_function("leetspeak", |b| { 23 | b.iter(|| { 24 | let cured = decancer::cure!(r"vEⓡ𝔂 𝔽𝕌Ňℕy ţ乇𝕏𝓣 wWiIiIIttHh l133t5p3/-\| (u32, u32) { 19 | if a > b { 20 | (b, a) 21 | } else { 22 | (a, b) 23 | } 24 | } 25 | 26 | impl OpeningBracket { 27 | pub(crate) const fn new(code: u32) -> Option { 28 | let mut start = 0i32; 29 | let mut end = BIDI_BRACKETS_COUNT as i32; 30 | 31 | while start <= end { 32 | let mid = (start + end) / 2; 33 | let offset = (4 + (mid * 5)) as _; 34 | 35 | let first = BIDI.u32_at(offset); 36 | let opening = ((BIDI.u16_at(offset + 4) as u32) << 8) | ((first >> 20) & 0xff); 37 | 38 | let diff = (first >> 28) & 7; 39 | 40 | let closing = if first >= 0x80000000 { 41 | opening - diff 42 | } else { 43 | opening 
+ diff 44 | }; 45 | 46 | let (min, max) = min_max(opening, closing); 47 | 48 | if code < min { 49 | end = mid - 1; 50 | } else if code > max { 51 | start = mid + 1; 52 | } else { 53 | let is_open = code == opening; 54 | 55 | if is_open || code == closing { 56 | let mut decomps = first & CODEPOINT_MASK; 57 | 58 | if decomps == 0 { 59 | decomps = opening; 60 | } 61 | 62 | return Some(Self { 63 | opening: decomps, 64 | is_open, 65 | }); 66 | } 67 | 68 | break; 69 | } 70 | } 71 | 72 | None 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /core/src/bidi/class.rs: -------------------------------------------------------------------------------- 1 | use super::{OverrideStatus, BIDI, BIDI_DICTIONARY_COUNT, BIDI_DICTIONARY_OFFSET}; 2 | use crate::util::{numbered_enum, CODEPOINT_MASK}; 3 | 4 | numbered_enum! { 5 | #[allow(dead_code)] 6 | #[cfg_attr(test, derive(Debug))] 7 | #[derive(Copy, Clone, PartialEq)] 8 | pub(crate) enum Class: u8 { 9 | B = 0, 10 | S = 1, 11 | WS = 2, 12 | ON = 3, 13 | ET = 4, 14 | ES = 5, 15 | CS = 6, 16 | EN = 7, 17 | L = 8, 18 | BN = 9, 19 | R = 10, 20 | AN = 11, 21 | AL = 12, 22 | LRE = 13, 23 | RLE = 14, 24 | PDF = 15, 25 | LRO = 16, 26 | RLO = 17, 27 | LRI = 18, 28 | RLI = 19, 29 | FSI = 20, 30 | PDI = 21, 31 | } 32 | } 33 | 34 | impl Class { 35 | pub(crate) fn new(code: u32) -> Option { 36 | let mut start = 0i32; 37 | let mut end = BIDI_DICTIONARY_COUNT as i32; 38 | 39 | while start <= end { 40 | let mid = (start + end) / 2; 41 | let offset = ((BIDI_DICTIONARY_OFFSET as i32) + (mid * 6)) as _; 42 | let kv = BIDI.u32_at(offset); 43 | 44 | let other = kv & CODEPOINT_MASK; 45 | 46 | if code < other { 47 | end = mid - 1; 48 | } else if code > (other + BIDI.u16_at(offset + 4) as u32) { 49 | start = mid + 1; 50 | } else { 51 | return Some(((kv >> 20) as u8).into()); 52 | } 53 | } 54 | 55 | None 56 | } 57 | 58 | pub(crate) const fn is_neutral_or_isolate(self) -> bool { 59 | matches!(self, Self::B | 
Self::S | Self::WS | Self::ON | Self::PDI) || self.is_isolate() 60 | } 61 | 62 | pub(crate) const fn is_rtl(self) -> bool { 63 | matches!(self, Self::RLE | Self::RLO | Self::RLI) 64 | } 65 | 66 | pub(crate) const fn is_isolate(self) -> bool { 67 | matches!(self, Self::RLI | Self::LRI | Self::FSI) 68 | } 69 | 70 | pub(crate) const fn override_status(self) -> OverrideStatus { 71 | match self { 72 | Self::RLO => OverrideStatus::RTL, 73 | Self::LRO => OverrideStatus::LTR, 74 | Self::RLI | Self::LRI | Self::FSI => OverrideStatus::Isolate, 75 | _ => OverrideStatus::Neutral, 76 | } 77 | } 78 | 79 | pub(crate) const fn removed_by_x9(self) -> bool { 80 | matches!( 81 | self, 82 | Self::RLE | Self::LRE | Self::RLO | Self::LRO | Self::PDF | Self::BN 83 | ) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /core/src/bidi/level.rs: -------------------------------------------------------------------------------- 1 | use super::Class; 2 | use crate::Error; 3 | 4 | #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] 5 | pub(crate) struct Level(pub(crate) u8); 6 | 7 | pub(crate) const MAX_EXPLICIT_DEPTH: u8 = 125; 8 | pub(crate) const MAX_IMPLICIT_DEPTH: u8 = MAX_EXPLICIT_DEPTH + 1; 9 | 10 | impl Level { 11 | pub(crate) const fn ltr() -> Self { 12 | Self(0) 13 | } 14 | 15 | pub(crate) const fn rtl() -> Self { 16 | Self(1) 17 | } 18 | 19 | pub(crate) const fn new_explicit(number: u8) -> Result { 20 | if number <= MAX_EXPLICIT_DEPTH { 21 | Ok(Self(number)) 22 | } else { 23 | Err(Error::LevelExplicitOverflow) 24 | } 25 | } 26 | 27 | pub(crate) const fn new_implicit(number: u8) -> Result { 28 | if number <= MAX_IMPLICIT_DEPTH { 29 | Ok(Self(number)) 30 | } else { 31 | Err(Error::LevelImplicitOverflow) 32 | } 33 | } 34 | 35 | pub(crate) fn lower(&mut self, amount: u8) -> Result<(), Error> { 36 | self.0 = self 37 | .0 38 | .checked_sub(amount) 39 | .ok_or(Error::LevelModificationUnderflow)?; 40 | 41 | Ok(()) 42 | } 43 | 44 | pub(crate) 
fn raise(&mut self, amount: u8) -> Result<(), Error> { 45 | let number = self 46 | .0 47 | .checked_add(amount) 48 | .ok_or(Error::LevelModificationOverflow)?; 49 | 50 | if number <= MAX_IMPLICIT_DEPTH { 51 | self.0 = number; 52 | 53 | Ok(()) 54 | } else { 55 | Err(Error::LevelModificationOverflow) 56 | } 57 | } 58 | 59 | pub(crate) const fn is_ltr(self) -> bool { 60 | self.0 % 2 == 0 61 | } 62 | 63 | pub(crate) const fn is_rtl(self) -> bool { 64 | self.0 % 2 == 1 65 | } 66 | 67 | pub(crate) const fn class(self) -> Class { 68 | if self.is_ltr() { 69 | Class::L 70 | } else { 71 | Class::R 72 | } 73 | } 74 | 75 | pub(crate) const fn new_explicit_next_ltr(self) -> Result { 76 | Self::new_explicit((self.0 + 2) & !1) 77 | } 78 | 79 | pub(crate) const fn new_explicit_next_rtl(self) -> Result { 80 | Self::new_explicit((self.0 + 1) | 1) 81 | } 82 | 83 | pub(crate) const fn new_lowest_ge_rtl(self) -> Result { 84 | Self::new_implicit(self.0 | 1) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /core/src/bidi/mod.rs: -------------------------------------------------------------------------------- 1 | mod class; 2 | 3 | use crate::util::Binary; 4 | pub(crate) use class::Class; 5 | 6 | const BIDI: Binary = Binary::new(include_bytes!("../../bin/bidi.bin")); 7 | 8 | const BIDI_DICTIONARY_OFFSET: u16 = BIDI.u16_at(0); 9 | const BIDI_DICTIONARY_COUNT: u16 = BIDI.u16_at(2); 10 | const BIDI_BRACKETS_COUNT: u16 = ((BIDI_DICTIONARY_OFFSET - 4) / 5) - 1; 11 | 12 | mod brackets; 13 | mod level; 14 | mod paragraph; 15 | 16 | use brackets::{BracketPair, OpeningBracket}; 17 | use paragraph::OverrideStatus; 18 | 19 | pub(crate) use level::Level; 20 | #[cfg(test)] 21 | pub(crate) use paragraph::IsolatingRunSequence; 22 | pub(crate) use paragraph::Paragraph; 23 | -------------------------------------------------------------------------------- /core/src/codepoints.rs: 
-------------------------------------------------------------------------------- 1 | #[cfg(feature = "options")] 2 | use crate::Options; 3 | use crate::{ 4 | similar::SIMILAR_START, 5 | translation::Translation, 6 | util::{Binary, CODEPOINT_MASK}, 7 | }; 8 | use std::cmp::Ordering; 9 | 10 | pub(crate) const CODEPOINTS: Binary = Binary::new(include_bytes!("../bin/codepoints.bin")); 11 | 12 | pub(crate) const CASE_SENSITIVE_CODEPOINTS_COUNT: u16 = 13 | ((SIMILAR_START - CASE_SENSITIVE_CODEPOINTS_OFFSET) / 6) - 1; 14 | pub(crate) const CASE_SENSITIVE_CODEPOINTS_OFFSET: u16 = CODEPOINTS.u16_at(0); 15 | pub(crate) const CODEPOINTS_COUNT: u16 = ((CASE_SENSITIVE_CODEPOINTS_OFFSET - 6) / 6) - 1; 16 | 17 | const STRING_TRANSLATION_MASK: u32 = 0x10000000; 18 | 19 | #[derive(Copy, Clone)] 20 | #[cfg_attr(not(feature = "options"), allow(dead_code))] 21 | pub(crate) struct Codepoint(u32, u8, u8); 22 | 23 | impl Codepoint { 24 | const fn get_codepoint(self) -> u32 { 25 | self.0 & CODEPOINT_MASK 26 | } 27 | 28 | const fn range_size(self) -> u32 { 29 | if self.is_string_translation() { 30 | 0 31 | } else { 32 | (self.1 & 0x7f) as _ 33 | } 34 | } 35 | 36 | const fn is_string_translation(self) -> bool { 37 | self.0 >= STRING_TRANSLATION_MASK 38 | } 39 | 40 | const fn ascii_translation(self) -> u32 { 41 | (self.0 >> 20) & 0x7f 42 | } 43 | 44 | const fn is_translation_synced(self) -> bool { 45 | self.1 >= 0x80 46 | } 47 | 48 | pub(crate) const fn at(offset: i32) -> Self { 49 | Self( 50 | CODEPOINTS.u32_at(offset as _), 51 | CODEPOINTS.at((4 + offset) as _), 52 | CODEPOINTS.at((5 + offset) as _), 53 | ) 54 | } 55 | 56 | pub(crate) const fn matches( 57 | self, 58 | other: u32, 59 | #[cfg(feature = "options")] options: Options, 60 | ) -> Option { 61 | let mut conf = self.get_codepoint(); 62 | 63 | if other < conf { 64 | return Some(Ordering::Less); 65 | } 66 | 67 | conf += self.range_size(); 68 | 69 | if other > conf { 70 | return Some(Ordering::Greater); 71 | } 72 | 73 | #[cfg(feature = 
"options")] 74 | if options.refuse_cure(self.2) { 75 | return None; 76 | } 77 | 78 | Some(Ordering::Equal) 79 | } 80 | 81 | pub(crate) fn translation(self, other: u32) -> Translation { 82 | if self.is_string_translation() { 83 | Translation::string(self.0, self.1) 84 | } else { 85 | let mut code = self.ascii_translation(); 86 | 87 | if code == 0 { 88 | return Translation::None; 89 | } else if self.is_translation_synced() { 90 | code += other - self.get_codepoint(); 91 | } 92 | 93 | Translation::character(code) 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /core/src/leetspeak.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use regex::bytes::{Regex, RegexBuilder}; 3 | 4 | macro_rules! re { 5 | ($pattern:literal) => {{ 6 | Some( 7 | RegexBuilder::new($pattern) 8 | .unicode(false) 9 | .dfa_size_limit(83886080) 10 | .build() 11 | .unwrap(), 12 | ) 13 | }}; 14 | } 15 | 16 | lazy_static! 
{ 17 | static ref REGEXES: [Option; 26] = [ 18 | re!(r"^(?:\/-*\\|[\[\(\{<]L)"), 19 | re!(r"^[\\\/\[\]\{\}\(\)\:\|iIljJ17T!](?:3|\-*[\]\)\}>])"), 20 | None, 21 | re!( 22 | r"^(?:[\\\/\[\{\(\:\|iIljJ1T!][\]\}\)>7]|\][\}\)>7]|\}[\]\)>7]|\)[\]\}>7]|7[\]\}\)>]|c[\\\/\[\{\(\:\|iIljJ1T!])" 23 | ), 24 | re!(r"^[\(\{\[\\\[\]\{\}\(\)\:\|iIljJ17T!]|[\/\[\]\{\}\(\)\:\|iIljJ17T!]\\[\/\[\]\{\}\(\)\:\|iIljJ17T!]\\|[\\\[\]\{\}\(\)\:\|iIljJ17T!]\/[\\\[\]\{\}\(\)\:\|iIljJ17T!]\/|[n1^]{2}|rn)" 34 | ), 35 | re!(r"^(?:[\/\[\]\{\}\(\)\:\|iIljJ17T!<]\\[>\/\[\]\{\}\(\)\:\|iIljJ17T!]|\/[vV]|^\/)"), 36 | re!(r"^[\(\{\[7]"), 37 | re!(r"^[\\\/\[\]\{\}\(\)\:\|iIljJ17T!][o0\*^]"), 38 | re!(r"^(?:[\(\{\[7]|[oO0])_+"), 39 | re!(r"^[\\\/\[\]\{\}\(\)\:\|iIljJ17T!][2Zz`\?]"), 40 | None, 41 | re!(r"^[\-~]+[\\\/\[\]\{\}\(\)\:\|iIljJ17T!][\-~]+"), 42 | re!( 43 | r"^(?:[\\\/\[\]\{\}\(\)\:\|iIljJ17T!<]_+[>\\\/\[\]\{\}\(\)\:\|iIljJ17T!]|L[\\\/\[\]\{\}\(\)\:\|iIljJ17T!])" 44 | ), 45 | re!(r"^(?:[\\\[\]\{\}\(\)\:\|iIljJ17T!]\/|\\[\/\[\]\{\}\(\)\:\|iIljJ17T!]|\\\|)"), 46 | re!( 47 | r"^(?:[\(\{\[7]|\\_*(?:\/\\|[^xX\[\]\{\}\(\)\:\|iIljJ17T!])_*\/|[vVuU]{2}|\\N)" 48 | ), 49 | re!(r"^[>\}\)\]][<\{\(\[]"), 50 | re!(r"^`\/"), 51 | re!(r"^(?:([~\-][\\\/])|[7>])_+"), 52 | ]; 53 | } 54 | 55 | pub(crate) fn find(haystack: &[u8], character: u32) -> Option { 56 | let idx = match character { 57 | 65..=90 => character - 65, 58 | 97..=122 => character - 97, 59 | _ => return None, 60 | }; 61 | 62 | REGEXES[idx as usize] 63 | .as_ref()? 64 | .find(haystack) 65 | .map(|mat| mat.len()) 66 | } 67 | -------------------------------------------------------------------------------- /core/src/options.rs: -------------------------------------------------------------------------------- 1 | use crate::{codepoints::Codepoint, Translation}; 2 | use std::cmp::Ordering; 3 | 4 | /// A configuration struct where you can customize decancer's behavior. 
5 | /// 6 | /// By default, decancer cures as many characters as possible and turns all the output characters to lowercase. 7 | /// 8 | /// If you don't plan on using this struct and only using decancer's defaults, it's recommended to disable the default `options` feature flag to optimize away unnecessary option checks. 9 | /// 10 | /// ```rust 11 | /// use decancer::Options; 12 | /// 13 | /// // by default, all options are disabled 14 | /// let _options = Options::default(); 15 | /// ``` 16 | #[derive(Copy, Clone, Eq, PartialEq, Default, Hash)] 17 | pub struct Options(pub(crate) u32); 18 | 19 | macro_rules! options { 20 | ($( 21 | $(#[$extra_meta:meta])* 22 | $idx:literal: $name:ident, 23 | )*) => { 24 | $( 25 | $(#[$extra_meta])* 26 | #[cfg_attr(not(feature = "options"), cold)] 27 | pub const fn $name(self) -> Self { 28 | #[cfg(feature = "options")] 29 | return Self(self.0 | (1 << $idx)); 30 | 31 | #[cfg(not(feature = "options"))] 32 | return self; 33 | } 34 | )* 35 | }; 36 | } 37 | 38 | impl Options { 39 | /// Creates a new configuration where every option is enabled. 40 | #[cfg_attr(not(feature = "options"), cold)] 41 | pub const fn all() -> Self { 42 | #[cfg(feature = "options")] 43 | return Self(0x1ffffff); 44 | 45 | #[cfg(not(feature = "options"))] 46 | return Self(0); 47 | } 48 | 49 | /// Creates a new configuration that prevents decancer from curing characters from major foreign writing systems, including diacritics. 50 | #[cfg_attr(not(feature = "options"), cold)] 51 | pub const fn pure_homoglyph() -> Self { 52 | #[cfg(feature = "options")] 53 | return Self(0x3ffffc); 54 | 55 | #[cfg(not(feature = "options"))] 56 | return Self(0); 57 | } 58 | 59 | options! { 60 | /// Prevents decancer from changing all characters to lowercase. Therefore, if the input character is in uppercase, the output character will be in uppercase as well. 61 | /// 62 | /// **NOTE:** Many confusables are neither an uppercase nor a lowercase character. 
Therefore, decancer defaults to displaying the translation **in lowercase**: 63 | /// 64 | /// ```rust 65 | /// use decancer::{Translation, Options}; 66 | /// use std::borrow::Cow; 67 | /// 68 | /// let options = Options::default() 69 | /// .retain_capitalization(); 70 | /// 71 | /// assert_eq!('🆐'.to_lowercase().collect::(), '🆐'.to_uppercase().collect::()); 72 | /// assert_eq!(decancer::cure_char('🆐', options), Translation::String(Cow::Borrowed("dj"))); 73 | /// ``` 74 | 0: retain_capitalization, 75 | 76 | /// Prevents decancer from applying the [Unicode Bidirectional Algorithm](https://en.wikipedia.org/wiki/Bidirectional_text). Use this **only** when you don't expect any right-to-left characters. Enabling this option has no effect if it's called on [`cure_char`][crate::cure_char()]. 77 | /// 78 | /// **NOTE:** This speeds up the function call, but **can break [right-to-left characters](https://en.wikipedia.org/wiki/Bidirectional_text)**. It's highly recommended to also use [`retain_arabic`][Options::retain_arabic] and [`retain_hebrew`][Options::retain_hebrew]. 79 | 1: disable_bidi, 80 | 81 | /// Prevents decancer from curing characters *with* diacritics or accents. 82 | /// 83 | /// **NOTE:** Decancer can still cure standalone diacritic characters, which are used in [Zalgo texts](https://en.wikipedia.org/wiki/Zalgo_text). 84 | 2: retain_diacritics, 85 | 86 | /// Prevents decancer from curing all greek characters. 87 | 3: retain_greek, 88 | 89 | /// Prevents decancer from curing all cyrillic characters. 90 | 4: retain_cyrillic, 91 | 92 | /// Prevents decancer from curing all hebrew characters. 93 | 5: retain_hebrew, 94 | 95 | /// Prevents decancer from curing all arabic characters. 96 | 6: retain_arabic, 97 | 98 | /// Prevents decancer from curing all devanagari characters. 99 | 7: retain_devanagari, 100 | 101 | /// Prevents decancer from curing all bengali characters. 
102 | 8: retain_bengali, 103 | 104 | /// Prevents decancer from curing all Armenian characters. 105 | 9: retain_armenian, 106 | 107 | /// Prevents decancer from curing all Gujarati characters. 108 | 10: retain_gujarati, 109 | 110 | /// Prevents decancer from curing all Tamil characters. 111 | 11: retain_tamil, 112 | 113 | /// Prevents decancer from curing all Thai characters. 114 | 12: retain_thai, 115 | 116 | /// Prevents decancer from curing all Lao characters. 117 | 13: retain_lao, 118 | 119 | /// Prevents decancer from curing all Burmese characters. 120 | 14: retain_burmese, 121 | 122 | /// Prevents decancer from curing all Khmer characters. 123 | 15: retain_khmer, 124 | 125 | /// Prevents decancer from curing all Mongolian characters. 126 | 16: retain_mongolian, 127 | 128 | /// Prevents decancer from curing all Chinese characters. 129 | 17: retain_chinese, 130 | 131 | /// Prevents decancer from curing all katakana and hiragana characters. 132 | /// 133 | /// **NOTE:** To also prevent decancer from curing kanji characters, use [`retain_chinese`][Options::retain_chinese]. 134 | 18: retain_japanese, 135 | 136 | /// Prevents decancer from curing all Korean characters. 137 | 19: retain_korean, 138 | 139 | /// Prevents decancer from curing all Braille characters. 140 | 20: retain_braille, 141 | 142 | /// Prevents decancer from curing all emojis. 143 | 21: retain_emojis, 144 | 145 | /// Prevents decancer from curing all Turkish characters. 146 | /// 147 | /// **NOTE:** To also prevent decancer from curing [the uppercase dotted i character](https://en.wikipedia.org/wiki/İ) (`İ`), use [`retain_capitalization`][Options::retain_capitalization]. 148 | 22: retain_turkish, 149 | 150 | /// Removes all non-ASCII characters from the result. 151 | 23: ascii_only, 152 | 153 | /// Removes all non-alphanumeric characters from the result.
154 | 24: alphanumeric_only, 155 | } 156 | 157 | #[cfg(feature = "options")] 158 | pub(crate) const fn is(self, attribute_idx: u8) -> bool { 159 | (self.0 & (1 << attribute_idx as u32)) != 0 160 | } 161 | 162 | #[cfg(feature = "options")] 163 | pub(crate) const fn refuse_cure(self, attributes: u8) -> bool { 164 | let locale = attributes >> 2; 165 | 166 | ((attributes & 1) != 0 && self.is(2)) 167 | || ((attributes & 2) != 0 && self.is(22)) 168 | || locale > 2 && self.is(locale) 169 | } 170 | 171 | pub(crate) fn translate(self, code: u32, offset: i32, mut end: i32) -> Option { 172 | let mut start = 0; 173 | 174 | while start <= end { 175 | let mid = (start + end) / 2; 176 | let codepoint = Codepoint::at(offset + (mid * 6)); 177 | #[cfg(feature = "options")] 178 | let ord = codepoint.matches(code, self)?; 179 | 180 | #[cfg(not(feature = "options"))] 181 | let ord = codepoint.matches(code)?; 182 | 183 | match ord { 184 | Ordering::Equal => return Some(codepoint.translation(code)), 185 | Ordering::Greater => start = mid + 1, 186 | Ordering::Less => end = mid - 1, 187 | } 188 | } 189 | 190 | None 191 | } 192 | } 193 | 194 | #[doc(hidden)] 195 | #[cfg(feature = "options")] 196 | impl From for Options { 197 | #[inline(always)] 198 | fn from(value: u32) -> Self { 199 | Self(value) 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /core/src/similar.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "leetspeak")] 2 | use crate::leetspeak; 3 | use crate::{codepoints::CODEPOINTS, util::unwrap_or_ret}; 4 | use std::{iter::FusedIterator, ops::Range, str::Chars}; 5 | 6 | pub(crate) const SIMILAR_START: u16 = CODEPOINTS.u16_at(2); 7 | pub(crate) const SIMILAR_END: u16 = CODEPOINTS.u16_at(4); 8 | 9 | pub(crate) fn is(self_char: char, other_char: char) -> bool { 10 | let self_char = self_char.to_lowercase().next().unwrap() as u32; 11 | let other_char = 
other_char.to_lowercase().next().unwrap() as u32; 12 | 13 | if self_char == other_char { 14 | return true; 15 | } else if self_char <= 0x7f && other_char <= 0x7f { 16 | let mut id = 0; 17 | 18 | for offset in SIMILAR_START..SIMILAR_END { 19 | let cur = CODEPOINTS.at(offset as _); 20 | let sim = cur & 0x7f; 21 | 22 | if sim == (self_char as u8) { 23 | id |= 1; 24 | } else if sim == (other_char as u8) { 25 | id |= 2; 26 | } 27 | 28 | if id == 3 { 29 | return true; 30 | } else if cur >= 0x80 { 31 | id = 0; 32 | } 33 | } 34 | } 35 | 36 | false 37 | } 38 | 39 | struct CachedPeek<'a> { 40 | iterator: Chars<'a>, 41 | cache: Vec, 42 | index: usize, 43 | ended: bool, 44 | } 45 | 46 | impl<'a> CachedPeek<'a> { 47 | #[inline(always)] 48 | pub(crate) fn new(iterator: Chars<'a>, first: char) -> Self { 49 | Self { 50 | iterator, 51 | cache: vec![first], 52 | index: 0, 53 | ended: false, 54 | } 55 | } 56 | 57 | fn next_value(&mut self) -> Option { 58 | self.index += 1; 59 | 60 | match self.cache.get(self.index) { 61 | Some(value) => Some(*value), 62 | 63 | None => { 64 | let value = self.iterator.next()?; 65 | self.cache.push(value); 66 | 67 | Some(value) 68 | }, 69 | } 70 | } 71 | 72 | #[inline(always)] 73 | fn restart(&mut self) { 74 | self.index = 0; 75 | self.ended = false; 76 | } 77 | } 78 | 79 | impl Iterator for CachedPeek<'_> { 80 | type Item = (char, Option); 81 | 82 | fn next(&mut self) -> Option { 83 | if self.ended { 84 | return None; 85 | } 86 | 87 | let current = self.cache[self.index]; 88 | let next_element = self.next_value(); 89 | 90 | if next_element.is_none() { 91 | self.ended = true; 92 | } 93 | 94 | Some((current, next_element)) 95 | } 96 | } 97 | 98 | /// A matcher iterator around a string that yields a non-inclusive [`Range`] whenever it detects a similar match. 
99 | pub struct Matcher<'a, 'b> { 100 | self_iterator: Chars<'a>, 101 | #[cfg(feature = "leetspeak")] 102 | self_str: &'a str, 103 | self_index: usize, 104 | other_iterator: CachedPeek<'b>, 105 | } 106 | 107 | impl<'a, 'b> Matcher<'a, 'b> { 108 | pub(crate) fn new(mut self_str: &'a str, other_str: &'b str) -> Self { 109 | let mut other_chars = other_str.chars(); 110 | let other_first = other_chars.next(); 111 | // an empty needle, or a haystack with fewer bytes than the needle, can never match: search an empty haystack instead 112 | if other_first.is_none() || self_str.len() < other_str.len() { 113 | self_str = ""; 114 | } 115 | // `unwrap_or_default` keeps an empty needle from panicking; the placeholder '\0' is never compared because the haystack is empty in that case 116 | Self { 117 | self_iterator: self_str.chars(), 118 | #[cfg(feature = "leetspeak")] 119 | self_str, 120 | self_index: 0, 121 | other_iterator: CachedPeek::new(other_chars, other_first.unwrap_or_default()), 122 | } 123 | } 124 | /// Asks `leetspeak::find` for the byte length of a match for `other_char` in the haystack from `self_index` onwards; on success, iteration resumes right after those bytes. 125 | #[cfg(feature = "leetspeak")] 126 | fn matches_leetspeak(&mut self, other_char: char) -> Option<usize> { 127 | let haystack = &self.self_str[self.self_index..]; 128 | let matched_len = leetspeak::find(haystack.as_bytes(), other_char as _)?; 129 | 130 | self.self_iterator = haystack[matched_len..].chars(); 131 | 132 | Some(matched_len) 133 | } 134 | /// Returns the number of haystack bytes consumed (the UTF-8 length of `self_char`) if both characters are similar. 135 | #[cfg_attr(not(feature = "leetspeak"), inline(always))] 136 | fn matches_character(self_char: char, other_char: char) -> Option<usize> { 137 | if is(self_char, other_char) { 138 | Some(self_char.len_utf8()) // callers add this to `self_index`, a byte offset into the haystack, so it must be the haystack character's length 139 | } else { 140 | None 141 | } 142 | } 143 | /// Tries a plain character match first and, when the `leetspeak` feature is enabled, falls back to `matches_leetspeak`. 144 | fn matches(&mut self, self_char: char, other_char: char) -> Option<usize> { 145 | #[cfg(feature = "leetspeak")] 146 | { 147 | Self::matches_character(self_char, other_char).or_else(|| self.matches_leetspeak(other_char)) 148 | } 149 | 150 | #[cfg(not(feature = "leetspeak"))] 151 | { 152 | Self::matches_character(self_char, other_char) 153 | } 154 | } 155 | /// Returns `true` only if the first match of `other_str` starts at byte 0 and ends at the last byte of `self_str`. 156 | pub(crate) fn is_equal(self_str: &'a str, other_str: &'b str) -> bool { 157 | let mut iter = Self::new(self_str, other_str); 158 | let Some(mat) = iter.next() else { 159 | return false; 160 | }; 161 | 162 | mat.start == 0 && mat.end == self_str.len() 163 | } 164 | 165 | fn skip_until(&mut
self, other_char: char) -> Option<(usize, usize)> { 166 | let mut skipped = 0; 167 | 168 | loop { 169 | let next_self_char = self.self_iterator.next()?; 170 | 171 | if let Some(matched_skip) = self.matches(next_self_char, other_char) { 172 | return Some((skipped, matched_skip)); 173 | } 174 | 175 | skipped += next_self_char.len_utf8(); 176 | } 177 | } 178 | } 179 | 180 | impl Iterator for Matcher<'_, '_> { 181 | type Item = Range; 182 | 183 | fn next(&mut self) -> Option { 184 | self.other_iterator.restart(); 185 | 186 | let mut current_other = self.other_iterator.next()?; 187 | 188 | let (skipped, matched_skip) = self.skip_until(current_other.0)?; 189 | 190 | let mut start_index = self.self_index + skipped; 191 | self.self_index = start_index + matched_skip; 192 | let mut last_match_end = start_index; 193 | #[cfg(feature = "separators")] 194 | let mut current_separator: Option = None; 195 | 196 | while let Some(next_self_char) = self.self_iterator.next() { 197 | if let Some(matched_skip) = current_other 198 | .1 199 | .and_then(|next_other| self.matches(next_self_char, next_other)) 200 | { 201 | self.self_index += matched_skip; 202 | last_match_end = self.self_index; 203 | #[cfg(feature = "separators")] 204 | { 205 | current_separator = None; 206 | } 207 | 208 | current_other = unwrap_or_ret!( 209 | self.other_iterator.next(), 210 | Some(start_index..last_match_end) 211 | ); 212 | 213 | continue; 214 | } 215 | 216 | if let Some(matched_skip) = self.matches(next_self_char, current_other.0) { 217 | self.self_index += matched_skip; 218 | last_match_end = self.self_index; 219 | #[cfg(feature = "separators")] 220 | { 221 | current_separator = None; 222 | } 223 | } else { 224 | self.self_index += next_self_char.len_utf8(); 225 | 226 | #[cfg(feature = "separators")] 227 | match current_separator { 228 | Some(separator) => { 229 | if !is(next_self_char, separator) { 230 | if current_other.1.is_none() { 231 | return Some(start_index..last_match_end); 232 | } 233 | 234 | 
self.other_iterator.restart(); 235 | 236 | current_separator = None; 237 | current_other = self.other_iterator.next()?; 238 | 239 | let (skipped, matched_skip) = self.skip_until(current_other.0)?; 240 | 241 | start_index = self.self_index + skipped; 242 | self.self_index = start_index + matched_skip; 243 | } 244 | }, 245 | 246 | None => { 247 | current_separator.replace(next_self_char); 248 | }, 249 | } 250 | 251 | #[cfg(not(feature = "separators"))] 252 | { 253 | if current_other.1.is_none() { 254 | return Some(start_index..last_match_end); 255 | } 256 | 257 | self.other_iterator.restart(); 258 | 259 | current_other = self.other_iterator.next()?; 260 | 261 | let (skipped, matched_skip) = self.skip_until(current_other.0)?; 262 | 263 | start_index = self.self_index + skipped; 264 | self.self_index = start_index + matched_skip; 265 | } 266 | } 267 | } 268 | 269 | if current_other.1.is_none() { 270 | Some(start_index..last_match_end) 271 | } else { 272 | None 273 | } 274 | } 275 | } 276 | 277 | impl FusedIterator for Matcher<'_, '_> {} 278 | -------------------------------------------------------------------------------- /core/src/translation.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "options")] 2 | use crate::util::is_alphanumeric; 3 | use crate::{ 4 | codepoints::CODEPOINTS, 5 | similar::{self, SIMILAR_END as STRINGS_OFFSET}, 6 | Matcher, 7 | }; 8 | #[cfg(feature = "serde")] 9 | use serde::{Deserialize, Deserializer, Serialize, Serializer}; 10 | use std::{ 11 | borrow::Cow, 12 | cmp::PartialEq, 13 | fmt::{self, Debug, Display}, 14 | ops::{Add, AddAssign}, 15 | str, 16 | }; 17 | 18 | /// The translation for a single character/codepoint. 19 | #[derive(Clone, Debug, PartialEq, Hash)] 20 | pub enum Translation { 21 | /// A single unicode character. 22 | Character(char), 23 | /// A string. 24 | String(Cow<'static, str>), 25 | /// This suggests that the translation is an empty string. 
You can get this when the input character is a [control character](https://en.wikipedia.org/wiki/Control_character), [surrogate](https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Surrogates), [combining character](https://en.wikipedia.org/wiki/Script_(Unicode)#Special_script_property_values) (e.g diacritics), [private use character](https://en.wikipedia.org/wiki/Private_Use_Areas), [byte order character](https://en.wikipedia.org/wiki/Byte_order_mark), or any invalid unicode value (e.g beyond [`char::MAX`]). 26 | None, 27 | } 28 | 29 | impl Translation { 30 | pub(crate) fn string(integer: u32, second_byte: u8) -> Self { 31 | Self::String(Cow::Borrowed( 32 | str::from_utf8(CODEPOINTS.sliced( 33 | (STRINGS_OFFSET + (((((integer >> 20) as u16) & 0x07) << 8) | (second_byte as u16))) as _, 34 | ((integer >> 23) & 0x1f) as _, 35 | )) 36 | .unwrap(), 37 | )) 38 | } 39 | 40 | #[inline(always)] 41 | pub(crate) fn character(code: u32) -> Self { 42 | Self::Character(char::from_u32(code).unwrap()) 43 | } 44 | 45 | #[cfg(feature = "options")] 46 | pub(crate) fn make_uppercase(&mut self) { 47 | match self { 48 | Self::Character(c) => *c = c.to_uppercase().next().unwrap(), 49 | Self::String(s) => s.to_mut().make_ascii_uppercase(), 50 | Self::None => {}, 51 | } 52 | } 53 | 54 | #[cfg(feature = "options")] 55 | fn is_ascii(&self) -> bool { 56 | match self { 57 | Self::Character(c) => (*c as u32) <= 0x7f, 58 | Self::String(ref s) => s.is_ascii(), 59 | Self::None => false, 60 | } 61 | } 62 | 63 | #[cfg(feature = "options")] 64 | fn is_alphanumeric(&self) -> bool { 65 | match self { 66 | Self::Character(c) => is_alphanumeric(*c as _), 67 | Self::String(ref s) => s.bytes().all(|b| is_alphanumeric(b as _)), 68 | Self::None => false, 69 | } 70 | } 71 | 72 | #[cfg(feature = "options")] 73 | pub(crate) fn ensure_stripped_if(self, ascii_only: bool, alphanumeric_only: bool) -> Self { 74 | if (ascii_only && !self.is_ascii()) || (alphanumeric_only && !self.is_alphanumeric()) { 75 
| Self::None 76 | } else { 77 | self 78 | } 79 | } 80 | } 81 | 82 | impl From for Cow<'static, str> { 83 | fn from(translation: Translation) -> Self { 84 | match translation { 85 | Translation::Character(c) => Self::Owned(String::from(c)), 86 | Translation::String(s) => s, 87 | Translation::None => Self::Borrowed(""), 88 | } 89 | } 90 | } 91 | 92 | impl Add for String { 93 | type Output = String; 94 | 95 | #[inline(always)] 96 | fn add(mut self, translation: Translation) -> Self::Output { 97 | self += translation; 98 | self 99 | } 100 | } 101 | 102 | impl AddAssign for String { 103 | fn add_assign(&mut self, translation: Translation) { 104 | match translation { 105 | Translation::Character(ch) => self.push(ch), 106 | Translation::String(s) => self.push_str(&s), 107 | Translation::None => {}, 108 | } 109 | } 110 | } 111 | 112 | /// Checks if this [`Translation`] is similar with another string. 113 | /// 114 | /// This comparison is *case-insensitive*. 115 | impl PartialEq for Translation 116 | where 117 | S: AsRef + ?Sized, 118 | { 119 | fn eq(&self, o: &S) -> bool { 120 | let o = o.as_ref(); 121 | 122 | match self { 123 | Self::Character(ch) => { 124 | let mut chars = o.chars(); 125 | 126 | chars.next().map_or_else(Default::default, |next_char| { 127 | chars.next().is_none() && similar::is(*ch as _, next_char) 128 | }) 129 | }, 130 | 131 | Self::String(s) => Matcher::is_equal(s, o), 132 | Self::None => o.is_empty(), 133 | } 134 | } 135 | } 136 | 137 | impl Display for Translation { 138 | #[inline(always)] 139 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 140 | match self { 141 | Self::Character(ch) => Display::fmt(ch, f), 142 | Self::String(s) => Display::fmt(s, f), 143 | Self::None => Ok(()), 144 | } 145 | } 146 | } 147 | 148 | #[cfg(feature = "serde")] 149 | #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] 150 | impl Serialize for Translation { 151 | fn serialize(&self, serializer: S) -> Result 152 | where 153 | S: Serializer, 154 | { 155 | match 
self { 156 | Self::Character(ch) => serializer.serialize_char(*ch), 157 | Self::String(s) => serializer.serialize_str(s), 158 | Self::None => serializer.serialize_unit(), 159 | } 160 | } 161 | } 162 | 163 | #[cfg(feature = "serde")] 164 | #[cfg_attr(docsrs, doc(cfg(feature = "serde")))] 165 | impl<'de> Deserialize<'de> for Translation { 166 | #[inline(always)] 167 | fn deserialize(deserializer: D) -> Result 168 | where 169 | D: Deserializer<'de>, 170 | { 171 | char::deserialize(deserializer).map(|character| crate::cure_char!(character)) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /core/src/util.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Index, IndexMut, Range}; 2 | 3 | pub(crate) const CODEPOINT_MASK: u32 = 0x000f_ffff; 4 | 5 | pub(crate) const fn is_none(code: u32) -> bool { 6 | matches!(code, 0..=9 | 14..=31 | 127 | 0xd800..=0xf8ff | 0xe01f0..) 7 | } 8 | 9 | #[cfg(feature = "options")] 10 | pub(crate) const fn is_special_rtl(code: u32) -> bool { 11 | matches!(code, 0x200e..=0x200f | 0x202a..=0x202e | 0x2066..=0x2069) 12 | } 13 | 14 | #[cfg(feature = "options")] 15 | pub(crate) const fn is_alphanumeric(code: u32) -> bool { 16 | matches!(code, 48..=57 | 97..=122 | 65..=90 | 32) 17 | } 18 | 19 | #[derive(Copy, Clone)] 20 | pub(crate) struct Binary { 21 | bytes: &'static [u8], 22 | } 23 | 24 | impl Binary { 25 | pub(crate) const fn new(bytes: &'static [u8]) -> Self { 26 | Self { bytes } 27 | } 28 | 29 | pub(crate) const fn at(self, offset: usize) -> u8 { 30 | self.bytes[offset] 31 | } 32 | 33 | #[inline(always)] 34 | pub(crate) fn sliced(self, offset: usize, size: usize) -> &'static [u8] { 35 | &self.bytes[offset..offset + size] 36 | } 37 | 38 | pub(crate) const fn u16_at(self, offset: usize) -> u16 { 39 | u16::from_le_bytes([self.at(offset), self.at(offset + 1)]) 40 | } 41 | 42 | pub(crate) const fn u32_at(self, offset: usize) -> u32 { 43 | 
u32::from_le_bytes([ 44 | self.at(offset), 45 | self.at(offset + 1), 46 | self.at(offset + 2), 47 | self.at(offset + 3), 48 | ]) 49 | } 50 | } 51 | 52 | #[inline(always)] 53 | pub(crate) fn sliced + ?Sized>(slicable: &T, range: R) -> &>::Output { 54 | slicable.index(range) 55 | } 56 | 57 | #[inline(always)] 58 | pub(crate) fn sliced_mut + ?Sized>( 59 | slicable: &mut T, 60 | range: R, 61 | ) -> &mut >::Output { 62 | slicable.index_mut(range) 63 | } 64 | 65 | // special thanks to https://medium.com/@michealkeines/merge-overlapping-intervals-rust-117a7099f348 66 | // except i've improved upon it :) 67 | pub(crate) fn merge_ranges(ranges: &mut Vec>) 68 | where 69 | T: Ord + Copy, 70 | { 71 | if ranges.is_empty() { 72 | return; 73 | } 74 | 75 | ranges.sort_by(|a, b| a.start.cmp(&b.start)); 76 | 77 | let mut j = 0; 78 | 79 | for i in 1..ranges.len() { 80 | let current = ranges[i].clone(); 81 | let previous = &mut ranges[j]; 82 | 83 | if current.start >= previous.start && current.start <= previous.end { 84 | if current.end > previous.end { 85 | previous.end = current.end; 86 | } 87 | } else { 88 | j += 1; 89 | ranges[j] = current; 90 | } 91 | } 92 | 93 | ranges.truncate(j + 1); 94 | } 95 | 96 | macro_rules! 
error_enum { 97 | ( 98 | $(#[$enum_attrs:meta])* 99 | pub enum $enum_name:ident { 100 | $( 101 | #[doc = $prop_doc:literal] 102 | $prop_name:ident, 103 | )* 104 | } 105 | ) => { 106 | $(#[$enum_attrs])* 107 | pub enum $enum_name { 108 | $( 109 | #[doc = $prop_doc] 110 | $prop_name, 111 | )* 112 | } 113 | 114 | impl std::convert::AsRef for $enum_name { 115 | fn as_ref(&self) -> &str { 116 | match self { 117 | $( 118 | Self::$prop_name => stringify!($prop_doc), 119 | )* 120 | } 121 | } 122 | } 123 | 124 | impl std::fmt::Display for $enum_name { 125 | #[inline(always)] 126 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 127 | write!(f, "{}", <$enum_name as std::convert::AsRef>::as_ref(self)) 128 | } 129 | } 130 | 131 | impl std::error::Error for $enum_name {} 132 | } 133 | } 134 | 135 | pub(crate) use error_enum; 136 | 137 | macro_rules! numbered_enum { 138 | ( 139 | $(#[$enum_meta:meta])* 140 | $enum_vis:vis enum $enum_name:ident: $enum_type:ty { 141 | $($enum_prop:ident = $enum_prop_value:literal,)* 142 | } 143 | ) => { 144 | $(#[$enum_meta])* 145 | #[repr($enum_type)] 146 | $enum_vis enum $enum_name { 147 | $($enum_prop = $enum_prop_value,)* 148 | } 149 | 150 | impl From<$enum_type> for $enum_name { 151 | fn from(value: $enum_type) -> Self { 152 | match value { 153 | $($enum_prop_value => Self::$enum_prop,)* 154 | _ => panic!(concat!("invalid ", stringify!($enum_name), " value: {}"), value), 155 | } 156 | } 157 | } 158 | } 159 | } 160 | 161 | pub(crate) use numbered_enum; 162 | 163 | macro_rules! 
unwrap_or_ret { 164 | ($option:expr,$fallback:expr) => { 165 | match $option { 166 | Some(inner) => inner, 167 | None => return $fallback, 168 | } 169 | }; 170 | } 171 | 172 | pub(crate) use unwrap_or_ret; 173 | -------------------------------------------------------------------------------- /scripts/ci_artifacts.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readdir, rename, mkdir } from 'node:fs/promises' 6 | import { exec } from 'node:child_process' 7 | import { dirname, join } from 'node:path' 8 | import { fileURLToPath } from 'node:url' 9 | import { promisify } from 'node:util' 10 | 11 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 12 | const ARTIFACTS_DIR = join(ROOT_DIR, 'artifacts') 13 | const NODE_DIR = join(ROOT_DIR, 'bindings', 'node') 14 | 15 | const execute = promisify(exec) 16 | 17 | const [artifacts] = await Promise.all([ 18 | readdir(join(ROOT_DIR, 'artifacts')), 19 | mkdir(join(NODE_DIR, 'artifacts')) 20 | ]) 21 | 22 | const expectedNativeTargets = [ 23 | 'aarch64-apple-darwin', 24 | 'aarch64-apple-ios', 25 | 'aarch64-apple-ios-sim', 26 | 'aarch64-linux-android', 27 | 'aarch64-pc-windows-msvc', 28 | 'aarch64-unknown-linux-gnu', 29 | 'aarch64-unknown-linux-musl', 30 | 'arm-unknown-linux-gnueabi', 31 | 'armv5te-unknown-linux-gnueabi', 32 | 'armv7-linux-androideabi', 33 | 'armv7-unknown-linux-gnueabi', 34 | 'armv7-unknown-linux-gnueabihf', 35 | 'i586-unknown-linux-gnu', 36 | 'i686-pc-windows-msvc', 37 | 'i686-unknown-freebsd', 38 | 'i686-unknown-linux-gnu', 39 | 'powerpc64le-unknown-linux-gnu', 40 | 'riscv64gc-unknown-linux-gnu', 41 | 's390x-unknown-linux-gnu', 42 | 'sparcv9-sun-solaris', 43 | 'thumbv7neon-unknown-linux-gnueabihf', 44 | 'x86_64-apple-darwin', 45 | 'x86_64-apple-ios', 46 | 'x86_64-pc-windows-msvc', 47 | 'x86_64-unknown-freebsd', 48 | 'x86_64-unknown-illumos', 49 | 'x86_64-unknown-linux-gnu', 50 | 
'x86_64-unknown-linux-musl' 51 | ] 52 | 53 | const expectedNodeTargets = [ 54 | 'x86_64-apple-darwin', 55 | 'x86_64-pc-windows-msvc', 56 | 'i686-pc-windows-msvc', 57 | 'x86_64-unknown-linux-gnu', 58 | 'x86_64-unknown-linux-musl', 59 | 'aarch64-apple-darwin', 60 | 'aarch64-unknown-linux-gnu', 61 | 'armv7-unknown-linux-gnueabihf', 62 | 'aarch64-linux-android', 63 | 'armv7-linux-androideabi', 64 | 'aarch64-unknown-linux-musl', 65 | 'aarch64-pc-windows-msvc', 66 | 'freebsd-x64' 67 | ] 68 | 69 | let foundJavaJar = false 70 | 71 | void (await Promise.all( 72 | artifacts.map(async artifact => { 73 | if (artifact.startsWith('native-')) { 74 | const target = artifact.slice(7) 75 | 76 | expectedNativeTargets.splice(expectedNativeTargets.indexOf(target), 1) 77 | 78 | await execute(`zip ../decancer-${target}.zip ./${artifact}/*`, { 79 | cwd: ARTIFACTS_DIR, 80 | stdio: 'inherit' 81 | }) 82 | } else if (artifact.startsWith('node-')) { 83 | expectedNodeTargets.splice( 84 | expectedNodeTargets.indexOf(artifact.slice(5)), 85 | 1 86 | ) 87 | 88 | const artifactsDir = join( 89 | join(NODE_DIR, 'artifacts'), 90 | artifact.replace(/^node-/, 'bindings-') 91 | ) 92 | const originDir = join(ARTIFACTS_DIR, artifact) 93 | 94 | const [[nodeBinary]] = await Promise.all([ 95 | readdir(originDir), 96 | mkdir(artifactsDir) 97 | ]) 98 | 99 | await rename(join(originDir, nodeBinary), join(artifactsDir, nodeBinary)) 100 | } else if (artifact === 'java-jar') { 101 | await rename( 102 | join(ARTIFACTS_DIR, artifact, 'decancer.jar'), 103 | join(ROOT_DIR, 'decancer.jar') 104 | ) 105 | 106 | foundJavaJar = true 107 | } 108 | }) 109 | )) 110 | 111 | if (expectedNodeTargets.length !== 0) { 112 | console.error('error: found missing targets. 
exiting.') 113 | process.exit(1) 114 | } 115 | -------------------------------------------------------------------------------- /scripts/ci_native_artifacts.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readdir, rename } from 'node:fs/promises' 6 | import { dirname, join } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const TARGET = process.argv[2] 10 | const IS_JAVA = process.argv.slice(3).some(argv => argv === '--java') 11 | 12 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 13 | const TARGET_DIR = join( 14 | ROOT_DIR, 15 | 'bindings', 16 | IS_JAVA ? 'java' : 'native', 17 | TARGET, 18 | 'release' 19 | ) 20 | 21 | const artifacts = await readdir(TARGET_DIR) 22 | const promises = [] 23 | 24 | for (let artifact of artifacts) { 25 | try { 26 | const ext = artifact.match(/\.\w+$/)[0].slice(1) 27 | 28 | if ( 29 | (!IS_JAVA && ext === 'lib') || 30 | ext === 'dll' || 31 | ext === 'so' || 32 | ext === 'dylib' 33 | ) { 34 | const outputArtifact = IS_JAVA 35 | ? 
artifact.replace('decancer', `decancer-${TARGET}`) 36 | : artifact 37 | 38 | promises.push( 39 | rename( 40 | join(TARGET_DIR, artifact), 41 | join(ROOT_DIR, 'artifacts', outputArtifact) 42 | ) 43 | ) 44 | } 45 | } catch { 46 | continue 47 | } 48 | } 49 | 50 | if (promises.length === 0) { 51 | console.error('error: target directory is empty') 52 | process.exit(1) 53 | } 54 | 55 | void (await Promise.all(promises)) 56 | -------------------------------------------------------------------------------- /scripts/ci_readme.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readFileSync, writeFileSync } from 'node:fs' 6 | import { dirname, join } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 10 | 11 | const modifiedWarning = 12 | '\n\n' 13 | const preprocessedLines = [] 14 | const inputDefinitions = process.argv.slice(3) 15 | let currentDefinition = null 16 | 17 | for (const line of readFileSync(join(ROOT_DIR, 'README.md')) 18 | .toString() 19 | .trim() 20 | .split(/\r?\n/g)) { 21 | if (line.startsWith('')) { 22 | for (let instruction of line 23 | .slice(6, -5) 24 | .trim() 25 | .split(/\s*\,\s*/)) { 26 | instruction = instruction.trim() 27 | 28 | if (instruction === 'end') { 29 | currentDefinition = null 30 | } else if (instruction.startsWith('begin')) { 31 | currentDefinition = instruction.replace(/^begin\s*/, '') 32 | } 33 | } 34 | } else if ( 35 | currentDefinition === null || 36 | inputDefinitions.includes(currentDefinition) 37 | ) { 38 | preprocessedLines.push(line) 39 | } 40 | } 41 | 42 | writeFileSync( 43 | process.argv[2], 44 | modifiedWarning + preprocessedLines.join('\n').replaceAll(/\n{3,}/g, '\n') 45 | ) 46 | -------------------------------------------------------------------------------- /scripts/ci_setup.mjs: 
-------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { appendFileSync } from 'node:fs' 6 | import { EOL } from 'node:os' 7 | 8 | const response = await fetch( 9 | 'https://api.github.com/repos/null8626/decancer/compare/HEAD~1...HEAD', 10 | { 11 | headers: { 12 | Authorization: `Bearer ${process.env.GITHUB_TOKEN}`, 13 | 'Content-Type': 'application/json' 14 | } 15 | } 16 | ) 17 | 18 | const { files } = await response.json() 19 | 20 | const coreAffected = files.some( 21 | ({ filename }) => 22 | filename.startsWith('core/src/') || filename === 'core/bin/codepoints.bin' 23 | ) 24 | 25 | appendFileSync( 26 | process.env.GITHUB_OUTPUT, 27 | Object.entries({ 28 | release: /^\d+\.\d+\.\d+$/.test(process.env.COMMIT_MESSAGE) 29 | ? process.env.COMMIT_MESSAGE 30 | : 'null', 31 | core_affected: coreAffected, 32 | node_affected: files.some( 33 | ({ filename }) => 34 | filename.startsWith('bindings/node/src') || 35 | filename === 'bindings/node/test.cjs' 36 | ), 37 | wasm_affected: 38 | coreAffected || 39 | files.some(({ filename }) => filename.startsWith('bindings/wasm/')), 40 | native_affected: files.some( 41 | ({ filename }) => 42 | filename.startsWith('bindings/native/src') || 43 | filename === 'bindings/native/decancer.h' 44 | ), 45 | native_docs_affected: files.some( 46 | ({ filename }) => 47 | filename.startsWith('bindings/native/docs') || 48 | filename === 'bindings/native/decancer.h' 49 | ), 50 | java_affected: 51 | coreAffected || 52 | files.some(({ filename }) => filename.startsWith('bindings/java/src')) 53 | }).reduce((a, [k, v]) => `${a}${k}=${v}${EOL}`, '') 54 | ) 55 | -------------------------------------------------------------------------------- /scripts/ci_setup_pages.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readFile, writeFile, readdir, stat, rm } from 
'node:fs/promises' 6 | import { dirname, join, sep } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 10 | const MINIFIED_JS = join(ROOT_DIR, 'bindings', 'wasm', 'bin', 'decancer.min.js') 11 | const EXCLUDED = [ 12 | 'wasm_example.html', 13 | ['bindings', 'wasm', 'bin'], 14 | ['native_docs'], 15 | ['scripts'], 16 | ['.git'] 17 | ] 18 | 19 | const editedMinifiedJsContents = (await readFile(MINIFIED_JS)) 20 | .toString() 21 | .replace( 22 | /https\:\/\/cdn\.jsdelivr\.net\/gh\/null8626\/decancer@v[\d\.]+\/bindings\/wasm\/bin\/decancer\.wasm/, 23 | 'https://null8626.github.io/decancer/bindings/wasm/bin/decancer.wasm' 24 | ) 25 | await writeFile(MINIFIED_JS, editedMinifiedJsContents) 26 | 27 | function lookInside(fullPath) { 28 | for (const ne of EXCLUDED) { 29 | if (typeof ne === 'string') { 30 | if (fullPath === join(ROOT_DIR, ne)) { 31 | return false 32 | } 33 | } else { 34 | const pathSplitLength = fullPath 35 | .replace(ROOT_DIR + sep, '') 36 | .split(sep).length 37 | 38 | if (pathSplitLength < ne.length) { 39 | const joined = [ROOT_DIR] 40 | let matched = -1 41 | 42 | for (let i = 0; i < ne.length; i++) { 43 | joined.push(ne[i]) 44 | 45 | if (fullPath.startsWith(join(...joined))) { 46 | matched = i 47 | } 48 | } 49 | 50 | return matched === pathSplitLength - 1 51 | } 52 | } 53 | } 54 | 55 | return false 56 | } 57 | 58 | function isExcluded(fullPath) { 59 | for (const ne of EXCLUDED) { 60 | if (typeof ne === 'string') { 61 | if (fullPath === join(ROOT_DIR, ne)) { 62 | return false 63 | } 64 | } else if (fullPath === join(ROOT_DIR, ...ne)) { 65 | return false 66 | } 67 | } 68 | 69 | return true 70 | } 71 | 72 | async function resolveDirectory(directoryName) { 73 | const files = await readdir(directoryName) 74 | 75 | void (await Promise.all( 76 | files 77 | .map(path => join(directoryName, path)) 78 | .map(async path => { 79 | const fstat = await stat(path) 80 | const 
isDirectory = fstat.isDirectory() 81 | 82 | if (isDirectory && lookInside(path)) { 83 | return await resolveDirectory(path) 84 | } else if (isExcluded(path)) { 85 | await rm(path, { recursive: isDirectory, force: true }) 86 | } 87 | }) 88 | )) 89 | } 90 | 91 | void (await resolveDirectory(ROOT_DIR)) 92 | -------------------------------------------------------------------------------- /scripts/ci_validate_java_artifacts.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readdir } from 'node:fs/promises' 6 | import { dirname, join } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 10 | 11 | const expectedJavaTargets = [ 12 | 'aarch64-apple-darwin', 13 | 'aarch64-pc-windows-msvc', 14 | 'aarch64-unknown-linux-gnu', 15 | 'aarch64-unknown-linux-musl', 16 | 'arm-unknown-linux-gnueabi', 17 | 'armv5te-unknown-linux-gnueabi', 18 | 'armv7-unknown-linux-gnueabi', 19 | 'armv7-unknown-linux-gnueabihf', 20 | 'i686-pc-windows-msvc', 21 | 'i686-unknown-freebsd', 22 | 'i686-unknown-linux-gnu', 23 | 'riscv64gc-unknown-linux-gnu', 24 | 'x86_64-apple-darwin', 25 | 'x86_64-pc-windows-msvc', 26 | 'x86_64-unknown-freebsd', 27 | 'x86_64-unknown-linux-gnu', 28 | 'x86_64-unknown-linux-musl' 29 | ] 30 | 31 | const artifacts = await readdir(join(ROOT_DIR, 'java-artifacts')) 32 | 33 | for (const artifact of artifacts) { 34 | expectedJavaTargets.splice(expectedJavaTargets.indexOf(artifact.slice(5)), 1) 35 | } 36 | 37 | if (expectedJavaTargets.length !== 0) { 38 | console.error('error: found missing targets. 
exiting.') 39 | process.exit(1) 40 | } 41 | -------------------------------------------------------------------------------- /scripts/pretty.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readFile, writeFile } from 'node:fs/promises' 6 | import { exec, execSync } from 'node:child_process' 7 | import { existsSync, readFileSync } from 'node:fs' 8 | import { dirname, join } from 'node:path' 9 | import { fileURLToPath } from 'node:url' 10 | import { promisify } from 'node:util' 11 | import { deserialize } from 'node:v8' 12 | import { options } from './util.mjs' 13 | 14 | const CODEPOINT_MASK = 0xfffff 15 | // 0..=9 | 14..=31 | 127 | 0xd800..=0xf8ff | 0xe01f0..=0x10ffff 16 | const NONE_CODEPOINTS_COUNT = 10 + 18 + 1 + 8448 + 196112 17 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 18 | const CORE_DIR = join(ROOT_DIR, 'core') 19 | const BINDINGS_DIR = join(ROOT_DIR, 'bindings') 20 | const SETUP_OUTPUTS = process.env.DECANCER_SETUP_OUTPUTS 21 | ? JSON.parse(process.env.DECANCER_SETUP_OUTPUTS) 22 | : null 23 | const OPTIONS = options(process.argv.slice(2)) 24 | 25 | function isAffected(value) { 26 | return SETUP_OUTPUTS !== null 27 | ? 
SETUP_OUTPUTS.release !== 'null' || 28 | SETUP_OUTPUTS[`${value}_affected`] === 'true' 29 | : true 30 | } 31 | 32 | if (!existsSync(join(ROOT_DIR, '.cache.bin'))) { 33 | execSync(`node ${join(ROOT_DIR, 'scripts', 'update_unicode.mjs')}`, { 34 | stdio: 'inherit' 35 | }) 36 | } 37 | 38 | const { alreadyHandledCount } = deserialize( 39 | readFileSync(join(ROOT_DIR, '.cache.bin')) 40 | ) 41 | 42 | const execute = promisify(exec) 43 | 44 | async function updateReadme() { 45 | if (isAffected('core')) { 46 | console.log('- [readme] reading codepoints.bin...') 47 | 48 | const bin = await readFile(join(CORE_DIR, 'bin', 'codepoints.bin')) 49 | 50 | console.log('- [readme] parsing codepoints.bin...') 51 | 52 | let codepointsCount = NONE_CODEPOINTS_COUNT + alreadyHandledCount 53 | 54 | const codepointsEnd = bin.readUint16LE() 55 | const caseSensitiveCodepointsEnd = bin.readUint16LE(2) 56 | const caseSensitiveCodepoints = [] 57 | let offset = codepointsEnd 58 | 59 | for (; offset < caseSensitiveCodepointsEnd; offset += 6) { 60 | const integer = bin.readUint32LE(offset) 61 | 62 | const codepoint = integer & CODEPOINT_MASK 63 | let toAdd = 1 64 | 65 | caseSensitiveCodepoints.push(codepoint) 66 | 67 | const rangeSize = bin.readUint8(offset + 4) & 0x7f 68 | 69 | caseSensitiveCodepoints.push( 70 | ...Array.from({ length: rangeSize }, (_, i) => codepoint + 1 + i) 71 | ) 72 | toAdd += rangeSize 73 | 74 | codepointsCount += toAdd 75 | } 76 | 77 | for (offset = 6; offset < codepointsEnd; offset += 6) { 78 | const integer = bin.readUint32LE(offset) 79 | 80 | const codepoint = integer & CODEPOINT_MASK 81 | let toAdd = 1 + (bin.readUint8(offset + 4) & 0x7f) 82 | 83 | const uppercasedCodepoint = String.fromCodePoint(codepoint) 84 | .toUpperCase() 85 | .codePointAt() 86 | 87 | if ( 88 | uppercasedCodepoint !== codepoint && 89 | !caseSensitiveCodepoints.includes(uppercasedCodepoint) 90 | ) { 91 | toAdd *= 2 92 | } 93 | 94 | codepointsCount += toAdd 95 | } 96 | 97 | console.log('- [readme] 
reading README.md...') 98 | 99 | const readme = await readFile(join(CORE_DIR, 'README.md')) 100 | 101 | await writeFile( 102 | join(CORE_DIR, 'README.md'), 103 | readme 104 | .toString() 105 | .trim() 106 | .replace( 107 | /\*\*[\d,\.]+ \(\d+[\.\,]\d{2}%\) different unicode codepoints\*\*/, 108 | `**${codepointsCount.toLocaleString('en-US')} (${( 109 | (codepointsCount / 0x10ffff) * 110 | 100 111 | ).toFixed(2)}%) different unicode codepoints**` 112 | ) 113 | ) 114 | 115 | console.log('- [readme] updated') 116 | } 117 | } 118 | 119 | async function prettier() { 120 | const extensions = ['css', 'js', 'ts', 'mjs', 'cjs', 'json'] 121 | 122 | await execute('npm i -g prettier') 123 | 124 | if (isAffected('java')) { 125 | await execute('npm i prettier-plugin-java --save-dev') 126 | 127 | extensions.push('java') 128 | } 129 | 130 | await execute(`npx prettier **/*.{${extensions.join(',')}} --write`, { 131 | cwd: ROOT_DIR 132 | }) 133 | 134 | await execute('git restore yarn.lock', { cwd: ROOT_DIR }) 135 | 136 | console.log('- [prettier] completed prettifying files') 137 | } 138 | 139 | async function cargo(cwd, ty) { 140 | if (isAffected(ty)) { 141 | console.log(`- [cargo -> ${cwd}] running clippy and rustfmt...`) 142 | 143 | await execute('cargo fmt', { cwd }) 144 | 145 | console.log(`- [cargo -> ${cwd}] completed`) 146 | } 147 | } 148 | 149 | async function clangFormat() { 150 | if (isAffected('native')) { 151 | const clangFormatExecutable = OPTIONS['clang-format'] ?? 
'clang-format' 152 | 153 | console.log(`- [${clangFormatExecutable}] running...`) 154 | 155 | await execute(`${clangFormatExecutable} -i decancer.h`, { 156 | cwd: join(BINDINGS_DIR, 'native') 157 | }) 158 | 159 | console.log(`- [${clangFormatExecutable}] completed`) 160 | } 161 | } 162 | 163 | void (await Promise.all([ 164 | cargo(join(CORE_DIR), 'core'), 165 | cargo(join(BINDINGS_DIR, 'java'), 'java'), 166 | cargo(join(BINDINGS_DIR, 'node'), 'node'), 167 | cargo(join(BINDINGS_DIR, 'wasm'), 'wasm'), 168 | cargo(join(BINDINGS_DIR, 'native'), 'native'), 169 | clangFormat(), 170 | prettier(), 171 | updateReadme() 172 | ])) 173 | -------------------------------------------------------------------------------- /scripts/read.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readFileSync, writeFileSync } from 'node:fs' 6 | import { dirname, join } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const CODEPOINT_MASK = 0xfffff 10 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 11 | const STRING_TRANSLATION_MASK = 0x10000000 12 | 13 | class Codepoints { 14 | #inner 15 | 16 | constructor() { 17 | this.#inner = [] 18 | } 19 | 20 | push(input) { 21 | if (input.translation === '\0') { 22 | input.translation = '' 23 | } 24 | 25 | const ogTranslationCode = input.syncedTranslation 26 | ? input.translation.charCodeAt() 27 | : input.translation 28 | 29 | for (let i = 0; i <= input.rangeSize; i++) 30 | this.#inner.push({ 31 | codepoint: c, 32 | translation: 33 | typeof ogTranslationCode === 'number' 34 | ? 
String.fromCharCode(ogTranslationCode + input.codepoint + i) 35 | : ogTranslationCode 36 | }) 37 | } 38 | 39 | get inner() { 40 | return this.#inner 41 | } 42 | } 43 | 44 | const binary = readFileSync(join(ROOT_DIR, 'core', 'bin', 'codepoints.bin')) 45 | 46 | const similar = [] 47 | let currentSimilar = [] 48 | 49 | let offset = binary.readUint16LE(2) 50 | const offsetEnd = binary.readUint16LE(4) 51 | 52 | do { 53 | const current = binary.readUint8(offset) 54 | 55 | if (current >= 0x80) { 56 | similar.push([...currentSimilar, String.fromCharCode(current & 0x7f)]) 57 | currentSimilar = [] 58 | } else { 59 | currentSimilar.push(String.fromCharCode(current)) 60 | } 61 | 62 | offset++ 63 | } while (offset < offsetEnd) 64 | 65 | function getTranslation(integer, secondByte) { 66 | const offset = 67 | binary.readUint16LE(4) + ((((integer >> 20) & 0x07) << 8) | secondByte) 68 | 69 | return binary.subarray(offset, offset + ((integer >> 23) & 0x1f)).toString() 70 | } 71 | 72 | let codepointsEnd = binary.readUint16LE() 73 | let codepoints = new Codepoints() 74 | 75 | for (let offset = 6; offset < codepointsEnd; offset += 6) { 76 | const integer = binary.readUint32LE(offset) 77 | const secondByte = binary.readUint8(offset + 4) 78 | 79 | const codepoint = integer & CODEPOINT_MASK 80 | 81 | codepoints.push({ 82 | codepoint, 83 | translation: 84 | integer >= STRING_TRANSLATION_MASK 85 | ? getTranslation(integer, secondByte) 86 | : String.fromCharCode((integer >> 20) & 0x7f), 87 | rangeSize: secondByte & 0x7f, 88 | syncedTranslation: secondByte >= 0x80 89 | }) 90 | } 91 | 92 | codepointsEnd = binary.readUint16LE(2) 93 | 94 | for (let offset = binary.readUint16LE(); offset < codepointsEnd; offset += 6) { 95 | const integer = binary.readUint32LE(offset) 96 | const secondByte = binary.readUint8(offset + 4) 97 | 98 | const codepoint = integer & CODEPOINT_MASK 99 | 100 | codepoints.push({ 101 | codepoint, 102 | translation: 103 | integer >= STRING_TRANSLATION_MASK 104 | ? 
getTranslation(integer, secondByte) 105 | : String.fromCharCode((integer >> 20) & 0x7f), 106 | rangeSize: secondByte & 0x7f, 107 | syncedTranslation: secondByte >= 0x80 108 | }) 109 | } 110 | 111 | if (process.argv[2]?.endsWith('.txt')) { 112 | const translationMap = {} 113 | 114 | for (const { codepoint, translation } of codepoints.inner) { 115 | if (translationMap[translation]) { 116 | translationMap[translation].push(codepoint) 117 | } else { 118 | translationMap[translation] = [codepoint] 119 | } 120 | } 121 | 122 | writeFileSync( 123 | process.argv[2], 124 | Object.entries(translationMap) 125 | .map( 126 | ([translation, codepoints]) => 127 | `${translation}:\n${codepoints.map(c => String.fromCodePoint(c)).join('')}` 128 | ) 129 | .join('\n\n') 130 | ) 131 | } else { 132 | writeFileSync( 133 | process.argv[2]?.endsWith('.json') ? process.argv[2] : 'output.json', 134 | JSON.stringify( 135 | { 136 | codepoints: codepoints.inner.sort((a, b) => a.codepoint - b.codepoint), 137 | similar 138 | }, 139 | null, 140 | 2 141 | ) 142 | ) 143 | } 144 | -------------------------------------------------------------------------------- /scripts/util.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | const SETUP_OUTPUTS = process.env.DECANCER_SETUP_OUTPUTS 6 | ? JSON.parse(process.env.DECANCER_SETUP_OUTPUTS) 7 | : null 8 | 9 | export function isAffected(value) { 10 | return SETUP_OUTPUTS !== null 11 | ? 
SETUP_OUTPUTS.release !== 'null' || 12 | SETUP_OUTPUTS[`${value}_affected`] === 'true' 13 | : true 14 | } 15 | 16 | export function options(argv) { 17 | return Object.fromEntries( 18 | argv.map(x => x.match(/^\-\-([\w\-]+)=(.*)/)?.slice(1)).filter(x => x) 19 | ) 20 | } 21 | 22 | export async function request(url) { 23 | console.log(`- requesting to ${url}...`) 24 | 25 | const req = await fetch(url) 26 | const text = await req.text() 27 | 28 | console.log(`- parsing data returned from ${url}...`) 29 | return text 30 | } 31 | 32 | export function containsInclusive(value, rangeStart, rangeEnd) { 33 | return value >= rangeStart && value <= rangeEnd 34 | } 35 | 36 | export function strongAssert(expr, ...rest) { 37 | if (!expr) { 38 | console.error('- fatal error:', ...rest) 39 | process.exit(1) 40 | } 41 | } 42 | 43 | export function isCaseSensitive(x) { 44 | return String.fromCodePoint(x).toLowerCase().codePointAt() !== x 45 | } 46 | 47 | function merge(a, b, recurse = true) { 48 | if (a.includes(b)) { 49 | return a 50 | } else if (b.includes(a)) { 51 | return b 52 | } 53 | 54 | const minimumLength = Math.min(a.length, b.length) 55 | let maxLimit 56 | 57 | for (let limit = 1; limit <= minimumLength; limit++) { 58 | if (a.slice(0, limit) === b.slice(-limit)) { 59 | maxLimit = limit 60 | } 61 | } 62 | 63 | if (maxLimit === undefined) { 64 | if (recurse) { 65 | return merge(b, a, false) 66 | } 67 | } else { 68 | return b.slice(0, -maxLimit) + a 69 | } 70 | } 71 | 72 | export function mergeArray(arr, recurse = true) { 73 | const mergedSections = [] 74 | 75 | while (true) { 76 | let index = 0 77 | 78 | for (; index < arr.length; index++) { 79 | if (arr[index] !== undefined) { 80 | break 81 | } 82 | } 83 | 84 | if (index === arr.length) { 85 | break 86 | } 87 | 88 | let section = arr[index] 89 | arr[index] = undefined 90 | 91 | for (index++; index < arr.length; index++) { 92 | if (arr[index] === undefined) { 93 | continue 94 | } 95 | 96 | const newSection = merge(section, 
arr[index]) 97 | 98 | if (newSection) { 99 | section = newSection 100 | arr[index] = undefined 101 | } 102 | } 103 | 104 | mergedSections.push(section) 105 | } 106 | 107 | if (recurse) { 108 | return mergeArray(mergedSections, false) 109 | } else { 110 | return mergedSections.reduce((a, b) => a + b, '') 111 | } 112 | } 113 | 114 | export function removeFromSet(array, set) { 115 | for (const part of set) array.splice(array.indexOf(part), 1) 116 | 117 | return array 118 | } 119 | 120 | export function binarySearchExists(arr, val) { 121 | let start = 0 122 | let end = arr.length - 1 123 | 124 | while (start <= end) { 125 | const mid = Math.floor((start + end) / 2) 126 | 127 | if (arr[mid] === val) { 128 | return true 129 | } else if (val < arr[mid]) { 130 | end = mid - 1 131 | } else { 132 | start = mid + 1 133 | } 134 | } 135 | 136 | return false 137 | } 138 | 139 | const RETURNS_ITSELF = x => x 140 | 141 | export class SortedSet { 142 | #mapFn 143 | #array 144 | 145 | constructor(mapFn = RETURNS_ITSELF) { 146 | this.#mapFn = mapFn 147 | this.#array = [] 148 | } 149 | 150 | push(val) { 151 | const cmpVal = this.#mapFn(val) 152 | 153 | let start = 0 154 | let end = this.#array.length - 1 155 | 156 | while (start <= end) { 157 | const mid = Math.floor((start + end) / 2) 158 | const other = this.#mapFn(this.#array[mid]) 159 | 160 | if (other === cmpVal) { 161 | return 162 | } else if (cmpVal < other) { 163 | end = mid - 1 164 | } else { 165 | start = mid + 1 166 | } 167 | } 168 | 169 | this.#array.splice(start, 0, val) 170 | } 171 | 172 | get array() { 173 | return this.#array 174 | } 175 | 176 | get length() { 177 | return this.#array.length 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /scripts/version.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | 3 | 'use strict' 4 | 5 | import { readdir, readFile, writeFile } from 'node:fs/promises' 6 | 
import { dirname, join } from 'node:path' 7 | import { fileURLToPath } from 'node:url' 8 | 9 | const ROOT_DIR = join(dirname(fileURLToPath(import.meta.url)), '..') 10 | const CORE_DIR = join(ROOT_DIR, 'core') 11 | const NODE_DIR = join(ROOT_DIR, 'bindings', 'node') 12 | const JAVA_SOURCES_DIR = join( 13 | ROOT_DIR, 14 | 'bindings', 15 | 'java', 16 | 'src', 17 | 'main', 18 | 'java', 19 | 'io', 20 | 'github', 21 | 'null8626', 22 | 'decancer' 23 | ) 24 | const JRELEASER_VERSION = '1.12.0' 25 | 26 | async function update(filename, callback) { 27 | await writeFile(filename, callback(await readFile(filename, 'utf-8'))) 28 | console.log(`- updated file: ${filename}`) 29 | } 30 | 31 | function updateJsonFunc(str) { 32 | const json = JSON.parse(str) 33 | json.version = process.argv[2] 34 | 35 | return JSON.stringify(json, null, 2) 36 | } 37 | 38 | function updateGradleFunc(x) { 39 | return x 40 | .replace(JRELEASER_VERSION, '{JRELEASER_VERSION}') 41 | .replace(/'\d+\.\d+\.\d+'/g, `'${process.argv[2]}'`) 42 | .replace(/\/v\d+\.\d+\.\d+\//, `/v${process.argv[2]}/`) 43 | .replace('{JRELEASER_VERSION}', JRELEASER_VERSION) 44 | } 45 | 46 | const updateTomlFunc = x => 47 | x.replace(/version = "\d+\.\d+\.\d+"/, `version = "${process.argv[2]}"`) 48 | const directUpdateFunc = x => x.replace(/\d\.\d\.\d/g, process.argv[2]) 49 | 50 | const updateDocStringVersionFunc = x => 51 | x.replace(/@version \d\.\d\.\d/, `@version ${process.argv[2]}`) 52 | 53 | function updateNativeHeaderFunc(x) { 54 | const versionHex = `0x${process.argv[2] 55 | .split('.') 56 | .map(x => x.padStart(2, '0')) 57 | .join('')}` 58 | 59 | return updateDocStringVersionFunc( 60 | x 61 | .replace( 62 | /#define DECANCER_VERSION 0x[a-fA-F0-9]{6}/, 63 | `#define DECANCER_VERSION ${versionHex}` 64 | ) 65 | .replace( 66 | /@date \d{4}\-\d{2}\-\d{2}/, 67 | `@date ${new Date().toISOString().replace(/T[\d\:\.]+Z$/, '')}` 68 | ) 69 | ) 70 | } 71 | 72 | void (await Promise.all([ 73 | update(join(CORE_DIR, 'Cargo.toml'), 
updateTomlFunc), 74 | update(join(NODE_DIR, 'Cargo.toml'), updateTomlFunc), 75 | update(join(ROOT_DIR, 'bindings', 'wasm', 'Cargo.toml'), updateTomlFunc), 76 | update(join(ROOT_DIR, 'bindings', 'native', 'Cargo.toml'), updateTomlFunc), 77 | update(join(NODE_DIR, 'package.json'), updateJsonFunc), 78 | update( 79 | join(ROOT_DIR, 'bindings', 'wasm', 'bin', 'decancer.min.js'), 80 | directUpdateFunc 81 | ), 82 | update(join(ROOT_DIR, 'bindings', 'wasm', 'example.html'), directUpdateFunc), 83 | update(join(ROOT_DIR, 'README.md'), directUpdateFunc), 84 | update(join(CORE_DIR, 'README.md'), directUpdateFunc), 85 | update( 86 | join(ROOT_DIR, 'bindings', 'native', 'decancer.h'), 87 | updateNativeHeaderFunc 88 | ), 89 | update( 90 | join(ROOT_DIR, 'bindings', 'native', 'docs', 'Doxyfile'), 91 | directUpdateFunc 92 | ), 93 | update(join(ROOT_DIR, 'bindings', 'node', 'README.md'), directUpdateFunc), 94 | update(join(CORE_DIR, 'README.md'), directUpdateFunc), 95 | update(join(CORE_DIR, 'src', 'lib.rs'), directUpdateFunc), 96 | update(join(ROOT_DIR, 'bindings', 'java', 'build.gradle'), updateGradleFunc), 97 | new Promise(resolve => { 98 | readdir(join(NODE_DIR, 'npm')).then(files => { 99 | Promise.all( 100 | files.map(file => 101 | update(join(NODE_DIR, 'npm', file, 'package.json'), updateJsonFunc) 102 | ) 103 | ).then(resolve) 104 | }) 105 | }), 106 | new Promise(resolve => { 107 | readdir(JAVA_SOURCES_DIR).then(files => { 108 | Promise.all( 109 | files.map(file => 110 | update(join(JAVA_SOURCES_DIR, file), updateDocStringVersionFunc) 111 | ) 112 | ).then(resolve) 113 | }) 114 | }) 115 | ])) 116 | -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
2 | # yarn lockfile v1 3 | 4 | 5 | "@napi-rs/cli@^2.10.0": 6 | version "2.13.3" 7 | resolved "https://registry.yarnpkg.com/@napi-rs/cli/-/cli-2.13.3.tgz#ac4de8c551f190a646d0e1bd86ffbfaac08adef3" 8 | integrity sha512-nAlbKuakQ+YHZE+M3Afih9UA1jr+gx63Gt4xHA+j2xD1NY6TjQ+QCgF9Yaj/YZIkCc2t3CZh52znFrfbU8b2bA== 9 | 10 | "@types/node@^18.0.0": 11 | version "18.11.17" 12 | resolved "https://registry.yarnpkg.com/@types/node/-/node-18.11.17.tgz#5c009e1d9c38f4a2a9d45c0b0c493fe6cdb4bcb5" 13 | integrity sha512-HJSUJmni4BeDHhfzn6nF0sVmd1SMezP7/4F0Lq+aXzmp2xm9O7WXrUtHW/CHlYVtZUbByEvWidHqRtcJXGF2Ng== 14 | 15 | typescript@^4.7.4: 16 | version "4.9.4" 17 | resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.9.4.tgz#a2a3d2756c079abda241d75f149df9d561091e78" 18 | integrity sha512-Uz+dTXYzxXXbsFpM86Wh3dKCxrQqUcVMxwU54orwlJjOpO3ao8L7j5lH+dWfTwgCwIuM9GQ2kvVotzYJMXTBZg== --------------------------------------------------------------------------------