├── tests ├── test_fixtures │ ├── README.md │ ├── rot13_base64_hex_with_newline │ └── base64_3_times_with_no_new_line └── integration_test.rs ├── src ├── decoders │ ├── README.md │ ├── reverse_decoder.rs │ ├── interface.rs │ ├── braille_decoder.rs │ ├── atbash_decoder.rs │ └── url_decoder.rs ├── cli_input_parser │ ├── README.md │ └── mod.rs ├── config │ └── readme.md ├── filtration_system │ └── README.md ├── cli_pretty_printing │ ├── README.md │ └── tests.rs ├── storage │ ├── README.md │ ├── invisible_chars │ │ └── chars.txt │ ├── wait_athena_storage.rs │ └── mod.rs ├── searchers │ ├── README.md │ ├── search_node.rs │ ├── mod.rs │ └── bfs.rs ├── main.rs ├── checkers │ ├── checker_result.rs │ ├── default_checker.rs │ ├── regex_checker.rs │ ├── human_checker.rs │ ├── password.rs │ ├── checker_type.rs │ ├── lemmeknow_checker.rs │ └── wordlist.rs ├── api_library_input_struct.rs └── timer │ └── mod.rs ├── .github ├── ISSUE_TEMPLATE │ ├── proposal--large-feature--large-idea-.md │ ├── technical-debt.md │ ├── feature_request.md │ └── bug_report.md ├── dependabot.yml ├── workflows │ ├── dependabot-auto-merge.yml │ ├── codespell.yml │ ├── stalePRS.yml │ └── quickstart.yml ├── FUNDING.yml ├── release-drafter.yml └── build.yml ├── .config └── nextest.toml ├── .gitignore ├── docs ├── package-managers.md ├── changes │ ├── 2024-07-10-remove-cipher-mapping.md │ ├── 2024-07-10-astar-refactor.md │ ├── 2024-03-21-add-vigenere-decoder.md │ ├── 2024-07-10-remove-decoder-popularity.md │ ├── 2024-07-10-improve-string-pruning.md │ ├── 2024-07-01-wordlist-checker.md │ ├── 2024-07-02-sensitivity-trait.md │ ├── 2024-07-10-wait-athena-checker.md │ ├── 2024-03-11-fix-duplicate-human-prompts.md │ └── 2024-07-10-astar-simplified-heuristic-rewrite.md ├── storage.md ├── sensitivity.md ├── README.md ├── invisible_characters.md ├── parallel_astar_implementation_clarifications.md ├── astar_decoder_specific_nodes.md ├── database_implementation.md ├── first_run_implementation_plan.md ├── 
parallel_astar_search.md └── parallelization.md ├── justfile ├── Dockerfile ├── images ├── README.md ├── main_demo.cast ├── decoding.tape ├── better_demo.cast ├── first_run.tape ├── lemmeknow.cast └── main_demo.svg ├── benches ├── benchmark_checkers.rs ├── benchmark_crackers.rs ├── benchmark_whole_program.rs └── benchmark_decoders.rs ├── LICENSE └── Cargo.toml /tests/test_fixtures/README.md: -------------------------------------------------------------------------------- 1 | # Test Fixtures 2 | 3 | A bunch of files to help support testing <3 :) -------------------------------------------------------------------------------- /tests/test_fixtures/rot13_base64_hex_with_newline: -------------------------------------------------------------------------------- 1 | 52 33 56 32 5a 69 42 32 5a 69 42 75 49 47 64 79 5a 6d 63 68 2 | -------------------------------------------------------------------------------- /tests/test_fixtures/base64_3_times_with_no_new_line: -------------------------------------------------------------------------------- 1 | VkZoV2MyUkhiSGRpUjFWbldXMUdlbHBVV1RCSlIxWjFXVEk1YTJGWE5XNWpkejA5 -------------------------------------------------------------------------------- /src/decoders/README.md: -------------------------------------------------------------------------------- 1 | Please read [mod.rs](mod.rs) for the latest up to date documentation. 2 | 3 | The `interface.rs` defines what each decoder looks like. -------------------------------------------------------------------------------- /src/cli_input_parser/README.md: -------------------------------------------------------------------------------- 1 | # What is this? 2 | 3 | Our library takes a struct as an input. This module takes the CLI arguments and parses it into that struct. 
-------------------------------------------------------------------------------- /src/config/readme.md: -------------------------------------------------------------------------------- 1 | # Config 2 | 3 | The Config object is the configuration struct of our library API. 4 | The CLI arguments get parsed into a library config at runtime. 5 | -------------------------------------------------------------------------------- /src/filtration_system/README.md: -------------------------------------------------------------------------------- 1 | # This module serves 2 purposes: 2 | 1. Get all the nodes (crackers, decoders) 3 | 2. Apply filters on them and only return the ones that match them. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/proposal--large-feature--large-idea-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Proposal (Large Feature, Large Idea) 3 | about: For a very large feature that could take weeks to implement 4 | title: '' 5 | labels: Proposal 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/technical-debt.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Technical Debt 3 | about: When you want to log technical debt 4 | title: "[TECHNICAL DEBT]" 5 | labels: Technical Debt 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Why? 11 | 12 | # How will this affect us? 13 | 14 | # What can we do to fix this in the future? 15 | -------------------------------------------------------------------------------- /src/cli_pretty_printing/README.md: -------------------------------------------------------------------------------- 1 | # What is this? 2 | 3 | When using the CLI, we want to print to the screen. 
We want to do some things — for example, if the answer is a pair of latitude and longitude coordinates, we'll want to use the plural form.
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # These are backup files generated by rustfmt 6 | **/*.rs.bk 7 | 8 | /doc 9 | 10 | # Added by cargo 11 | 12 | /target 13 | 14 | doc/ 15 | 16 | # Added by cargo 17 | # 18 | # already existing elements were commented out 19 | 20 | #/target 21 | #Cargo.lock 22 | 23 | /.idea 24 | .aider* 25 | .cursor 26 | -------------------------------------------------------------------------------- /src/cli_input_parser/mod.rs: -------------------------------------------------------------------------------- 1 | /* 2 | When the user provides CLI input, we need to parse it for: 3 | - Text or file? 4 | - Verbose mode to level 5 | 6 | and so on. 7 | */ 8 | 9 | // build new library_input 10 | 11 | use crate::api_library_input_struct::LibraryInput; 12 | 13 | /// This creates a new LibraryInput struct and sets it to a default. 14 | /// added _ before name to let clippy know that they aren't used 15 | fn _main() { 16 | let _options = LibraryInput::default(); 17 | } 18 | -------------------------------------------------------------------------------- /docs/package-managers.md: -------------------------------------------------------------------------------- 1 | # Packing ciphey 2 | 3 | Please call the main ciphey program (the CLI) `ciphey_cli` and enable it to be called via `ciphey` in the terminal. 4 | 5 | This is because `ciphey` is a short name and is probably taken in a package manager already. 6 | 7 | ## Releases 8 | 9 | Please base your package on our releases and not our GitHub repo. If you must, please call the package `ciphey_cli_rolling` to ensure people understand that the package updates on a rolling basis (as our GitHub repo updates). 
-------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "cargo" 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | build-all: 2 | cargo build 3 | docker build . 4 | 5 | test-all: 6 | cargo build 7 | cargo check 8 | cargo clippy 9 | cargo test 10 | 11 | fix-all: 12 | git add . 13 | git commit -m 'Clippy and fmt' 14 | cargo clippy --fix 15 | cargo fmt 16 | cargo nextest run 17 | git add . 18 | git commit -m 'Clippy and fmt' 19 | 20 | test: 21 | cargo nextest run 22 | 23 | publish: 24 | docker buildx build --platform linux/arm/v7,linux/amd64,linux/arm64/v8 -t autumnskerritt/ciphey:latest --push . 
25 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-auto-merge.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot Auto-merge 2 | on: pull_request 3 | 4 | permissions: 5 | contents: write 6 | pull-requests: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Enable auto-merge for Dependabot PRs 14 | run: gh pr merge --auto --merge "$PR_URL" 15 | env: 16 | PR_URL: ${{github.event.pull_request.html_url}} 17 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:alpine as builder 2 | RUN apk add --no-cache build-base pkgconfig openssl-dev 3 | ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig 4 | ENV OPENSSL_DIR=/usr 5 | # Encourage some layer caching here rather then copying entire directory that includes docs to builder container ~CMN 6 | WORKDIR /app/ciphey 7 | COPY Cargo.toml Cargo.lock ./ 8 | COPY src/ src/ 9 | COPY benches/ benches/ 10 | RUN cargo build --release 11 | 12 | FROM alpine:3.12 13 | COPY --from=builder /app/ciphey/target/release/ciphey /usr/local/bin/ciphey 14 | ENTRYPOINT [ "/usr/local/bin/ciphey" ] 15 | -------------------------------------------------------------------------------- /src/searchers/README.md: -------------------------------------------------------------------------------- 1 | # What is a searcher? 2 | 3 | > How do you decide what decryptions to do next? 4 | 5 | We use a search algorithm for this. 6 | 7 | Click here: 8 | https://www.notion.so/b3cdc723444d4aafa30e8c1eb41e2cd9?v=81453058582641b2b744815c37643665 9 | 10 | And filter by "Search" to find all of our proposals which relate to searchers. 
For example, if you want to learn how the A* search algorithm was designed you can find a proposal for it which contains all of the theory and ideas. 11 | 12 | # Files 13 | `bfs.rs` is our simplest searcher, it's breadth first search! -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | on: 2 | - pull_request 3 | 4 | jobs: 5 | spellcheck: 6 | runs-on: ubuntu-latest 7 | steps: 8 | - name: Check out the repository 9 | - uses: actions/checkout@v2 10 | 11 | - name: Set up Python 12 | uses: actions/setup-python@v2 13 | with: 14 | python-version: 3.8 15 | 16 | - name: Install codespell with pip 17 | run: pip install codespell 18 | 19 | - name: Fix typos 20 | run: codespell ./ -w 21 | 22 | - name: Push changes 23 | uses: EndBug/add-and-commit@v7 -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: bee-san 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 
Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /images/README.md: -------------------------------------------------------------------------------- 1 | # Steps to make the gifs 2 | 3 | Install asciinema and svg-term-cli. 4 | 5 | Record with asciinema: 6 | 7 | asciinema rec demo.cast 8 | 9 | This records the session in the asciicast v2 plaintext file format (newline-delimited JSON with an initial header object followed by a timestamped event stream of stdin and stdout). 10 | 11 | Convert the .cast file to .svg with svg-term-cli: 12 | 13 | svg-term --in demo.cast --out demo.svg --window --width 80 --height 22 --no-optimize 14 | 15 | You probably want to play around with width and height 16 | window adds a fake OS window around the terminal session 17 | I found that no-optimize fixed some weird font rendering issues on my macOS – not sure why 18 | -------------------------------------------------------------------------------- /src/searchers/search_node.rs: -------------------------------------------------------------------------------- 1 | ///! This is the struct used to design what a search node looks like. 2 | ///! At each level, we have a node with some text, T. 3 | ///! And then the edges of that node are the decryption modules. 
4 | 5 | /*struct Nodes { 6 | /// When we expand the node, we generate children node 7 | /// This is an vector of children. 8 | children: Vec>, 9 | /// Value is the text we are using 10 | value: V 11 | /// Edges so far enables us to know the decryption route 12 | /// Because decryptions are edges, we can write the route like: 13 | /// vec!["Base64", "Base32", "Rot13"] and so on indicating it 14 | /// started from base64, then base32, and finally rot13. 15 | edges_so_far: Vec<&str> 16 | } 17 | */ -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'feature' 7 | - 'enhancement' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'fix' 11 | - 'bugfix' 12 | - 'bug' 13 | - title: '🧰 Maintenance' 14 | label: 'chore' 15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 16 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 17 | version-resolver: 18 | major: 19 | labels: 20 | - 'major' 21 | minor: 22 | labels: 23 | - 'minor' 24 | patch: 25 | labels: 26 | - 'patch' 27 | default: patch 28 | template: | 29 | ## Changes 30 | 31 | $CHANGES 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. 
See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /benches/benchmark_checkers.rs: -------------------------------------------------------------------------------- 1 | use ciphey::checkers::athena::Athena; 2 | use ciphey::checkers::checker_type::{Check, Checker}; 3 | use ciphey::checkers::CheckerTypes; 4 | use ciphey::decoders::base64_decoder::Base64Decoder; 5 | use ciphey::decoders::interface::{Crack, Decoder}; 6 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 7 | 8 | pub fn criterion_benchmark(c: &mut Criterion) { 9 | let decode_base64 = Decoder::::new(); 10 | let athena_checker = Checker::::new(); 11 | let checker = CheckerTypes::CheckAthena(athena_checker); 12 | c.bench_function("base64 successful decoding", |b| { 13 | b.iter(|| decode_base64.crack(black_box("aGVsbG8gd29ybGQ="), &checker)) 14 | }); 15 | } 16 | 17 | criterion_group!(benches, criterion_benchmark); 18 | criterion_main!(benches); 19 | -------------------------------------------------------------------------------- /.github/workflows/stalePRS.yml: -------------------------------------------------------------------------------- 1 | name: 'Handle stale PRs' 2 | on: 3 | schedule: 4 | - cron: '30 7 * * 1-5' 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v4 11 | with: 12 | only: pulls 13 | stale-pr-message: "This PR hasn't seen activity in 2 weeks! Should it be merged, closed, or worked on further? 
If you want to keep it open, post a comment or remove the `stale` label – otherwise this will be closed in another week." 14 | close-pr-message: 'This PR was closed due to 1 month of inactivity. Feel free to reopen it if still relevant.' 15 | days-before-pr-stale: 14 16 | days-before-pr-close: 30 17 | stale-issue-label: stale 18 | stale-pr-label: stale -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use ciphey::cli::parse_cli_args; 2 | use ciphey::cli_pretty_printing::{program_exiting_successful_decoding, success}; 3 | use ciphey::perform_cracking; 4 | 5 | fn main() { 6 | // Turn CLI arguments into a library object 7 | let (text, config) = parse_cli_args(); 8 | let result = perform_cracking(&text, config); 9 | success(&format!( 10 | "DEBUG: main.rs - Result from perform_cracking: {:?}", 11 | result.is_some() 12 | )); 13 | match result { 14 | // TODO: As result have array of CrackResult used, 15 | // we can print in better way with more info 16 | Some(result) => { 17 | success(&format!( 18 | "DEBUG: main.rs - Got successful result with {} decoders in path", 19 | result.path.len() 20 | )); 21 | program_exiting_successful_decoding(result); 22 | } 23 | None => { 24 | success("DEBUG: main.rs - Got None result, calling failed_to_decode"); 25 | ciphey::cli_pretty_printing::failed_to_decode() 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Bee @bee-san on GitHub 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /images/main_demo.cast: -------------------------------------------------------------------------------- 1 | {"version": 2, "width": 123, "height": 45, "timestamp": 1672149835, "env": {"SHELL": "/bin/zsh", "TERM": "xterm-256color"}} 2 | [0.111327, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜ \u001b[36m~\u001b[00m \u001b[K"] 3 | [0.111394, "o", "\u001b[?1h\u001b=\u001b[?2004h"] 4 | [0.940616, "o", "\u001b[7mciphey -t 'LJIVE222KFJGUWSRJZ2FUUKSNNNFCTTLLJIVE5C2KFJGWWSRKJVVUUKOORNFCUTLLJIVE222KFHHIWSRKJVVUUKSNNNEOUTULJIU4222KFJGW\u001b[7mW\u001b[7mSRJZ2FUUKONNNFCTTKLJIU45C2KFJGWWSHJZVVUR2SORNFCUTLLJIVE222I5JHIWSRKJVVUR2ONJNEOTTULJIVE222KFJGWWSRJZ2FUUKSNNNFCTTLLJIU45C2\u001b[7mK\u001b[7mFHGWWSRJZVFUUKSHU======' -d\u001b[27m\u001b[K"] 5 | [1.697765, "o", "\r\r\n"] 6 | [2.213287, "o", "\r\n🥳 ciphey has decoded 205 times times.\r\nIf you would have used Ciphey, it would have taken you 41 seconds\r\n\r\n"] 7 | [2.225368, "o", "The plaintext is: \r\n\u001b[1;33mhello, world!\u001b[0m\r\nand the decoders used are \u001b[1;33mBase32 → Caesar Cipher → Base64 → 
Binary\u001b[0m\r\n"] 8 | [2.237779, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜ \u001b[36m~\u001b[00m \u001b[K"] 9 | [2.237852, "o", "\u001b[?1h\u001b="] 10 | [2.237875, "o", "\u001b[?2004h"] 11 | [5.440228, "o", "\u001b[?2004l\r\r\n"] 12 | -------------------------------------------------------------------------------- /benches/benchmark_crackers.rs: -------------------------------------------------------------------------------- 1 | use ciphey::checkers::athena::Athena; 2 | use ciphey::checkers::checker_type::{Check, Checker}; 3 | use ciphey::checkers::CheckerTypes; 4 | use ciphey::config::{set_global_config, Config}; 5 | use ciphey::decoders::base64_decoder::Base64Decoder; 6 | use ciphey::decoders::interface::{Crack, Decoder}; 7 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 8 | use env_logger::Builder; 9 | use log::LevelFilter; 10 | 11 | pub fn criterion_benchmark(c: &mut Criterion) { 12 | // Initialize logger with only error level to suppress debug messages 13 | let mut builder = Builder::new(); 14 | builder.filter_level(LevelFilter::Error); 15 | builder.init(); 16 | 17 | // Setup global config to suppress output 18 | let mut config = Config::default(); 19 | config.api_mode = true; 20 | config.verbose = 0; 21 | set_global_config(config); 22 | 23 | let decode_base64 = Decoder::::new(); 24 | let athena_checker = Checker::::new(); 25 | let checker = CheckerTypes::CheckAthena(athena_checker); 26 | c.bench_function("base64 successful decoding", |b| { 27 | b.iter(|| decode_base64.crack(black_box("aGVsbG8gd29ybGQ="), &checker)) 28 | }); 29 | } 30 | 31 | criterion_group!(benches, criterion_benchmark); 32 | criterion_main!(benches); 33 | -------------------------------------------------------------------------------- /docs/changes/2024-07-10-remove-cipher-mapping.md: -------------------------------------------------------------------------------- 1 | # Change: Remove CIPHER_MAPPING from helper_functions 2 | 3 | ## Purpose 4 | 
Remove the incorrect mapping between Cipher Identifier's cipher names and ciphey decoder names. The mapping was inaccurate, particularly with "fractionatedMorse" being incorrectly mapped to "morseCode" when they are different encoding schemes. 5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Removes incorrect mappings that could lead to misidentification of ciphers 9 | - Simplifies the code by directly using the first result from Cipher Identifier 10 | - Eliminates potential confusion between different cipher types 11 | 12 | ### Disadvantages 13 | - No longer filters cipher types based on available decoders 14 | - May return cipher types that don't have corresponding decoders in ciphey 15 | 16 | ## Technical Implementation 17 | - Removed the `CIPHER_MAPPING` static variable and its documentation 18 | - Modified the `get_cipher_identifier_score` function to return the first result from Cipher Identifier instead of checking against the mapping 19 | - Verified that all tests still pass after the changes 20 | 21 | ## Future Improvements 22 | - Consider implementing a more accurate mapping if needed in the future 23 | - Potentially add a check to verify if ciphey has a decoder for the identified cipher type 24 | - Could add a more sophisticated scoring mechanism for cipher identification -------------------------------------------------------------------------------- /src/checkers/checker_result.rs: -------------------------------------------------------------------------------- 1 | use super::checker_type::Checker; 2 | 3 | /// The checkerResult struct is used to store the results of a checker. 4 | pub struct CheckResult { 5 | /// If our checkers return success, we change this bool to True 6 | pub is_identified: bool, 7 | /// text is the text before we check it. 8 | // we can make this &'text str 9 | // but then crack requires lifetime annotations. 10 | pub text: String, 11 | /// Description of the checked text. 
12 | pub description: String, 13 | /// Name of the Checker we are using 14 | pub checker_name: &'static str, 15 | /// Description of the Checker we are using 16 | pub checker_description: &'static str, 17 | /// Link to more info about checker 18 | pub link: &'static str, 19 | } 20 | 21 | /// To save time we have a default 22 | /// for checkResult in case we fail 23 | /// I do not believe the checker is important if failed 24 | /// as we will not use it. To save time we will return a default 25 | /// checker. 26 | impl CheckResult { 27 | /// Creates a default CheckResult 28 | pub fn new(checker_used: &Checker) -> CheckResult { 29 | CheckResult { 30 | is_identified: false, 31 | text: "".to_string(), 32 | checker_name: checker_used.name, 33 | checker_description: checker_used.description, 34 | description: "".to_string(), 35 | link: checker_used.link, 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /docs/changes/2024-07-10-astar-refactor.md: -------------------------------------------------------------------------------- 1 | # Change: AStar Refactoring and String Quality Enhancement 2 | 3 | ## Purpose 4 | Refactor the AStar search implementation to improve code organization and enhance string quality assessment by filtering out strings with high percentages of invisible characters. 
5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Improved code organization with helper functions in a separate module 9 | - Better memory efficiency by quickly rejecting strings with >50% invisible characters 10 | - Enhanced maintainability through clearer separation of concerns 11 | - Easier testing of individual helper functions 12 | 13 | ### Disadvantages 14 | - Slight increase in module complexity with an additional file 15 | - Potential for minor performance overhead from cross-module function calls 16 | 17 | ## Technical Implementation 18 | - Split AStar implementation into two files: 19 | - `astar.rs`: Core A* search algorithm implementation 20 | - `helper_functions.rs`: Supporting functions for heuristics, quality assessment, and statistics 21 | - Enhanced `calculate_string_quality` function to immediately reject strings with >50% invisible characters 22 | - Added a new test case to verify the invisible character filtering functionality 23 | - Updated module imports and exports in `mod.rs` 24 | 25 | ## Future Improvements 26 | - Persist decoder success statistics to disk for learning across sessions 27 | - Further optimize string quality assessment with more sophisticated language detection 28 | - Consider moving more common utility functions to the helper module for reuse by other search algorithms -------------------------------------------------------------------------------- /docs/changes/2024-03-21-add-vigenere-decoder.md: -------------------------------------------------------------------------------- 1 | # Change: Add Vigenère Cipher Decoder 2 | 3 | ## Purpose 4 | Implement a Vigenère cipher decoder to expand ciphey' classical cipher decoding capabilities. This decoder will automatically detect and break Vigenère encrypted text without requiring a key, making it valuable for cryptanalysis and historical cipher decoding. 
5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Implements sophisticated frequency analysis for automated key length detection 9 | - Uses Index of Coincidence (IoC) for reliable key length determination 10 | - Employs statistical analysis to break the cipher without requiring the key 11 | - Handles both known-key and unknown-key scenarios 12 | 13 | ### Disadvantages 14 | - Computationally more intensive than simple substitution ciphers 15 | - May produce false positives with very short texts 16 | - Effectiveness depends on text length and language characteristics 17 | 18 | ## Technical Implementation 19 | - Added Vigenère decoder module with key length detection using IoC 20 | - Implemented frequency analysis for automated key discovery 21 | - Added comprehensive test suite with example ciphertexts 22 | - Integrated with ciphey' existing decoder infrastructure 23 | - Popularity score set to 0.8 reflecting its historical significance 24 | 25 | ## Future Improvements 26 | - Add support for multiple languages beyond English 27 | - Implement parallel processing for faster key space exploration 28 | - Add option to specify known key length or partial key 29 | - Enhance accuracy for very short ciphertexts 30 | - Add support for variant ciphers (Beaufort, Gronsfeld) -------------------------------------------------------------------------------- /docs/changes/2024-07-10-remove-decoder-popularity.md: -------------------------------------------------------------------------------- 1 | # Change: Remove get_decoder_popularity Function 2 | 3 | ## Purpose 4 | Remove the redundant `get_decoder_popularity` function from `helper_functions.rs` since decoders already have a `popularity` attribute in their implementation. This eliminates duplication and ensures that popularity values are maintained in a single location. 
5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Eliminates redundant code that duplicated popularity values 9 | - Simplifies maintenance by having popularity values defined only in the decoder implementations 10 | - Reduces the risk of inconsistencies between the function and the actual decoder attributes 11 | 12 | ### Disadvantages 13 | - The `generate_heuristic` function no longer has direct access to the popularity values 14 | - Using success rate as a proxy for popularity may not perfectly match the original behavior 15 | 16 | ## Technical Implementation 17 | - Removed the `get_decoder_popularity` function from `helper_functions.rs` 18 | - Modified the `generate_heuristic` function to use the decoder's success rate as a proxy for popularity 19 | - Updated tests to verify that success rate affects the heuristic calculation 20 | - Removed the now-obsolete `test_popularity_affects_heuristic` test 21 | 22 | ## Future Improvements 23 | - Consider modifying the `CrackResult` struct to include the decoder's popularity attribute 24 | - Explore ways to directly access the decoder's popularity attribute in the `generate_heuristic` function 25 | - Evaluate whether success rate is an appropriate proxy for popularity or if another approach would be better -------------------------------------------------------------------------------- /docs/changes/2024-07-10-improve-string-pruning.md: -------------------------------------------------------------------------------- 1 | # Change: Improve String Pruning for Low-Quality Inputs 2 | 3 | ## Purpose 4 | Enhance the pruning mechanism to skip decoding of low-quality strings, which improves efficiency by avoiding wasted computation on strings that are unlikely to produce meaningful results. 
5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Reduces computational resources spent on strings unlikely to yield useful results 9 | - Speeds up the overall decoding process by focusing on higher-quality candidates 10 | - Prevents the search algorithm from exploring unproductive paths 11 | - Improves memory usage by pruning low-quality strings early 12 | 13 | ### Disadvantages 14 | - May occasionally reject valid encodings that have unusual characteristics 15 | - Requires careful tuning of thresholds to balance efficiency and thoroughness 16 | - Adds additional computation for quality checks (though this is minimal compared to the savings) 17 | 18 | ## Technical Implementation 19 | - Enhanced the `check_if_string_cant_be_decoded` function to consider multiple quality factors: 20 | - String length (rejects strings with 2 or fewer characters) 21 | - Non-printable character ratio (rejects strings with >30% non-printable characters) 22 | - Overall string quality (rejects strings with quality score <0.2) 23 | - Added comprehensive tests to verify the pruning behavior 24 | - Updated documentation to explain the rationale behind each pruning criterion 25 | 26 | ## Future Improvements 27 | - Fine-tune the thresholds based on real-world usage data 28 | - Consider adding more sophisticated quality metrics (e.g., entropy, character distribution) 29 | - Implement adaptive thresholds that adjust based on the search context 30 | - Add logging to track how many strings are being pruned and why -------------------------------------------------------------------------------- /docs/changes/2024-07-01-wordlist-checker.md: -------------------------------------------------------------------------------- 1 | # Change: Add Wordlist Checker 2 | 3 | ## Purpose 4 | Implement a wordlist checker that performs exact matching against a user-provided list of words. 
This allows users to check if the input text exactly matches any word in their custom wordlist, which is useful for targeted decoding tasks where the expected output is known to be within a specific set of words. 5 | 6 | ## Trade-offs 7 | 8 | ### Advantages 9 | - Provides exact matching against custom wordlists 10 | - Efficient O(1) lookups using HashSet 11 | - Memory-mapped file handling for large wordlists (>10MB) 12 | - Takes precedence over other checkers when specified, allowing for targeted checking 13 | - Supports both CLI argument and config file specification 14 | 15 | ### Disadvantages 16 | - Requires additional memory to store the wordlist 17 | - Only performs exact matching (no partial or fuzzy matching) 18 | - Case-sensitive matching only 19 | - No support for multiple wordlists 20 | 21 | ## Technical Implementation 22 | - Added `wordlist_path` and `wordlist` fields to the `Config` struct 23 | - Implemented `load_wordlist` function using memory mapping for large files 24 | - Created a new `WordlistChecker` that performs exact matching against the wordlist 25 | - Updated Athena checker to prioritize wordlist checking when a wordlist is provided 26 | - Added `--wordlist` CLI argument that takes precedence over config file 27 | - Updated library API to accept pre-loaded wordlists 28 | 29 | ## Future Improvements 30 | - Add support for case-insensitive matching 31 | - Implement partial matching options 32 | - Support multiple wordlist files 33 | - Add progress indicator for loading large wordlists 34 | - Implement wordlist caching 35 | - Add support for alternative wordlist formats (CSV, JSON, etc.) 
-------------------------------------------------------------------------------- /src/storage/invisible_chars/chars.txt: -------------------------------------------------------------------------------- 1 | U+0009 CHARACTER TABULATION 2 | U+0020 SPACE 3 | U+00A0 NO-BREAK SPACE 4 | U+00AD SOFT HYPHEN 5 | U+034F COMBINING GRAPHEME JOINER 6 | U+061C ARABIC LETTER MARK 7 | U+115F HANGUL CHOSEONG FILLER 8 | U+1160 HANGUL JUNGSEONG FILLER 9 | U+17B4 KHMER VOWEL INHERENT AQ 10 | U+17B5 KHMER VOWEL INHERENT AA 11 | U+180E MONGOLIAN VOWEL SEPARATOR 12 | U+2000 EN QUAD 13 | U+2001 EM QUAD 14 | U+2002 EN SPACE 15 | U+2003 EM SPACE 16 | U+2004 THREE-PER-EM SPACE 17 | U+2005 FOUR-PER-EM SPACE 18 | U+2006 SIX-PER-EM SPACE 19 | U+2007 FIGURE SPACE 20 | U+2008 PUNCTUATION SPACE 21 | U+2009 THIN SPACE 22 | U+200A HAIR SPACE 23 | U+200B ZERO WIDTH SPACE 24 | U+200C ZERO WIDTH NON-JOINER 25 | U+200D ZERO WIDTH JOINER 26 | U+200E LEFT-TO-RIGHT MARK 27 | U+200F RIGHT-TO-LEFT MARK 28 | U+202F NARROW NO-BREAK SPACE 29 | U+205F MEDIUM MATHEMATICAL SPACE 30 | U+2060 WORD JOINER 31 | U+2061 FUNCTION APPLICATION 32 | U+2062 INVISIBLE TIMES 33 | U+2063 INVISIBLE SEPARATOR 34 | U+2064 INVISIBLE PLUS 35 | U+206A INHIBIT SYMMETRIC SWAPPING 36 | U+206B ACTIVATE SYMMETRIC SWAPPING 37 | U+206C INHIBIT ARABIC FORM SHAPING 38 | U+206D ACTIVATE ARABIC FORM SHAPING 39 | U+206E NATIONAL DIGIT SHAPES 40 | U+206F NOMINAL DIGIT SHAPES 41 | U+3000 IDEOGRAPHIC SPACE 42 | U+2800 BRAILLE PATTERN BLANK 43 | U+3164 HANGUL FILLER 44 | U+FEFF ZERO WIDTH NO-BREAK SPACE 45 | U+FFA0 HALFWIDTH HANGUL FILLER 46 | U+1D159 MUSICAL SYMBOL NULL NOTEHEAD 47 | U+1D173 MUSICAL SYMBOL BEGIN BEAM 48 | U+1D174 MUSICAL SYMBOL END BEAM 49 | U+1D175 MUSICAL SYMBOL BEGIN TIE 50 | U+1D176 MUSICAL SYMBOL END TIE 51 | U+1D177 MUSICAL SYMBOL BEGIN SLUR 52 | U+1D178 MUSICAL SYMBOL END SLUR 53 | U+1D179 MUSICAL SYMBOL BEGIN PHRASE 54 | U+1D17A MUSICAL SYMBOL END PHRASE 
-------------------------------------------------------------------------------- /src/checkers/default_checker.rs: -------------------------------------------------------------------------------- 1 | use gibberish_or_not::Sensitivity; 2 | use lemmeknow::Identifier; 3 | 4 | use super::{ 5 | checker_result::CheckResult, 6 | checker_type::{Check, Checker}, 7 | }; 8 | 9 | /// The default checker is used to check if the text is plaintext 10 | /// Based on what the ciphey team has found to be the best checker. 11 | pub struct DefaultChecker; 12 | 13 | impl Check for Checker { 14 | fn new() -> Self { 15 | Checker { 16 | name: "Template checker", 17 | description: "This is a default template checker. If you're seeing this, it's an error. Please contact us on Discord http://discord.skerritt.blog", 18 | link: "http://discord.skerritt.blog", 19 | tags: vec![], 20 | expected_runtime: 0.0, 21 | popularity: 0.0, 22 | lemmeknow_config: Identifier::default(), 23 | sensitivity: Sensitivity::Medium, // Default to Medium sensitivity 24 | enhanced_detector: None, 25 | _phantom: std::marker::PhantomData, 26 | } 27 | } 28 | 29 | fn check(&self, _text: &str) -> CheckResult { 30 | CheckResult::new(self) 31 | } 32 | 33 | fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self { 34 | self.sensitivity = sensitivity; 35 | self 36 | } 37 | 38 | fn get_sensitivity(&self) -> Sensitivity { 39 | self.sensitivity 40 | } 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | use crate::checkers::{ 46 | checker_result::CheckResult, 47 | checker_type::{Check, Checker}, 48 | default_checker::DefaultChecker, 49 | }; 50 | 51 | #[test] 52 | fn default_checker_works() { 53 | let checker = Checker::::new(); 54 | let checker_result = CheckResult::new(&checker); 55 | assert!(!checker_result.is_identified); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /docs/storage.md: -------------------------------------------------------------------------------- 1 
| # Storage Module 2 | 3 | The storage module provides reusable data structures and constants that are used across the ciphey project. 4 | 5 | ## Contents 6 | 7 | ### English Letter Frequencies 8 | 9 | The `ENGLISH_FREQS` constant provides the frequency distribution of letters in the English language. This is used for frequency analysis in various decoders, such as the Vigenere decoder. 10 | 11 | ```rust 12 | pub const ENGLISH_FREQS: [f64; 26] = [ 13 | 0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G 14 | 0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N 15 | 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U 16 | 0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z 17 | ]; 18 | ``` 19 | 20 | These values represent the relative frequency of each letter in typical English text, from A to Z. They are used in statistical analysis for breaking classical ciphers. 21 | 22 | ### Invisible Characters 23 | 24 | The `INVISIBLE_CHARS` static collection contains a set of invisible Unicode characters that are loaded from a file at runtime. This is used for detecting and handling invisible characters in encoded text. 25 | 26 | ```rust 27 | pub static INVISIBLE_CHARS: Lazy> = Lazy::new(|| { 28 | // Implementation loads characters from a file 29 | // ... 30 | }); 31 | ``` 32 | 33 | The characters are loaded from `src/storage/invisible_chars/chars.txt` and include various whitespace and zero-width characters. 34 | 35 | ## Usage 36 | 37 | To use these resources in your code: 38 | 39 | ```rust 40 | use crate::storage::ENGLISH_FREQS; 41 | use crate::storage::INVISIBLE_CHARS; 42 | 43 | // Example: Using English frequencies for analysis 44 | fn analyze_text(text: &str) { 45 | // ...frequency analysis using ENGLISH_FREQS... 
46 | } 47 | 48 | // Example: Checking for invisible characters 49 | fn check_for_invisible(text: &str) -> bool { 50 | text.chars().any(|c| INVISIBLE_CHARS.contains(&c)) 51 | } -------------------------------------------------------------------------------- /docs/changes/2024-07-02-sensitivity-trait.md: -------------------------------------------------------------------------------- 1 | # Change: Make Sensitivity an Optional Trait 2 | 3 | ## Purpose 4 | Implement an optional `SensitivityAware` trait for checkers that use sensitivity for gibberish detection. This separates the sensitivity functionality from the core `Check` trait, allowing checkers like the WordlistChecker to avoid implementing sensitivity-related methods that they don't actually use. 5 | 6 | ## Trade-offs 7 | 8 | ### Advantages 9 | - Cleaner separation of concerns between core checking functionality and sensitivity handling 10 | - Checkers that don't use sensitivity don't need to implement unused methods 11 | - More accurate representation of which checkers actually use sensitivity 12 | - Reduces code duplication and improves maintainability 13 | - Makes it clearer to developers which checkers support sensitivity adjustment 14 | 15 | ### Disadvantages 16 | - Requires changes to existing code that assumes all checkers implement sensitivity methods 17 | - Slightly more complex trait hierarchy 18 | - Requires careful handling in composite checkers like Athena 19 | 20 | ## Technical Implementation 21 | - Created a new `SensitivityAware` trait in `checker_type.rs` with the sensitivity-related methods 22 | - Removed sensitivity methods from the core `Check` trait 23 | - Updated the WordlistChecker to not implement the `SensitivityAware` trait 24 | - Updated the Athena checker to handle both sensitivity-aware and non-sensitivity-aware checkers 25 | - Kept the sensitivity field in the `Checker` struct for backward compatibility 26 | - Added documentation to clarify which checkers use sensitivity 27 | 28 | 
## Future Improvements 29 | - Implement the `SensitivityAware` trait for all checkers that actually use sensitivity 30 | - Add runtime detection of whether a checker implements `SensitivityAware` 31 | - Consider making the sensitivity field optional in the `Checker` struct 32 | - Add helper methods to safely apply sensitivity only to checkers that support it 33 | - Update documentation to clearly indicate which checkers support sensitivity adjustment -------------------------------------------------------------------------------- /src/api_library_input_struct.rs: -------------------------------------------------------------------------------- 1 | /// import general checker 2 | use crate::checkers::{ 3 | checker_type::{Check, Checker}, 4 | default_checker::DefaultChecker, 5 | }; 6 | use lemmeknow::Identifier; 7 | use std::collections::HashSet; 8 | 9 | /// Library input is the default API input 10 | /// The CLI turns its arguments into a LibraryInput struct 11 | #[allow(dead_code)] 12 | pub struct LibraryInput { 13 | /// The input to be decoded. 14 | /// Given to us by the user. 15 | pub encoded_text: String, 16 | /// A level of verbosity to determine. 17 | /// How much we print in logs. 
18 | pub verbose: i32, 19 | /// The checker to use 20 | pub checker: Checker, 21 | /// The lemmeknow config to use 22 | pub lemmeknow_config: Identifier, 23 | /// Pre-loaded wordlist (allows library users to provide wordlist directly) 24 | pub wordlist: Option>, 25 | } 26 | 27 | /// Creates a default lemmeknow config 28 | const LEMMEKNOW_DEFAULT_CONFIG: Identifier = Identifier { 29 | min_rarity: 0.0, 30 | max_rarity: 0.0, 31 | tags: vec![], 32 | exclude_tags: vec![], 33 | file_support: false, 34 | boundaryless: false, 35 | }; 36 | 37 | impl Default for LibraryInput { 38 | fn default() -> Self { 39 | LibraryInput { 40 | encoded_text: String::new(), 41 | // this will be of type Checker 42 | verbose: 0, 43 | checker: Checker::new(), 44 | lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG, 45 | wordlist: None, 46 | } 47 | } 48 | } 49 | 50 | impl LibraryInput { 51 | /// Set a pre-loaded wordlist 52 | /// 53 | /// This method is part of the public API for library users who want to provide 54 | /// a pre-loaded wordlist directly. While it may not be used internally yet, 55 | /// it's maintained for API compatibility and future use cases. 56 | #[allow(dead_code)] 57 | pub fn with_wordlist(mut self, wordlist: HashSet) -> Self { 58 | self.wordlist = Some(wordlist); 59 | self 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/checkers/regex_checker.rs: -------------------------------------------------------------------------------- 1 | use gibberish_or_not::Sensitivity; 2 | use lemmeknow::Identifier; 3 | 4 | use super::checker_type::{Check, Checker}; 5 | use crate::{checkers::checker_result::CheckResult, config::get_config}; 6 | use log::trace; 7 | use regex::Regex; 8 | 9 | /// The Regex Checker checks if the text matches a known Regex pattern. 10 | /// This is the struct for it. 
11 | pub struct RegexChecker; 12 | 13 | impl Check for Checker { 14 | fn new() -> Self { 15 | Checker { 16 | name: "Regex Checker", 17 | description: "Uses Regex to check for regex matches, useful for finding cribs.", 18 | link: "https://github.com/rust-lang/regex", 19 | tags: vec!["crib", "regex"], 20 | expected_runtime: 0.01, 21 | popularity: 1.0, 22 | lemmeknow_config: Identifier::default(), 23 | sensitivity: Sensitivity::Medium, // Default to Medium sensitivity 24 | enhanced_detector: None, 25 | _phantom: std::marker::PhantomData, 26 | } 27 | } 28 | 29 | fn check(&self, text: &str) -> CheckResult { 30 | trace!("Checking {} with regex", text); 31 | // TODO put this into a lazy static so we don't generate it everytime 32 | let config = get_config(); 33 | let regex_to_parse = config.regex.clone(); 34 | let re = Regex::new(®ex_to_parse.unwrap()).unwrap(); 35 | 36 | let regex_check_result = re.is_match(text); 37 | let mut plaintext_found = false; 38 | let printed_name = format!("Regex matched: {re}"); 39 | if regex_check_result { 40 | plaintext_found = true; 41 | } 42 | 43 | CheckResult { 44 | is_identified: plaintext_found, 45 | text: text.to_string(), 46 | checker_name: self.name, 47 | checker_description: self.description, 48 | description: printed_name, 49 | link: self.link, 50 | } 51 | } 52 | 53 | fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self { 54 | self.sensitivity = sensitivity; 55 | self 56 | } 57 | 58 | fn get_sensitivity(&self) -> Sensitivity { 59 | self.sensitivity 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /.github/workflows/quickstart.yml: -------------------------------------------------------------------------------- 1 | # Based on https://github.com/actions-rs/meta/blob/master/recipes/quickstart.md 2 | # 3 | # While our "example" application has the platform-specific code, 4 | # for simplicity we are compiling and testing everything on the Ubuntu environment only. 
5 | # For multi-OS testing see the `cross.yml` workflow. 6 | 7 | on: [push, pull_request] 8 | 9 | name: Test 10 | 11 | jobs: 12 | check: 13 | name: Check 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout sources 17 | uses: actions/checkout@v2 18 | with: 19 | lfs: true 20 | 21 | - name: Install stable toolchain 22 | uses: actions-rs/toolchain@v1 23 | with: 24 | profile: minimal 25 | toolchain: stable 26 | override: true 27 | 28 | - name: Run cargo check 29 | uses: actions-rs/cargo@v1 30 | with: 31 | command: check 32 | 33 | test: 34 | name: Test Suite 35 | strategy: 36 | fail-fast: false 37 | matrix: 38 | os: [ubuntu-latest, windows-latest, macos-latest] 39 | runs-on: ubuntu-latest 40 | steps: 41 | - name: Checkout sources 42 | uses: actions/checkout@v2 43 | 44 | - name: Install stable toolchain 45 | uses: actions-rs/toolchain@v1 46 | with: 47 | profile: minimal 48 | toolchain: stable 49 | override: true 50 | 51 | - name: Run cargo test 52 | uses: actions-rs/cargo@v1 53 | with: 54 | command: test 55 | 56 | lints: 57 | name: Lints 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout sources 61 | uses: actions/checkout@v2 62 | 63 | - name: Install stable toolchain 64 | uses: actions-rs/toolchain@v1 65 | with: 66 | profile: minimal 67 | toolchain: stable 68 | override: true 69 | components: rustfmt, clippy 70 | 71 | - name: Run cargo fmt 72 | uses: actions-rs/cargo@v1 73 | with: 74 | command: fmt 75 | args: --all -- --check 76 | 77 | - name: Run cargo clippy 78 | uses: actions-rs/cargo@v1 79 | with: 80 | command: clippy 81 | -------------------------------------------------------------------------------- /docs/sensitivity.md: -------------------------------------------------------------------------------- 1 | # Sensitivity Levels in Gibberish Detection 2 | 3 | ## Overview 4 | 5 | ciphey uses the `gibberish_or_not` library to detect whether decoded text is meaningful English. 
This library provides three sensitivity levels to fine-tune gibberish detection: 6 | 7 | ### Low Sensitivity 8 | - Most strict classification 9 | - Requires very high confidence to classify text as English 10 | - Best for detecting texts that appear English-like but are actually gibberish 11 | - Used by classical ciphers like Caesar cipher that produce more English-like results 12 | 13 | ### Medium Sensitivity (Default) 14 | - Balanced approach for general use 15 | - Combines dictionary and n-gram analysis 16 | - Default mode suitable for most applications 17 | - Used by most decoders in ciphey 18 | 19 | ### High Sensitivity 20 | - Most lenient classification 21 | - Favors classifying text as English 22 | - Best when input is mostly gibberish and any English-like patterns are significant 23 | 24 | ## Implementation in ciphey 25 | 26 | In ciphey, different decoders use different sensitivity levels based on their characteristics: 27 | 28 | 1. **Caesar Cipher**: Uses Low sensitivity because classical ciphers often produce text that can appear English-like even when the shift is incorrect. 29 | 30 | 2. **Other Decoders**: Use Medium sensitivity by default, which provides a balanced approach for most types of encoded text. 31 | 32 | ## Customizing Sensitivity 33 | 34 | Decoders can override the default sensitivity level when needed. 
The `CheckerTypes` enum provides a `with_sensitivity` method that allows changing the sensitivity level: 35 | 36 | ```rust 37 | // Example: Using a checker with a custom sensitivity level 38 | let checker_with_sensitivity = checker.with_sensitivity(Sensitivity::High); 39 | let result = checker_with_sensitivity.check(text); 40 | ``` 41 | 42 | ## Technical Details 43 | 44 | The sensitivity level affects the thresholds used for n-gram analysis and dictionary checks: 45 | 46 | - **Low Sensitivity**: Stricter thresholds, requiring more evidence to classify text as English 47 | - **Medium Sensitivity**: Balanced thresholds suitable for most applications 48 | - **High Sensitivity**: Lenient thresholds, more likely to classify text as English 49 | 50 | For more details on how the sensitivity levels work, see the [gibberish_or_not documentation](https://crates.io/crates/gibberish-or-not). -------------------------------------------------------------------------------- /docs/changes/2024-07-10-wait-athena-checker.md: -------------------------------------------------------------------------------- 1 | # Change: Add WaitAthena Checker for Collecting Multiple Plaintexts 2 | 3 | ## Purpose 4 | Implement a variant of the Athena checker that collects all potential plaintexts found during the search instead of exiting immediately when the first plaintext is found. This allows users to see all possible interpretations of their ciphertext, which is particularly useful for ambiguous encodings or when multiple valid plaintexts might exist. 
5 | 6 | ## Trade-offs 7 | ### Advantages 8 | - Provides users with multiple potential plaintexts instead of just the first one found 9 | - Allows for more comprehensive analysis of ambiguous ciphertexts 10 | - Maintains compatibility with all existing decoders and checkers 11 | - Simple to use via a single command-line flag (`--top-results`) 12 | - Automatically disables the human checker to avoid interrupting the search process 13 | - Continues searching until the timer expires, maximizing the number of potential plaintexts found 14 | 15 | ### Disadvantages 16 | - May take longer to complete as it continues searching even after finding valid plaintexts 17 | - Could potentially return false positives along with true plaintexts 18 | - Increases memory usage as all results must be stored until the timer expires 19 | 20 | ## Technical Implementation 21 | - Created a new `WaitAthena` checker that is a variant of `Athena` but stores results instead of returning immediately 22 | - Implemented a thread-safe storage mechanism using `Mutex` and `lazy_static` to store plaintext results 23 | - Modified the timer module to display all collected plaintext results when the timer expires 24 | - Added a new configuration option (`top_results`) to enable WaitAthena mode 25 | - Added a new command-line flag (`--top-results`) to enable WaitAthena mode 26 | - Updated the library interface to use WaitAthena when the `top_results` option is enabled 27 | - Automatically disabled the human checker when `--top-results` is specified to avoid interrupting the search process 28 | - Modified the search algorithm to continue searching until the timer expires when in top_results mode 29 | 30 | ## Future Improvements 31 | - Add filtering options for WaitAthena results to reduce false positives 32 | - Implement sorting of results by confidence level or other metrics 33 | - Add an option to save results to a file for later analysis 34 | - Implement deduplication logic if duplicate plaintexts become 
an issue in practice -------------------------------------------------------------------------------- /src/storage/wait_athena_storage.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use log::{trace, warn}; 3 | use std::sync::Mutex; 4 | 5 | /// Represents a plaintext result with its description, checker name, and decoder name 6 | #[derive(Debug, Clone)] 7 | pub struct PlaintextResult { 8 | /// The plaintext text 9 | pub text: String, 10 | /// The description of the result 11 | pub description: String, 12 | /// The name of the checker used to generate the result 13 | pub checker_name: String, 14 | /// The name of the decoder used to generate the result 15 | pub decoder_name: String, 16 | } 17 | 18 | lazy_static! { 19 | static ref PLAINTEXT_RESULTS: Mutex> = Mutex::new(Vec::new()); 20 | } 21 | 22 | /// Adds a plaintext result to the storage 23 | pub fn add_plaintext_result( 24 | text: String, 25 | description: String, 26 | checker_name: String, 27 | decoder_name: String, 28 | ) { 29 | let result = PlaintextResult { 30 | text: text.clone(), 31 | description: description.clone(), 32 | checker_name: checker_name.clone(), 33 | decoder_name: decoder_name.clone(), 34 | }; 35 | 36 | trace!( 37 | "Adding plaintext result: [{}] {} (decoder: {})", 38 | checker_name, 39 | text, 40 | decoder_name 41 | ); 42 | 43 | let mut results = match PLAINTEXT_RESULTS.lock() { 44 | Ok(guard) => guard, 45 | Err(poisoned) => { 46 | warn!("Mutex was poisoned, recovering"); 47 | poisoned.into_inner() 48 | } 49 | }; 50 | 51 | results.push(result); 52 | trace!("Storage now has {} results", results.len()); 53 | } 54 | 55 | /// Retrieves all plaintext results from the storage 56 | pub fn get_plaintext_results() -> Vec { 57 | let results = match PLAINTEXT_RESULTS.lock() { 58 | Ok(guard) => guard, 59 | Err(poisoned) => { 60 | warn!("Mutex was poisoned, recovering"); 61 | poisoned.into_inner() 62 | } 63 | }; 64 | 65 | 
trace!("Retrieving {} plaintext results", results.len()); 66 | results.clone() 67 | } 68 | 69 | /// Clears all plaintext results from the storage 70 | pub fn clear_plaintext_results() { 71 | let mut results = match PLAINTEXT_RESULTS.lock() { 72 | Ok(guard) => guard, 73 | Err(poisoned) => { 74 | warn!("Mutex was poisoned, recovering"); 75 | poisoned.into_inner() 76 | } 77 | }; 78 | 79 | trace!("Clearing plaintext results (had {} results)", results.len()); 80 | results.clear(); 81 | } 82 | -------------------------------------------------------------------------------- /docs/changes/2024-03-11-fix-duplicate-human-prompts.md: -------------------------------------------------------------------------------- 1 | # Fix Duplicate Human Verification Prompts 2 | 3 | ## Issue 4 | When running ciphey in top_results mode with parallel A* search, users would sometimes see duplicate human verification prompts for the same plaintext. This occurred because: 5 | 6 | 1. The parallel A* search could discover the same solution path multiple times 7 | 2. Each discovery would trigger Athena's checker 8 | 3. The human checker would prompt for verification each time, even for identical results 9 | 10 | Example of duplicated prompts: 11 | ``` 12 | 🕵️ I think the plaintext is Words. 13 | Possible plaintext: 'hello this text...' (y/N): 14 | ... 15 | 🕵️ I think the plaintext is Words. 16 | Possible plaintext: 'hello this text...' (y/N): 17 | ``` 18 | 19 | ## Root Cause Analysis 20 | The issue stemmed from multiple factors: 21 | 1. Parallel processing in A* search allowing multiple threads to find the same solution 22 | 2. Top_results mode continuing the search after finding a valid result 23 | 3. No deduplication of human verification prompts 24 | 4. 
State being maintained separately in each Athena checker instance 25 | 26 | ## Solution 27 | Added prompt deduplication to the human checker using a thread-safe cache: 28 | 29 | ```rust 30 | use dashmap::DashSet; 31 | use std::sync::OnceLock; 32 | 33 | static SEEN_PROMPTS: OnceLock> = OnceLock::new(); 34 | 35 | fn get_seen_prompts() -> &'static DashSet { 36 | SEEN_PROMPTS.get_or_init(|| DashSet::new()) 37 | } 38 | ``` 39 | 40 | The human checker now checks if it has already prompted for a given plaintext: 41 | ```rust 42 | let prompt_key = format!("{}{}", input.description, input.text); 43 | if !get_seen_prompts().insert(prompt_key) { 44 | println!("DEBUG: Skipping duplicate human verification prompt"); 45 | return true; // Return true to allow the search to continue 46 | } 47 | ``` 48 | 49 | Benefits of this approach: 50 | 1. Thread-safe using DashSet 51 | 2. Minimal code changes required 52 | 3. Maintains existing functionality while eliminating duplicates 53 | 4. Works regardless of which code path triggered the verification 54 | 55 | ## Alternative Approaches Considered 56 | 1. Result deduplication in A* search - Too late, prompts already shown 57 | 2. Modifying Athena checker - More complex, required state management 58 | 3. Disabling parallel processing - Would impact performance 59 | 4. 
Disabling top_results mode - Would limit functionality 60 | 61 | The chosen solution provides the best balance of: 62 | - Minimal code changes 63 | - No performance impact 64 | - Preserved functionality 65 | - Clean user experience -------------------------------------------------------------------------------- /src/timer/mod.rs: -------------------------------------------------------------------------------- 1 | use crossbeam::channel::{bounded, Receiver}; 2 | use std::sync::atomic::Ordering::Relaxed; 3 | use std::{ 4 | sync::atomic::AtomicBool, 5 | thread::{self, sleep}, 6 | time::Duration, 7 | }; 8 | 9 | use crate::cli_pretty_printing::{countdown_until_program_ends, display_top_results}; 10 | use crate::config::get_config; 11 | use crate::storage::wait_athena_storage; 12 | 13 | /// Indicate whether timer is paused 14 | static PAUSED: AtomicBool = AtomicBool::new(false); 15 | 16 | /// Start the timer with duration in seconds 17 | pub fn start(duration: u32) -> Receiver<()> { 18 | let (sender, recv) = bounded(1); 19 | thread::spawn(move || { 20 | let mut time_spent = 0; 21 | 22 | while time_spent < duration { 23 | if !PAUSED.load(Relaxed) { 24 | sleep(Duration::from_secs(1)); 25 | time_spent += 1; 26 | // Some pretty printing support 27 | countdown_until_program_ends(time_spent, duration); 28 | } 29 | } 30 | 31 | // When the timer expires, display all collected plaintext results 32 | // Only if we're in top_results mode 33 | let config = get_config(); 34 | log::trace!("Timer expired. 
top_results mode: {}", config.top_results); 35 | 36 | if config.top_results { 37 | log::info!("Displaying all collected plaintext results"); 38 | filter_and_display_results(); 39 | } else { 40 | log::info!("Not in top_results mode, skipping display_wait_athena_results()"); 41 | } 42 | 43 | // Replace the existing expect with a match that logs errors in case of send failure 44 | match sender.send(()) { 45 | Ok(_) => log::debug!("Timer signal sent successfully"), 46 | Err(e) => { 47 | // Just log the error instead of panicking 48 | log::warn!( 49 | "Failed to send timer signal: {:?}. This is expected in benchmarks.", 50 | e 51 | ); 52 | } 53 | } 54 | }); 55 | 56 | recv 57 | } 58 | 59 | /// Filter and display all plaintext results collected by WaitAthena 60 | fn filter_and_display_results() { 61 | let results = wait_athena_storage::get_plaintext_results(); 62 | 63 | log::trace!( 64 | "Retrieved {} results from wait_athena_storage", 65 | results.len() 66 | ); 67 | 68 | // Use the cli_pretty_printing function to display the results 69 | display_top_results(&results); 70 | } 71 | 72 | /// Pause timer 73 | pub fn pause() { 74 | PAUSED.store(true, Relaxed); 75 | } 76 | 77 | /// Resume timer 78 | pub fn resume() { 79 | PAUSED.store(false, Relaxed); 80 | } 81 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # ciphey Documentation 2 | 3 | Welcome to the ciphey documentation! This repository contains comprehensive documentation for ciphey, the next-generation automatic decoding and cracking tool. 
4 | 5 | ## Table of Contents 6 | 7 | ### General Documentation 8 | 9 | - [ciphey Overview](ciphey_overview.md) - A high-level overview of ciphey, its features, and capabilities 10 | - [Using ciphey](using_ciphey.md) - A comprehensive guide on how to use ciphey, with examples and common use cases 11 | 12 | ### Technical Documentation 13 | 14 | - [ciphey Architecture](ciphey_architecture.md) - Detailed explanation of ciphey's internal architecture and components 15 | - [Plaintext Identification](plaintext_identification.md) - How ciphey identifies plaintext and determines when decoding is successful 16 | 17 | ### Feature-Specific Documentation 18 | 19 | - [Invisible Characters Detection](invisible_characters.md) - Information about ciphey's capability to detect and handle invisible Unicode characters 20 | - [Package Managers](package-managers.md) - Guidelines for packaging ciphey for different package managers 21 | 22 | ## About ciphey 23 | 24 | ciphey is the next generation of decoding tools, built by the same people that brought you [Ciphey](https://github.com/ciphey/ciphey). It's designed to automatically detect and decode various types of encoded or encrypted text, including (but not limited to) Base64, Hexadecimal, Caesar cipher, ROT13, URL encoding, and many more. 25 | 26 | Key features include: 27 | 28 | - Significantly faster performance (up to 700% faster than Ciphey) 29 | - Library-first architecture for easy integration 30 | - Advanced search algorithms for efficient decoding 31 | - Built-in timeout mechanism 32 | - Comprehensive documentation and testing 33 | - Support for multi-level encodings 34 | 35 | ## Getting Started 36 | 37 | The quickest way to get started with ciphey is to install it via Cargo: 38 | 39 | ```bash 40 | cargo install ciphey 41 | ``` 42 | 43 | Then use it with the `ciphey` command: 44 | 45 | ```bash 46 | ciphey "your encoded text here" 47 | ``` 48 | 49 | For more detailed instructions, see the [Using ciphey](using_ciphey.md) guide. 
50 | 51 | ## Contributing 52 | 53 | Contributions to ciphey are welcome! Whether it's adding new decoders, improving existing ones, enhancing documentation, or fixing bugs, your help is appreciated. Check the [GitHub repository](https://github.com/bee-san/ciphey) for more information on how to contribute. 54 | 55 | ## Additional Resources 56 | 57 | - [GitHub Repository](https://github.com/bee-san/ciphey) 58 | - [Discord Server](http://discord.skerritt.blog) 59 | - [Blog Post: Introducing ciphey](https://skerritt.blog/introducing-ciphey/) 60 | - [Ciphey2 Documentation](https://broadleaf-angora-7db.notion.site/Ciphey2-32d5eea5d38b40c5b95a9442b4425710) -------------------------------------------------------------------------------- /src/storage/mod.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::collections::HashSet; 3 | use std::fs; 4 | use std::path::Path; 5 | 6 | /// Module housing functions for managing SQLite database 7 | pub mod database; 8 | /// Module for storing WaitAthena results 9 | pub mod wait_athena_storage; 10 | 11 | /// English letter frequency distribution (A-Z) 12 | /// Used for frequency analysis in various decoders 13 | pub const ENGLISH_FREQS: [f64; 26] = [ 14 | 0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G 15 | 0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N 16 | 0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U 17 | 0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z 18 | ]; 19 | 20 | /// Loads invisible character list into a HashSet 21 | pub static INVISIBLE_CHARS: Lazy> = Lazy::new(|| { 22 | let mut entries: HashSet = HashSet::new(); 23 | 24 | // Path to the invisible characters file 25 | let chars_file_path = Path::new(env!("CARGO_MANIFEST_DIR")) 26 | .join("src") 27 | .join("storage") 28 | .join("invisible_chars") 29 | .join("chars.txt"); 30 | 31 | // Read the file content 32 | if let Ok(content) 
= fs::read_to_string(&chars_file_path) { 33 | let content_lines = content.split('\n'); 34 | for line in content_lines { 35 | if line.is_empty() { 36 | continue; 37 | } 38 | let unicode_line_split: Vec<&str> = line.split_ascii_whitespace().collect(); 39 | if unicode_line_split.is_empty() { 40 | continue; 41 | } 42 | let unicode_literal = unicode_line_split[0].trim_start_matches("U+"); 43 | if let Ok(unicode_value) = u32::from_str_radix(unicode_literal, 16) { 44 | if let Some(unicode_char) = char::from_u32(unicode_value) { 45 | entries.insert(unicode_char); 46 | } 47 | } 48 | } 49 | } 50 | 51 | entries 52 | }); 53 | 54 | // Rust tests 55 | #[cfg(test)] 56 | mod tests { 57 | use super::*; 58 | 59 | #[test] 60 | fn test_invisible_chars_loaded() { 61 | // Verify that the INVISIBLE_CHARS HashSet is not empty 62 | assert!(!INVISIBLE_CHARS.is_empty()); 63 | } 64 | 65 | #[test] 66 | fn test_invisible_chars_contains_space() { 67 | // Verify that the space character (U+0020) is in the HashSet 68 | assert!(INVISIBLE_CHARS.contains(&' ')); 69 | } 70 | 71 | #[test] 72 | fn test_invisible_chars_contains_zero_width_space() { 73 | // Verify that the zero width space (U+200B) is in the HashSet 74 | // This is a common invisible character 75 | let zero_width_space = char::from_u32(0x200B).unwrap(); 76 | assert!(INVISIBLE_CHARS.contains(&zero_width_space)); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ciphey" 3 | repository = "https://github.com/bee-san/ciphey" 4 | version = "0.12.0" 5 | edition = "2021" 6 | description = "Automated decoding tool, Ciphey but in Rust" 7 | license = "MIT" 8 | 9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 10 | 11 | [lib] 12 | name = "ciphey" 13 | path = "src/lib.rs" 14 | bench = false 15 | 16 | [[bin]] 17 | name = "ciphey" 18 | 
path = "src/main.rs" 19 | bench = false 20 | 21 | # Please keep this list in alphabetical order 22 | [dependencies] 23 | ansi_term = "0.12.1" 24 | chrono = "0.4.42" 25 | cipher_identifier = "0.2.0" 26 | clap = {version = "4.5.53", features = ["derive"]} 27 | colored = "3.0.0" 28 | crossbeam = "0.8" 29 | dirs = "6.0.0" 30 | env_logger = "0.11.8" 31 | gibberish-or-not = "5.0.7" 32 | human-panic = "2.0.4" 33 | include_dir = "0.7.3" 34 | lazy-regex = "3.4.2" 35 | lazy_static = "1.4.0" 36 | lemmeknow = "0.8.0" 37 | log = "0.4" 38 | memmap2 = "0.9.9" 39 | num = "0.4" 40 | once_cell = "1.21.3" 41 | proc-macro2 = "1.0.103" # Required due to https://github.com/rust-lang/rust/issues/113152 42 | rayon = "1.11.0" 43 | regex = "1.12.2" 44 | rpassword = "7.4.0" 45 | rusqlite = { version = "0.37", features = ["bundled"] } 46 | serde = { version = "1.0.228", features = ["derive"] } 47 | serde_derive = "1.0.197" 48 | serde_json = "1.0" 49 | serial_test = "3.2.0" 50 | text_io = "0.1.13" 51 | toml = "0.9.10" 52 | uuid = "1.19.0" 53 | rand = "0.9.2" # For generating random values 54 | 55 | # Dependencies used for decoding 56 | base64 = "0.22.1" 57 | base65536 = "1.0.1" 58 | base91 = "0.1.0" 59 | bs58 = "0.5.0" 60 | data-encoding = "2.9.0" 61 | urlencoding = "2.1.3" 62 | z85 = "3.0.5" 63 | brainfuck-exe = { version = "0.2.4", default-features = false } 64 | dashmap = "6.1.0" 65 | 66 | # Dev dependencies 67 | [dev-dependencies] 68 | cargo-nextest = "0.9.115" 69 | criterion = "0.8.1" 70 | 71 | [profile.release] 72 | lto = "fat" 73 | panic = "abort" 74 | strip = "symbols" 75 | codegen-units = 1 76 | 77 | # The profile that 'cargo dist' will build with 78 | [profile.dist] 79 | inherits = "release" 80 | 81 | [[bench]] 82 | name = "benchmark_crackers" 83 | harness = false 84 | 85 | [[bench]] 86 | name = "benchmark_decoders" 87 | harness = false 88 | 89 | [[bench]] 90 | name = "benchmark_whole_program" 91 | harness = false 92 | 93 | # Config for 'cargo dist' 94 | [workspace.metadata.dist] 95 
| # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) 96 | cargo-dist-version = "0.1.0" 97 | # CI backends to support (see 'cargo dist generate-ci') 98 | ci = ["github"] 99 | # The installers to generate for each app 100 | installers = [] 101 | # Target platforms to build apps for (Rust target-triple syntax) 102 | targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"] 103 | -------------------------------------------------------------------------------- /src/checkers/human_checker.rs: -------------------------------------------------------------------------------- 1 | use crate::checkers::checker_result::CheckResult; 2 | use crate::cli_pretty_printing::human_checker_check; 3 | use crate::config::get_config; 4 | use crate::storage::database; 5 | use crate::{cli_pretty_printing, timer}; 6 | use dashmap::DashSet; 7 | use std::sync::atomic::{AtomicBool, Ordering}; 8 | use std::sync::OnceLock; 9 | use text_io::read; 10 | 11 | static SEEN_PROMPTS: OnceLock> = OnceLock::new(); 12 | // if human checker is called, we set this to true 13 | // so we dont call it again 14 | static HUMAN_CONFIRMED: AtomicBool = AtomicBool::new(false); 15 | 16 | fn get_seen_prompts() -> &'static DashSet { 17 | SEEN_PROMPTS.get_or_init(DashSet::new) 18 | } 19 | 20 | /// The Human Checker asks humans if the expected plaintext is real plaintext 21 | /// We can use all the automated checkers in the world, but sometimes they get false positives 22 | /// Humans have the last say. 23 | /// TODO: Add a way to specify a list of checkers to use in the library. This checker is not library friendly! 
24 | // compile this if we are not running tests 25 | pub fn human_checker(input: &CheckResult) -> bool { 26 | // Check if a human has already confirmed a result 27 | if HUMAN_CONFIRMED.load(Ordering::Acquire) { 28 | return true; 29 | } 30 | timer::pause(); 31 | // wait instead of get so it waits for config being set 32 | let config = get_config(); 33 | // We still call human checker, just if config is false we return True 34 | if !config.human_checker_on || config.api_mode { 35 | timer::resume(); 36 | return true; 37 | } 38 | 39 | // Check if we've already prompted for this text 40 | let prompt_key = format!("{}{}", input.description, input.text); 41 | if !get_seen_prompts().insert(prompt_key) { 42 | return true; // Return true to allow the search to continue 43 | } 44 | human_checker_check(&input.description, &input.text); 45 | 46 | let reply: String = read!("{}\n"); 47 | cli_pretty_printing::success(&format!("DEBUG: Human checker received reply: '{}'", reply)); 48 | let result = reply.to_ascii_lowercase().starts_with('y'); 49 | // If the user confirmed, set the atomic boolean to true 50 | if result { 51 | HUMAN_CONFIRMED.store(true, Ordering::Release); 52 | cli_pretty_printing::success( 53 | "DEBUG: Human confirmed a result, future checks will be skipped", 54 | ); 55 | } 56 | timer::resume(); 57 | 58 | cli_pretty_printing::success(&format!("DEBUG: Human checker returning: {}", result)); 59 | 60 | if !result { 61 | let fd_result = database::insert_human_rejection(uuid::Uuid::new_v4(), &input.text, input); 62 | match fd_result { 63 | Ok(_) => (), 64 | Err(e) => { 65 | cli_pretty_printing::warning(&format!( 66 | "DEBUG: Failed to write human checker rejection due to error: {}", 67 | e 68 | )); 69 | } 70 | } 71 | return false; 72 | } 73 | true 74 | } 75 | -------------------------------------------------------------------------------- /src/checkers/password.rs: -------------------------------------------------------------------------------- 1 | use 
crate::checkers::checker_result::CheckResult; 2 | use gibberish_or_not::{is_password, Sensitivity}; 3 | use lemmeknow::Identifier; 4 | 5 | use crate::checkers::checker_type::{Check, Checker}; 6 | 7 | /// Checks if the input matches a known common password. 8 | pub struct PasswordChecker; 9 | 10 | /// Implementation of the Check trait for PasswordChecker 11 | impl Check for Checker { 12 | fn new() -> Self { 13 | Checker { 14 | name: "Password Checker", 15 | description: "Checks if the input exactly matches a known common password", 16 | link: "https://crates.io/crates/gibberish-or-not", 17 | tags: vec!["password", "security"], 18 | expected_runtime: 0.01, 19 | popularity: 1.0, 20 | lemmeknow_config: Identifier::default(), 21 | sensitivity: Sensitivity::Medium, 22 | enhanced_detector: None, 23 | _phantom: std::marker::PhantomData, 24 | } 25 | } 26 | 27 | fn check(&self, text: &str) -> CheckResult { 28 | CheckResult { 29 | is_identified: is_password(text), 30 | text: text.to_string(), 31 | checker_name: self.name, 32 | checker_description: self.description, 33 | description: "Common Password".to_string(), 34 | link: self.link, 35 | } 36 | } 37 | 38 | fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self { 39 | self.sensitivity = sensitivity; 40 | self 41 | } 42 | 43 | fn get_sensitivity(&self) -> Sensitivity { 44 | self.sensitivity 45 | } 46 | } 47 | 48 | #[cfg(test)] 49 | mod tests { 50 | use super::*; 51 | use gibberish_or_not::Sensitivity; 52 | 53 | #[test] 54 | fn test_check_common_password() { 55 | let checker = Checker::::new(); 56 | assert!(checker.check("123456").is_identified); 57 | } 58 | 59 | #[test] 60 | fn test_check_not_password() { 61 | let checker = Checker::::new(); 62 | assert!(!checker.check("not-a-common-password").is_identified); 63 | } 64 | 65 | #[test] 66 | fn test_check_case_sensitive() { 67 | let checker = Checker::::new(); 68 | // Test exact matching with different cases 69 | let original = checker.check("password").is_identified; 
70 | let uppercase = checker.check("PASSWORD").is_identified; 71 | assert!(original != uppercase, "Case sensitivity test failed"); 72 | } 73 | 74 | #[test] 75 | fn test_default_sensitivity_is_medium() { 76 | let checker = Checker::::new(); 77 | assert!(matches!(checker.get_sensitivity(), Sensitivity::Medium)); 78 | } 79 | 80 | #[test] 81 | fn test_with_sensitivity_changes_sensitivity() { 82 | let checker = Checker::::new().with_sensitivity(Sensitivity::Low); 83 | assert!(matches!(checker.get_sensitivity(), Sensitivity::Low)); 84 | 85 | let checker = Checker::::new().with_sensitivity(Sensitivity::High); 86 | assert!(matches!(checker.get_sensitivity(), Sensitivity::High)); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /images/decoding.tape: -------------------------------------------------------------------------------- 1 | # VHS documentation 2 | # 3 | # Output: 4 | # Output .gif Create a GIF output at the given 5 | # Output .mp4 Create an MP4 output at the given 6 | # Output .webm Create a WebM output at the given 7 | # 8 | # Require: 9 | # Require Ensure a program is on the $PATH to proceed 10 | # 11 | # Settings: 12 | # Set FontSize Set the font size of the terminal 13 | # Set FontFamily Set the font family of the terminal 14 | # Set Height Set the height of the terminal 15 | # Set Width Set the width of the terminal 16 | # Set LetterSpacing Set the font letter spacing (tracking) 17 | # Set LineHeight Set the font line height 18 | # Set LoopOffset % Set the starting frame offset for the GIF loop 19 | # Set Theme Set the theme of the terminal 20 | # Set Padding Set the padding of the terminal 21 | # Set Framerate Set the framerate of the recording 22 | # Set PlaybackSpeed Set the playback speed of the recording 23 | # Set MarginFill Set the file or color the margin will be filled with. 24 | # Set Margin Set the size of the margin. Has no effect if MarginFill isn't set. 
25 | # Set BorderRadius Set terminal border radius, in pixels. 26 | # Set WindowBar Set window bar type. (one of: Rings, RingsRight, Colorful, ColorfulRight) 27 | # Set WindowBarSize Set window bar size, in pixels. Default is 40. 28 | # Set TypingSpeed