├── tests
    ├── test_fixtures
    │   ├── README.md
    │   ├── rot13_base64_hex_with_newline
    │   └── base64_3_times_with_no_new_line
    └── integration_test.rs
├── src
    ├── decoders
    │   ├── README.md
    │   ├── reverse_decoder.rs
    │   ├── interface.rs
    │   ├── braille_decoder.rs
    │   ├── atbash_decoder.rs
    │   └── url_decoder.rs
    ├── cli_input_parser
    │   ├── README.md
    │   └── mod.rs
    ├── config
    │   └── readme.md
    ├── filtration_system
    │   └── README.md
    ├── cli_pretty_printing
    │   ├── README.md
    │   └── tests.rs
    ├── storage
    │   ├── README.md
    │   ├── invisible_chars
    │   │   └── chars.txt
    │   ├── wait_athena_storage.rs
    │   └── mod.rs
    ├── searchers
    │   ├── README.md
    │   ├── search_node.rs
    │   ├── mod.rs
    │   └── bfs.rs
    ├── main.rs
    ├── checkers
    │   ├── checker_result.rs
    │   ├── default_checker.rs
    │   ├── regex_checker.rs
    │   ├── human_checker.rs
    │   ├── password.rs
    │   ├── checker_type.rs
    │   ├── lemmeknow_checker.rs
    │   └── wordlist.rs
    ├── api_library_input_struct.rs
    └── timer
    │   └── mod.rs
├── .github
    ├── ISSUE_TEMPLATE
    │   ├── proposal--large-feature--large-idea-.md
    │   ├── technical-debt.md
    │   ├── feature_request.md
    │   └── bug_report.md
    ├── dependabot.yml
    ├── workflows
    │   ├── dependabot-auto-merge.yml
    │   ├── codespell.yml
    │   ├── stalePRS.yml
    │   └── quickstart.yml
    ├── FUNDING.yml
    ├── release-drafter.yml
    └── build.yml
├── .config
    └── nextest.toml
├── .gitignore
├── docs
    ├── package-managers.md
    ├── changes
    │   ├── 2024-07-10-remove-cipher-mapping.md
    │   ├── 2024-07-10-astar-refactor.md
    │   ├── 2024-03-21-add-vigenere-decoder.md
    │   ├── 2024-07-10-remove-decoder-popularity.md
    │   ├── 2024-07-10-improve-string-pruning.md
    │   ├── 2024-07-01-wordlist-checker.md
    │   ├── 2024-07-02-sensitivity-trait.md
    │   ├── 2024-07-10-wait-athena-checker.md
    │   ├── 2024-03-11-fix-duplicate-human-prompts.md
    │   └── 2024-07-10-astar-simplified-heuristic-rewrite.md
    ├── storage.md
    ├── sensitivity.md
    ├── README.md
    ├── invisible_characters.md
    ├── parallel_astar_implementation_clarifications.md
    ├── astar_decoder_specific_nodes.md
    ├── database_implementation.md
    ├── first_run_implementation_plan.md
    ├── parallel_astar_search.md
    └── parallelization.md
├── justfile
├── Dockerfile
├── images
    ├── README.md
    ├── main_demo.cast
    ├── decoding.tape
    ├── better_demo.cast
    ├── first_run.tape
    ├── lemmeknow.cast
    └── main_demo.svg
├── benches
    ├── benchmark_checkers.rs
    ├── benchmark_crackers.rs
    ├── benchmark_whole_program.rs
    └── benchmark_decoders.rs
├── LICENSE
└── Cargo.toml


/tests/test_fixtures/README.md:
--------------------------------------------------------------------------------
1 | # Test Fixtures
2 | 
3 | A bunch of files to help support testing <3 :)


--------------------------------------------------------------------------------
/tests/test_fixtures/rot13_base64_hex_with_newline:
--------------------------------------------------------------------------------
1 | 52 33 56 32 5a 69 42 32 5a 69 42 75 49 47 64 79 5a 6d 63 68
2 | 


--------------------------------------------------------------------------------
/tests/test_fixtures/base64_3_times_with_no_new_line:
--------------------------------------------------------------------------------
1 | VkZoV2MyUkhiSGRpUjFWbldXMUdlbHBVV1RCSlIxWjFXVEk1YTJGWE5XNWpkejA5


--------------------------------------------------------------------------------
/src/decoders/README.md:
--------------------------------------------------------------------------------
1 | Please read [mod.rs](mod.rs) for the latest up to date documentation.
2 | 
3 | The `interface.rs` defines what each decoder looks like.


--------------------------------------------------------------------------------
/src/cli_input_parser/README.md:
--------------------------------------------------------------------------------
1 | # What is this?
2 | 
3 | Our library takes a struct as an input. This module takes the CLI arguments and parses it into that struct.


--------------------------------------------------------------------------------
/src/config/readme.md:
--------------------------------------------------------------------------------
1 | # Config
2 | 
3 | The Config object is the configuration struct of our library API.
4 | The CLI arguments get parsed into a library config at runtime.
5 | 


--------------------------------------------------------------------------------
/src/filtration_system/README.md:
--------------------------------------------------------------------------------
1 | # This module serves 2 purposes:
2 | 1. Get all the nodes (crackers, decoders)
3 | 2. Apply filters on them and only return the ones that match them.


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/proposal--large-feature--large-idea-.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Proposal (Large Feature, Large Idea)
 3 | about: For a very large feature that could take weeks to implement
 4 | title: ''
 5 | labels: Proposal
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | 
11 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/technical-debt.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Technical Debt
 3 | about: When you want to log technical debt
 4 | title: "[TECHNICAL DEBT]"
 5 | labels: Technical Debt
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | # Why?
11 | 
12 | # How will this affect us?
13 | 
14 | # What can we do to fix this in the future?
15 | 


--------------------------------------------------------------------------------
/src/cli_pretty_printing/README.md:
--------------------------------------------------------------------------------
1 | # What is this?
2 | 
3 | When using the CLI, we want to print to the screen.
4 | 
5 | We want to do somethings like if the answer is a pair of Latitude and Longitude coordinates we'll want to use plural.
6 | 
7 | Or we'll want to print pretty tables, or other things!
8 | 
9 | Thus, we need an entire module for this.


--------------------------------------------------------------------------------
/.config/nextest.toml:
--------------------------------------------------------------------------------
 1 | [test-groups]
 2 | database = { max-threads = 1 }
 3 | 
 4 | [[profile.default.overrides]]
 5 | filter = 'test(perform_cracking)'
 6 | test-group = 'database'
 7 | 
 8 | [[profile.default.overrides]]
 9 | filter = 'test(database::)'
10 | test-group = 'database'
11 | 
12 | [[profile.default.overrides]]
13 | filter = 'package(ciphey) & test(cache)'
14 | test-group = 'database'
15 | 


--------------------------------------------------------------------------------
/src/storage/README.md:
--------------------------------------------------------------------------------
 1 | # What is this?
 2 | 
 3 | Storage is a module which deals with storing things. In the life time of ciphey, we'll want to:
 4 | * Store word lists
 5 | * Dictionaries
 6 | * Crack results, keys, what the plaintext is
 7 | * Other language dictionaries
 8 | 
 9 | 
10 | And more. Storage is a way to access this information, handling errors and caching them to speed up the program.


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Generated by Cargo
 2 | # will have compiled files and executables
 3 | /target/
 4 | 
 5 | # These are backup files generated by rustfmt
 6 | **/*.rs.bk
 7 | 
 8 | /doc
 9 | 
10 | # Added by cargo
11 | 
12 | /target
13 | 
14 | doc/
15 | 
16 | # Added by cargo
17 | #
18 | # already existing elements were commented out
19 | 
20 | #/target
21 | #Cargo.lock
22 | 
23 | /.idea
24 | .aider*
25 | .cursor
26 | 


--------------------------------------------------------------------------------
/src/cli_input_parser/mod.rs:
--------------------------------------------------------------------------------
 1 | /*
 2 | When the user provides CLI input, we need to parse it for:
 3 | - Text or file?
 4 | - Verbose mode to level
 5 | 
 6 | and so on.
 7 | */
 8 | 
 9 | // build new library_input
10 | 
11 | use crate::api_library_input_struct::LibraryInput;
12 | 
13 | /// This creates a new LibraryInput struct and sets it to a default.
14 | /// added _ before name to let clippy know that they aren't used
15 | fn _main() {
16 |     let _options = LibraryInput::default();
17 | }
18 | 


--------------------------------------------------------------------------------
/docs/package-managers.md:
--------------------------------------------------------------------------------
1 | # Packing ciphey
2 | 
3 | Please call the main ciphey program (the CLI) `ciphey_cli` and enable it to be called via `ciphey` in the terminal.
4 | 
5 | This is because `ciphey` is a short name and is probably taken in a package manager already.
6 | 
7 | ## Releases
8 | 
9 | Please base your package on our releases and not our GitHub repo. If you must, please call the package `ciphey_cli_rolling` to ensure people understand that the package updates on a rolling basis (as our GitHub repo updates).


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for all configuration options:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | 
 6 | version: 2
 7 | updates:
 8 |   - package-ecosystem: "cargo" 
 9 |     directory: "/" # Location of package manifests
10 |     schedule:
11 |       interval: "daily"
12 | 


--------------------------------------------------------------------------------
/justfile:
--------------------------------------------------------------------------------
 1 | build-all:
 2 |   cargo build
 3 |   docker build .
 4 | 
 5 | test-all:
 6 |   cargo build
 7 |   cargo check
 8 |   cargo clippy
 9 |   cargo test
10 | 
11 | fix-all:
12 |   git add .
13 |   git commit -m 'Clippy and fmt'
14 |   cargo clippy --fix
15 |   cargo fmt
16 |   cargo nextest run
17 |   git add .
18 |   git commit -m 'Clippy and fmt'
19 | 
20 | test:
21 |   cargo nextest run
22 | 
23 | publish:
24 |   docker buildx build --platform linux/arm/v7,linux/amd64,linux/arm64/v8 -t autumnskerritt/ciphey:latest --push .
25 | 


--------------------------------------------------------------------------------
/.github/workflows/dependabot-auto-merge.yml:
--------------------------------------------------------------------------------
 1 | name: Dependabot Auto-merge
 2 | on: pull_request
 3 | 
 4 | permissions:
 5 |   contents: write
 6 |   pull-requests: write
 7 | 
 8 | jobs:
 9 |   dependabot:
10 |     runs-on: ubuntu-latest
11 |     if: ${{ github.actor == 'dependabot[bot]' }}
12 |     steps:
13 |       - name: Enable auto-merge for Dependabot PRs
14 |         run: gh pr merge --auto --merge "$PR_URL"
15 |         env:
16 |           PR_URL: ${{github.event.pull_request.html_url}}
17 |           GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
18 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM rust:alpine as builder
 2 | RUN apk add --no-cache build-base pkgconfig openssl-dev
 3 | ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
 4 | ENV OPENSSL_DIR=/usr
 5 | # Encourage some layer caching here rather then copying entire directory that includes docs to builder container ~CMN
 6 | WORKDIR /app/ciphey
 7 | COPY Cargo.toml Cargo.lock ./
 8 | COPY src/ src/
 9 | COPY benches/ benches/
10 | RUN cargo build --release
11 | 
12 | FROM alpine:3.12
13 | COPY --from=builder /app/ciphey/target/release/ciphey /usr/local/bin/ciphey
14 | ENTRYPOINT [ "/usr/local/bin/ciphey" ]
15 | 


--------------------------------------------------------------------------------
/src/searchers/README.md:
--------------------------------------------------------------------------------
 1 | # What is a searcher?
 2 | 
 3 | > How do you decide what decryptions to do next?
 4 | 
 5 | We use a search algorithm for this.
 6 | 
 7 | Click here:
 8 | https://www.notion.so/b3cdc723444d4aafa30e8c1eb41e2cd9?v=81453058582641b2b744815c37643665
 9 | 
10 | And filter by "Search" to find all of our proposals which relate to searchers. For example, if you want to learn how the A* search algorithm was designed you can find a proposal for it which contains all of the theory and ideas.
11 | 
12 | # Files
13 | `bfs.rs` is our simplest searcher, it's breadth first search!


--------------------------------------------------------------------------------
/.github/workflows/codespell.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |     - pull_request
 3 | 
 4 | jobs:
 5 |     spellcheck:
 6 |         runs-on: ubuntu-latest
 7 |         steps:
 8 |             - name: Check out the repository
 9 |             - uses: actions/checkout@v2
10 | 
11 |             - name: Set up Python
12 |               uses: actions/setup-python@v2
13 |               with:
14 |                   python-version: 3.8
15 | 
16 |             - name: Install codespell with pip
17 |               run: pip install codespell
18 | 
19 |             - name: Fix typos
20 |               run: codespell ./ -w
21 | 
22 |             - name: Push changes
23 |               uses: EndBug/add-and-commit@v7


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: bee-san
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: # Replace with a single Ko-fi username
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: ''
 5 | labels: ''
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 


--------------------------------------------------------------------------------
/images/README.md:
--------------------------------------------------------------------------------
 1 | # Steps to make the gifs
 2 | 
 3 | Install asciinema and svg-term-cli.
 4 | 
 5 | Record with asciinema:
 6 | 
 7 | asciinema rec demo.cast
 8 | 
 9 | This records the session in the asciicast v2 plaintext file format (newline-delimited JSON with an initial header object followed by a timestamped event stream of stdin and stdout).
10 | 
11 | Convert the .cast file to .svg with svg-term-cli:
12 | 
13 | svg-term --in demo.cast --out demo.svg --window --width 80 --height 22 --no-optimize
14 | 
15 | You probably want to play around with width and height
16 | window adds a fake OS window around the terminal session
17 | I found that no-optimize fixed some weird font rendering issues on my macOS – not sure why
18 | 


--------------------------------------------------------------------------------
/src/searchers/search_node.rs:
--------------------------------------------------------------------------------
 1 | ///! This is the struct used to design what a search node looks like.
 2 | ///! At each level, we have a node with some text, T.
 3 | ///! And then the edges of that node are the decryption modules.
 4 | 
 5 | /*struct Nodes<V> {
 6 |     /// When we expand the node, we generate children node
 7 |     /// This is an vector of children.
 8 |     children: Vec<Nodes<V>>,
 9 |     /// Value is the text we are using
10 |     value: V
11 |     /// Edges so far enables us to know the decryption route
12 |     /// Because decryptions are edges, we can write the route like:
13 |     /// vec!["Base64", "Base32", "Rot13"] and so on indicating it
14 |     /// started from base64, then base32, and finally rot13.
15 |     edges_so_far: Vec<&str>
16 | }
17 | */


--------------------------------------------------------------------------------
/.github/release-drafter.yml:
--------------------------------------------------------------------------------
 1 | name-template: 'v$RESOLVED_VERSION 🌈'
 2 | tag-template: 'v$RESOLVED_VERSION'
 3 | categories:
 4 |   - title: '🚀 Features'
 5 |     labels:
 6 |       - 'feature'
 7 |       - 'enhancement'
 8 |   - title: '🐛 Bug Fixes'
 9 |     labels:
10 |       - 'fix'
11 |       - 'bugfix'
12 |       - 'bug'
13 |   - title: '🧰 Maintenance'
14 |     label: 'chore'
15 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)'
16 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks.
17 | version-resolver:
18 |   major:
19 |     labels:
20 |       - 'major'
21 |   minor:
22 |     labels:
23 |       - 'minor'
24 |   patch:
25 |     labels:
26 |       - 'patch'
27 |   default: patch
28 | template: |
29 |   ## Changes
30 | 
31 |   $CHANGES
32 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: "[BUG]"
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Browser [e.g. chrome, safari]
29 |  - Version [e.g. 22]
30 | 
31 | **Additional context**
32 | Add any other context about the problem here.
33 | 


--------------------------------------------------------------------------------
/benches/benchmark_checkers.rs:
--------------------------------------------------------------------------------
 1 | use ciphey::checkers::athena::Athena;
 2 | use ciphey::checkers::checker_type::{Check, Checker};
 3 | use ciphey::checkers::CheckerTypes;
 4 | use ciphey::decoders::base64_decoder::Base64Decoder;
 5 | use ciphey::decoders::interface::{Crack, Decoder};
 6 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 7 | 
 8 | pub fn criterion_benchmark(c: &mut Criterion) {
 9 |     let decode_base64 = Decoder::<Base64Decoder>::new();
10 |     let athena_checker = Checker::<Athena>::new();
11 |     let checker = CheckerTypes::CheckAthena(athena_checker);
12 |     c.bench_function("base64 successful decoding", |b| {
13 |         b.iter(|| decode_base64.crack(black_box("aGVsbG8gd29ybGQ="), &checker))
14 |     });
15 | }
16 | 
17 | criterion_group!(benches, criterion_benchmark);
18 | criterion_main!(benches);
19 | 


--------------------------------------------------------------------------------
/.github/workflows/stalePRS.yml:
--------------------------------------------------------------------------------
 1 | name: 'Handle stale PRs'
 2 | on:
 3 |     schedule:
 4 |         - cron: '30 7 * * 1-5'
 5 | 
 6 | jobs:
 7 |     stale:
 8 |         runs-on: ubuntu-latest
 9 |         steps:
10 |             - uses: actions/stale@v4
11 |               with:
12 |                   only: pulls
13 |                   stale-pr-message: "This PR hasn't seen activity in 2 weeks! Should it be merged, closed, or worked on further? If you want to keep it open, post a comment or remove the `stale` label – otherwise this will be closed in another week."
14 |                   close-pr-message: 'This PR was closed due to 1 month of inactivity. Feel free to reopen it if still relevant.'
15 |                   days-before-pr-stale: 14
16 |                   days-before-pr-close: 30
17 |                   stale-issue-label: stale
18 |                   stale-pr-label: stale


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
 1 | use ciphey::cli::parse_cli_args;
 2 | use ciphey::cli_pretty_printing::{program_exiting_successful_decoding, success};
 3 | use ciphey::perform_cracking;
 4 | 
 5 | fn main() {
 6 |     // Turn CLI arguments into a library object
 7 |     let (text, config) = parse_cli_args();
 8 |     let result = perform_cracking(&text, config);
 9 |     success(&format!(
10 |         "DEBUG: main.rs - Result from perform_cracking: {:?}",
11 |         result.is_some()
12 |     ));
13 |     match result {
14 |         // TODO: As result have array of CrackResult used,
15 |         // we can print in better way with more info
16 |         Some(result) => {
17 |             success(&format!(
18 |                 "DEBUG: main.rs - Got successful result with {} decoders in path",
19 |                 result.path.len()
20 |             ));
21 |             program_exiting_successful_decoding(result);
22 |         }
23 |         None => {
24 |             success("DEBUG: main.rs - Got None result, calling failed_to_decode");
25 |             ciphey::cli_pretty_printing::failed_to_decode()
26 |         }
27 |     }
28 | }
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Bee @bee-san on GitHub
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/images/main_demo.cast:
--------------------------------------------------------------------------------
 1 | {"version": 2, "width": 123, "height": 45, "timestamp": 1672149835, "env": {"SHELL": "/bin/zsh", "TERM": "xterm-256color"}}
 2 | [0.111327, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
 3 | [0.111394, "o", "\u001b[?1h\u001b=\u001b[?2004h"]
 4 | [0.940616, "o", "\u001b[7mciphey -t 'LJIVE222KFJGUWSRJZ2FUUKSNNNFCTTLLJIVE5C2KFJGWWSRKJVVUUKOORNFCUTLLJIVE222KFHHIWSRKJVVUUKSNNNEOUTULJIU4222KFJGW\u001b[7mW\u001b[7mSRJZ2FUUKONNNFCTTKLJIU45C2KFJGWWSHJZVVUR2SORNFCUTLLJIVE222I5JHIWSRKJVVUR2ONJNEOTTULJIVE222KFJGWWSRJZ2FUUKSNNNFCTTLLJIU45C2\u001b[7mK\u001b[7mFHGWWSRJZVFUUKSHU======' -d\u001b[27m\u001b[K"]
 5 | [1.697765, "o", "\r\r\n"]
 6 | [2.213287, "o", "\r\n🥳 ciphey has decoded 205 times times.\r\nIf you would have used Ciphey, it would have taken you 41 seconds\r\n\r\n"]
 7 | [2.225368, "o", "The plaintext is: \r\n\u001b[1;33mhello, world!\u001b[0m\r\nand the decoders used are \u001b[1;33mBase32 → Caesar Cipher → Base64 → Binary\u001b[0m\r\n"]
 8 | [2.237779, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
 9 | [2.237852, "o", "\u001b[?1h\u001b="]
10 | [2.237875, "o", "\u001b[?2004h"]
11 | [5.440228, "o", "\u001b[?2004l\r\r\n"]
12 | 


--------------------------------------------------------------------------------
/benches/benchmark_crackers.rs:
--------------------------------------------------------------------------------
 1 | use ciphey::checkers::athena::Athena;
 2 | use ciphey::checkers::checker_type::{Check, Checker};
 3 | use ciphey::checkers::CheckerTypes;
 4 | use ciphey::config::{set_global_config, Config};
 5 | use ciphey::decoders::base64_decoder::Base64Decoder;
 6 | use ciphey::decoders::interface::{Crack, Decoder};
 7 | use criterion::{black_box, criterion_group, criterion_main, Criterion};
 8 | use env_logger::Builder;
 9 | use log::LevelFilter;
10 | 
11 | pub fn criterion_benchmark(c: &mut Criterion) {
12 |     // Initialize logger with only error level to suppress debug messages
13 |     let mut builder = Builder::new();
14 |     builder.filter_level(LevelFilter::Error);
15 |     builder.init();
16 | 
17 |     // Setup global config to suppress output
18 |     let mut config = Config::default();
19 |     config.api_mode = true;
20 |     config.verbose = 0;
21 |     set_global_config(config);
22 | 
23 |     let decode_base64 = Decoder::<Base64Decoder>::new();
24 |     let athena_checker = Checker::<Athena>::new();
25 |     let checker = CheckerTypes::CheckAthena(athena_checker);
26 |     c.bench_function("base64 successful decoding", |b| {
27 |         b.iter(|| decode_base64.crack(black_box("aGVsbG8gd29ybGQ="), &checker))
28 |     });
29 | }
30 | 
31 | criterion_group!(benches, criterion_benchmark);
32 | criterion_main!(benches);
33 | 


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-remove-cipher-mapping.md:
--------------------------------------------------------------------------------
 1 | # Change: Remove CIPHER_MAPPING from helper_functions
 2 | 
 3 | ## Purpose
 4 | Remove the incorrect mapping between Cipher Identifier's cipher names and ciphey decoder names. The mapping was inaccurate, particularly with "fractionatedMorse" being incorrectly mapped to "morseCode" when they are different encoding schemes.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Removes incorrect mappings that could lead to misidentification of ciphers
 9 | - Simplifies the code by directly using the first result from Cipher Identifier
10 | - Eliminates potential confusion between different cipher types
11 | 
12 | ### Disadvantages
13 | - No longer filters cipher types based on available decoders
14 | - May return cipher types that don't have corresponding decoders in ciphey
15 | 
16 | ## Technical Implementation
17 | - Removed the `CIPHER_MAPPING` static variable and its documentation
18 | - Modified the `get_cipher_identifier_score` function to return the first result from Cipher Identifier instead of checking against the mapping
19 | - Verified that all tests still pass after the changes
20 | 
21 | ## Future Improvements
22 | - Consider implementing a more accurate mapping if needed in the future
23 | - Potentially add a check to verify if ciphey has a decoder for the identified cipher type
24 | - Could add a more sophisticated scoring mechanism for cipher identification 


--------------------------------------------------------------------------------
/src/checkers/checker_result.rs:
--------------------------------------------------------------------------------
 1 | use super::checker_type::Checker;
 2 | 
 3 | /// The checkerResult struct is used to store the results of a checker.
 4 | pub struct CheckResult {
 5 |     /// If our checkers return success, we change this bool to True
 6 |     pub is_identified: bool,
 7 |     /// text is the text before we check it.
 8 |     // we can make this &'text str
 9 |     // but then crack requires lifetime annotations.
10 |     pub text: String,
11 |     /// Description of the checked text.
12 |     pub description: String,
13 |     /// Name of the Checker we are using
14 |     pub checker_name: &'static str,
15 |     /// Description of the Checker we are using
16 |     pub checker_description: &'static str,
17 |     /// Link to more info about checker
18 |     pub link: &'static str,
19 | }
20 | 
21 | /// To save time we have a default
22 | /// for checkResult in case we fail
23 | /// I do not believe the checker is important if failed
24 | /// as we will not use it. To save time we will return a default
25 | /// checker.
26 | impl CheckResult {
27 |     /// Creates a default CheckResult
28 |     pub fn new<Type>(checker_used: &Checker<Type>) -> CheckResult {
29 |         CheckResult {
30 |             is_identified: false,
31 |             text: "".to_string(),
32 |             checker_name: checker_used.name,
33 |             checker_description: checker_used.description,
34 |             description: "".to_string(),
35 |             link: checker_used.link,
36 |         }
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-astar-refactor.md:
--------------------------------------------------------------------------------
 1 | # Change: AStar Refactoring and String Quality Enhancement
 2 | 
 3 | ## Purpose
 4 | Refactor the AStar search implementation to improve code organization and enhance string quality assessment by filtering out strings with high percentages of invisible characters.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Improved code organization with helper functions in a separate module
 9 | - Better memory efficiency by quickly rejecting strings with >50% invisible characters
10 | - Enhanced maintainability through clearer separation of concerns
11 | - Easier testing of individual helper functions
12 | 
13 | ### Disadvantages
14 | - Slight increase in module complexity with an additional file
15 | - Potential for minor performance overhead from cross-module function calls
16 | 
17 | ## Technical Implementation
18 | - Split AStar implementation into two files:
19 |   - `astar.rs`: Core A* search algorithm implementation
20 |   - `helper_functions.rs`: Supporting functions for heuristics, quality assessment, and statistics
21 | - Enhanced `calculate_string_quality` function to immediately reject strings with >50% invisible characters
22 | - Added a new test case to verify the invisible character filtering functionality
23 | - Updated module imports and exports in `mod.rs`
24 | 
25 | ## Future Improvements
26 | - Persist decoder success statistics to disk for learning across sessions
27 | - Further optimize string quality assessment with more sophisticated language detection
28 | - Consider moving more common utility functions to the helper module for reuse by other search algorithms 


--------------------------------------------------------------------------------
/docs/changes/2024-03-21-add-vigenere-decoder.md:
--------------------------------------------------------------------------------
 1 | # Change: Add Vigenère Cipher Decoder
 2 | 
 3 | ## Purpose
 4 | Implement a Vigenère cipher decoder to expand ciphey' classical cipher decoding capabilities. This decoder will automatically detect and break Vigenère encrypted text without requiring a key, making it valuable for cryptanalysis and historical cipher decoding.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Implements sophisticated frequency analysis for automated key length detection
 9 | - Uses Index of Coincidence (IoC) for reliable key length determination
10 | - Employs statistical analysis to break the cipher without requiring the key
11 | - Handles both known-key and unknown-key scenarios
12 | 
13 | ### Disadvantages
14 | - Computationally more intensive than simple substitution ciphers
15 | - May produce false positives with very short texts
16 | - Effectiveness depends on text length and language characteristics
17 | 
18 | ## Technical Implementation
19 | - Added Vigenère decoder module with key length detection using IoC
20 | - Implemented frequency analysis for automated key discovery
21 | - Added comprehensive test suite with example ciphertexts
22 | - Integrated with ciphey' existing decoder infrastructure
23 | - Popularity score set to 0.8 reflecting its historical significance
24 | 
25 | ## Future Improvements
26 | - Add support for multiple languages beyond English
27 | - Implement parallel processing for faster key space exploration
28 | - Add option to specify known key length or partial key
29 | - Enhance accuracy for very short ciphertexts
30 | - Add support for variant ciphers (Beaufort, Gronsfeld) 


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-remove-decoder-popularity.md:
--------------------------------------------------------------------------------
 1 | # Change: Remove get_decoder_popularity Function
 2 | 
 3 | ## Purpose
 4 | Remove the redundant `get_decoder_popularity` function from `helper_functions.rs` since decoders already have a `popularity` attribute in their implementation. This eliminates duplication and ensures that popularity values are maintained in a single location.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Eliminates redundant code that duplicated popularity values
 9 | - Simplifies maintenance by having popularity values defined only in the decoder implementations
10 | - Reduces the risk of inconsistencies between the function and the actual decoder attributes
11 | 
12 | ### Disadvantages
13 | - The `generate_heuristic` function no longer has direct access to the popularity values
14 | - Using success rate as a proxy for popularity may not perfectly match the original behavior
15 | 
16 | ## Technical Implementation
17 | - Removed the `get_decoder_popularity` function from `helper_functions.rs`
18 | - Modified the `generate_heuristic` function to use the decoder's success rate as a proxy for popularity
19 | - Updated tests to verify that success rate affects the heuristic calculation
20 | - Removed the now-obsolete `test_popularity_affects_heuristic` test
21 | 
22 | ## Future Improvements
23 | - Consider modifying the `CrackResult` struct to include the decoder's popularity attribute
24 | - Explore ways to directly access the decoder's popularity attribute in the `generate_heuristic` function
25 | - Evaluate whether success rate is an appropriate proxy for popularity or if another approach would be better 


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-improve-string-pruning.md:
--------------------------------------------------------------------------------
 1 | # Change: Improve String Pruning for Low-Quality Inputs
 2 | 
 3 | ## Purpose
 4 | Enhance the pruning mechanism to skip decoding of low-quality strings, which improves efficiency by avoiding wasted computation on strings that are unlikely to produce meaningful results.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Reduces computational resources spent on strings unlikely to yield useful results
 9 | - Speeds up the overall decoding process by focusing on higher-quality candidates
10 | - Prevents the search algorithm from exploring unproductive paths
11 | - Improves memory usage by pruning low-quality strings early
12 | 
13 | ### Disadvantages
14 | - May occasionally reject valid encodings that have unusual characteristics
15 | - Requires careful tuning of thresholds to balance efficiency and thoroughness
16 | - Adds additional computation for quality checks (though this is minimal compared to the savings)
17 | 
18 | ## Technical Implementation
19 | - Enhanced the `check_if_string_cant_be_decoded` function to consider multiple quality factors:
20 |   - String length (rejects strings with 2 or fewer characters)
21 |   - Non-printable character ratio (rejects strings with >30% non-printable characters)
22 |   - Overall string quality (rejects strings with quality score <0.2)
23 | - Added comprehensive tests to verify the pruning behavior
24 | - Updated documentation to explain the rationale behind each pruning criterion
25 | 
26 | ## Future Improvements
27 | - Fine-tune the thresholds based on real-world usage data
28 | - Consider adding more sophisticated quality metrics (e.g., entropy, character distribution)
29 | - Implement adaptive thresholds that adjust based on the search context
30 | - Add logging to track how many strings are being pruned and why 


--------------------------------------------------------------------------------
/docs/changes/2024-07-01-wordlist-checker.md:
--------------------------------------------------------------------------------
 1 | # Change: Add Wordlist Checker
 2 | 
 3 | ## Purpose
 4 | Implement a wordlist checker that performs exact matching against a user-provided list of words. This allows users to check if the input text exactly matches any word in their custom wordlist, which is useful for targeted decoding tasks where the expected output is known to be within a specific set of words.
 5 | 
 6 | ## Trade-offs
 7 | 
 8 | ### Advantages
 9 | - Provides exact matching against custom wordlists
10 | - Efficient O(1) lookups using HashSet
11 | - Memory-mapped file handling for large wordlists (>10MB)
12 | - Takes precedence over other checkers when specified, allowing for targeted checking
13 | - Supports both CLI argument and config file specification
14 | 
15 | ### Disadvantages
16 | - Requires additional memory to store the wordlist
17 | - Only performs exact matching (no partial or fuzzy matching)
18 | - Case-sensitive matching only
19 | - No support for multiple wordlists
20 | 
21 | ## Technical Implementation
22 | - Added `wordlist_path` and `wordlist` fields to the `Config` struct
23 | - Implemented `load_wordlist` function using memory mapping for large files
24 | - Created a new `WordlistChecker` that performs exact matching against the wordlist
25 | - Updated Athena checker to prioritize wordlist checking when a wordlist is provided
26 | - Added `--wordlist` CLI argument that takes precedence over config file
27 | - Updated library API to accept pre-loaded wordlists
28 | 
29 | ## Future Improvements
30 | - Add support for case-insensitive matching
31 | - Implement partial matching options
32 | - Support multiple wordlist files
33 | - Add progress indicator for loading large wordlists
34 | - Implement wordlist caching
35 | - Add support for alternative wordlist formats (CSV, JSON, etc.) 


--------------------------------------------------------------------------------
/src/storage/invisible_chars/chars.txt:
--------------------------------------------------------------------------------
 1 | U+0009 CHARACTER TABULATION 	
 2 | U+0020 SPACE 	
 3 | U+00A0 NO-BREAK SPACE 	
 4 | U+00AD SOFT HYPHEN 	
 5 | U+034F COMBINING GRAPHEME JOINER 	
 6 | U+061C ARABIC LETTER MARK 	
 7 | U+115F HANGUL CHOSEONG FILLER 	
 8 | U+1160 HANGUL JUNGSEONG FILLER 	
 9 | U+17B4 KHMER VOWEL INHERENT AQ 	
10 | U+17B5 KHMER VOWEL INHERENT AA 	
11 | U+180E MONGOLIAN VOWEL SEPARATOR 	
12 | U+2000 EN QUAD 	
13 | U+2001 EM QUAD 	
14 | U+2002 EN SPACE 	
15 | U+2003 EM SPACE 	
16 | U+2004 THREE-PER-EM SPACE 	
17 | U+2005 FOUR-PER-EM SPACE 	
18 | U+2006 SIX-PER-EM SPACE 	
19 | U+2007 FIGURE SPACE 	
20 | U+2008 PUNCTUATION SPACE 	
21 | U+2009 THIN SPACE 	
22 | U+200A HAIR SPACE 	
23 | U+200B ZERO WIDTH SPACE 	
24 | U+200C ZERO WIDTH NON-JOINER 	
25 | U+200D ZERO WIDTH JOINER 	
26 | U+200E LEFT-TO-RIGHT MARK 	
27 | U+200F RIGHT-TO-LEFT MARK 	
28 | U+202F NARROW NO-BREAK SPACE 	
29 | U+205F MEDIUM MATHEMATICAL SPACE 	
30 | U+2060 WORD JOINER 	
31 | U+2061 FUNCTION APPLICATION 	
32 | U+2062 INVISIBLE TIMES 	
33 | U+2063 INVISIBLE SEPARATOR 	
34 | U+2064 INVISIBLE PLUS 	
35 | U+206A INHIBIT SYMMETRIC SWAPPING 	
36 | U+206B ACTIVATE SYMMETRIC SWAPPING 	
37 | U+206C INHIBIT ARABIC FORM SHAPING 	
38 | U+206D ACTIVATE ARABIC FORM SHAPING 	
39 | U+206E NATIONAL DIGIT SHAPES 	
40 | U+206F NOMINAL DIGIT SHAPES 	
41 | U+3000 IDEOGRAPHIC SPACE 	
42 | U+2800 BRAILLE PATTERN BLANK 	
43 | U+3164 HANGUL FILLER 	
44 | U+FEFF ZERO WIDTH NO-BREAK SPACE 	
45 | U+FFA0 HALFWIDTH HANGUL FILLER 	
46 | U+1D159 MUSICAL SYMBOL NULL NOTEHEAD 	
47 | U+1D173 MUSICAL SYMBOL BEGIN BEAM 	
48 | U+1D174 MUSICAL SYMBOL END BEAM 	
49 | U+1D175 MUSICAL SYMBOL BEGIN TIE 	
50 | U+1D176 MUSICAL SYMBOL END TIE 	
51 | U+1D177 MUSICAL SYMBOL BEGIN SLUR 	
52 | U+1D178 MUSICAL SYMBOL END SLUR 	
53 | U+1D179 MUSICAL SYMBOL BEGIN PHRASE 	
54 | U+1D17A MUSICAL SYMBOL END PHRASE


--------------------------------------------------------------------------------
/src/checkers/default_checker.rs:
--------------------------------------------------------------------------------
 1 | use gibberish_or_not::Sensitivity;
 2 | use lemmeknow::Identifier;
 3 | 
 4 | use super::{
 5 |     checker_result::CheckResult,
 6 |     checker_type::{Check, Checker},
 7 | };
 8 | 
 9 | /// The default checker is used to check if the text is plaintext
10 | /// Based on what the ciphey team has found to be the best checker.
11 | pub struct DefaultChecker;
12 | 
13 | impl Check for Checker<DefaultChecker> {
14 |     fn new() -> Self {
15 |         Checker {
16 |             name: "Template checker",
17 |             description: "This is a default template checker. If you're seeing this, it's an error. Please contact us on Discord http://discord.skerritt.blog",
18 |             link: "http://discord.skerritt.blog",
19 |             tags: vec![],
20 |             expected_runtime: 0.0,
21 |             popularity: 0.0,
22 |             lemmeknow_config: Identifier::default(),
23 |             sensitivity: Sensitivity::Medium, // Default to Medium sensitivity
24 |             enhanced_detector: None,
25 |             _phantom: std::marker::PhantomData,
26 |         }
27 |     }
28 | 
29 |     fn check(&self, _text: &str) -> CheckResult {
30 |         CheckResult::new(self)
31 |     }
32 | 
33 |     fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
34 |         self.sensitivity = sensitivity;
35 |         self
36 |     }
37 | 
38 |     fn get_sensitivity(&self) -> Sensitivity {
39 |         self.sensitivity
40 |     }
41 | }
42 | 
43 | #[cfg(test)]
44 | mod tests {
45 |     use crate::checkers::{
46 |         checker_result::CheckResult,
47 |         checker_type::{Check, Checker},
48 |         default_checker::DefaultChecker,
49 |     };
50 | 
51 |     #[test]
52 |     fn default_checker_works() {
53 |         let checker = Checker::<DefaultChecker>::new();
54 |         let checker_result = CheckResult::new(&checker);
55 |         assert!(!checker_result.is_identified);
56 |     }
57 | }
58 | 


--------------------------------------------------------------------------------
/docs/storage.md:
--------------------------------------------------------------------------------
 1 | # Storage Module
 2 | 
 3 | The storage module provides reusable data structures and constants that are used across the ciphey project.
 4 | 
 5 | ## Contents
 6 | 
 7 | ### English Letter Frequencies
 8 | 
 9 | The `ENGLISH_FREQS` constant provides the frequency distribution of letters in the English language. This is used for frequency analysis in various decoders, such as the Vigenere decoder.
10 | 
11 | ```rust
12 | pub const ENGLISH_FREQS: [f64; 26] = [
13 |     0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G
14 |     0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N
15 |     0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U
16 |     0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z
17 | ];
18 | ```
19 | 
20 | These values represent the relative frequency of each letter in typical English text, from A to Z. They are used in statistical analysis for breaking classical ciphers.
21 | 
22 | ### Invisible Characters
23 | 
24 | The `INVISIBLE_CHARS` static collection contains a set of invisible Unicode characters that are loaded from a file at runtime. This is used for detecting and handling invisible characters in encoded text.
25 | 
26 | ```rust
27 | pub static INVISIBLE_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
28 |     // Implementation loads characters from a file
29 |     // ...
30 | });
31 | ```
32 | 
33 | The characters are loaded from `src/storage/invisible_chars/chars.txt` and include various whitespace and zero-width characters.
34 | 
35 | ## Usage
36 | 
37 | To use these resources in your code:
38 | 
39 | ```rust
40 | use crate::storage::ENGLISH_FREQS;
41 | use crate::storage::INVISIBLE_CHARS;
42 | 
43 | // Example: Using English frequencies for analysis
44 | fn analyze_text(text: &str) {
45 |     // ...frequency analysis using ENGLISH_FREQS...
46 | }
47 | 
48 | // Example: Checking for invisible characters
49 | fn check_for_invisible(text: &str) -> bool {
50 |     text.chars().any(|c| INVISIBLE_CHARS.contains(&c))
51 | }


--------------------------------------------------------------------------------
/docs/changes/2024-07-02-sensitivity-trait.md:
--------------------------------------------------------------------------------
 1 | # Change: Make Sensitivity an Optional Trait
 2 | 
 3 | ## Purpose
 4 | Implement an optional `SensitivityAware` trait for checkers that use sensitivity for gibberish detection. This separates the sensitivity functionality from the core `Check` trait, allowing checkers like the WordlistChecker to avoid implementing sensitivity-related methods that they don't actually use.
 5 | 
 6 | ## Trade-offs
 7 | 
 8 | ### Advantages
 9 | - Cleaner separation of concerns between core checking functionality and sensitivity handling
10 | - Checkers that don't use sensitivity don't need to implement unused methods
11 | - More accurate representation of which checkers actually use sensitivity
12 | - Reduces code duplication and improves maintainability
13 | - Makes it clearer to developers which checkers support sensitivity adjustment
14 | 
15 | ### Disadvantages
16 | - Requires changes to existing code that assumes all checkers implement sensitivity methods
17 | - Slightly more complex trait hierarchy
18 | - Requires careful handling in composite checkers like Athena
19 | 
20 | ## Technical Implementation
21 | - Created a new `SensitivityAware` trait in `checker_type.rs` with the sensitivity-related methods
22 | - Removed sensitivity methods from the core `Check` trait
23 | - Updated the WordlistChecker to not implement the `SensitivityAware` trait
24 | - Updated the Athena checker to handle both sensitivity-aware and non-sensitivity-aware checkers
25 | - Kept the sensitivity field in the `Checker` struct for backward compatibility
26 | - Added documentation to clarify which checkers use sensitivity
27 | 
28 | ## Future Improvements
29 | - Implement the `SensitivityAware` trait for all checkers that actually use sensitivity
30 | - Add runtime detection of whether a checker implements `SensitivityAware`
31 | - Consider making the sensitivity field optional in the `Checker` struct
32 | - Add helper methods to safely apply sensitivity only to checkers that support it
33 | - Update documentation to clearly indicate which checkers support sensitivity adjustment 


--------------------------------------------------------------------------------
/src/api_library_input_struct.rs:
--------------------------------------------------------------------------------
 1 | /// import general checker
 2 | use crate::checkers::{
 3 |     checker_type::{Check, Checker},
 4 |     default_checker::DefaultChecker,
 5 | };
 6 | use lemmeknow::Identifier;
 7 | use std::collections::HashSet;
 8 | 
 9 | /// Library input is the default API input
10 | /// The CLI turns its arguments into a LibraryInput struct
11 | #[allow(dead_code)]
12 | pub struct LibraryInput<Type> {
13 |     /// The input to be decoded.
14 |     /// Given to us by the user.
15 |     pub encoded_text: String,
16 |     /// A level of verbosity to determine.
17 |     /// How much we print in logs.
18 |     pub verbose: i32,
19 |     /// The checker to use
20 |     pub checker: Checker<Type>,
21 |     /// The lemmeknow config to use
22 |     pub lemmeknow_config: Identifier,
23 |     /// Pre-loaded wordlist (allows library users to provide wordlist directly)
24 |     pub wordlist: Option<HashSet<String>>,
25 | }
26 | 
27 | /// Creates a default lemmeknow config
28 | const LEMMEKNOW_DEFAULT_CONFIG: Identifier = Identifier {
29 |     min_rarity: 0.0,
30 |     max_rarity: 0.0,
31 |     tags: vec![],
32 |     exclude_tags: vec![],
33 |     file_support: false,
34 |     boundaryless: false,
35 | };
36 | 
37 | impl Default for LibraryInput<DefaultChecker> {
38 |     fn default() -> Self {
39 |         LibraryInput {
40 |             encoded_text: String::new(),
41 |             // this will be of type Checker<DefaultChecker>
42 |             verbose: 0,
43 |             checker: Checker::new(),
44 |             lemmeknow_config: LEMMEKNOW_DEFAULT_CONFIG,
45 |             wordlist: None,
46 |         }
47 |     }
48 | }
49 | 
50 | impl<Type> LibraryInput<Type> {
51 |     /// Set a pre-loaded wordlist
52 |     ///
53 |     /// This method is part of the public API for library users who want to provide
54 |     /// a pre-loaded wordlist directly. While it may not be used internally yet,
55 |     /// it's maintained for API compatibility and future use cases.
56 |     #[allow(dead_code)]
57 |     pub fn with_wordlist(mut self, wordlist: HashSet<String>) -> Self {
58 |         self.wordlist = Some(wordlist);
59 |         self
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/src/checkers/regex_checker.rs:
--------------------------------------------------------------------------------
 1 | use gibberish_or_not::Sensitivity;
 2 | use lemmeknow::Identifier;
 3 | 
 4 | use super::checker_type::{Check, Checker};
 5 | use crate::{checkers::checker_result::CheckResult, config::get_config};
 6 | use log::trace;
 7 | use regex::Regex;
 8 | 
 9 | /// The Regex Checker checks if the text matches a known Regex pattern.
10 | /// This is the struct for it.
11 | pub struct RegexChecker;
12 | 
13 | impl Check for Checker<RegexChecker> {
14 |     fn new() -> Self {
15 |         Checker {
16 |             name: "Regex Checker",
17 |             description: "Uses Regex to check for regex matches, useful for finding cribs.",
18 |             link: "https://github.com/rust-lang/regex",
19 |             tags: vec!["crib", "regex"],
20 |             expected_runtime: 0.01,
21 |             popularity: 1.0,
22 |             lemmeknow_config: Identifier::default(),
23 |             sensitivity: Sensitivity::Medium, // Default to Medium sensitivity
24 |             enhanced_detector: None,
25 |             _phantom: std::marker::PhantomData,
26 |         }
27 |     }
28 | 
29 |     fn check(&self, text: &str) -> CheckResult {
30 |         trace!("Checking {} with regex", text);
31 |         // TODO put this into a lazy static so we don't generate it everytime
32 |         let config = get_config();
33 |         let regex_to_parse = config.regex.clone();
34 |         let re = Regex::new(&regex_to_parse.unwrap()).unwrap();
35 | 
36 |         let regex_check_result = re.is_match(text);
37 |         let mut plaintext_found = false;
38 |         let printed_name = format!("Regex matched: {re}");
39 |         if regex_check_result {
40 |             plaintext_found = true;
41 |         }
42 | 
43 |         CheckResult {
44 |             is_identified: plaintext_found,
45 |             text: text.to_string(),
46 |             checker_name: self.name,
47 |             checker_description: self.description,
48 |             description: printed_name,
49 |             link: self.link,
50 |         }
51 |     }
52 | 
53 |     fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
54 |         self.sensitivity = sensitivity;
55 |         self
56 |     }
57 | 
58 |     fn get_sensitivity(&self) -> Sensitivity {
59 |         self.sensitivity
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/.github/workflows/quickstart.yml:
--------------------------------------------------------------------------------
 1 | # Based on https://github.com/actions-rs/meta/blob/master/recipes/quickstart.md
 2 | #
 3 | # While our "example" application has the platform-specific code,
 4 | # for simplicity we are compiling and testing everything on the Ubuntu environment only.
 5 | # For multi-OS testing see the `cross.yml` workflow.
 6 | 
 7 | on: [push, pull_request]
 8 | 
 9 | name: Test
10 | 
11 | jobs:
12 |   check:
13 |     name: Check
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - name: Checkout sources
17 |         uses: actions/checkout@v2
18 |         with:
19 |           lfs: true
20 | 
21 |       - name: Install stable toolchain
22 |         uses: actions-rs/toolchain@v1
23 |         with:
24 |           profile: minimal
25 |           toolchain: stable
26 |           override: true
27 | 
28 |       - name: Run cargo check
29 |         uses: actions-rs/cargo@v1
30 |         with:
31 |           command: check
32 | 
33 |   test:
34 |     name: Test Suite
35 |     strategy:
36 |       fail-fast: false
37 |       matrix:
38 |         os: [ubuntu-latest, windows-latest, macos-latest]
39 |     runs-on: ubuntu-latest
40 |     steps:
41 |       - name: Checkout sources
42 |         uses: actions/checkout@v2
43 | 
44 |       - name: Install stable toolchain
45 |         uses: actions-rs/toolchain@v1
46 |         with:
47 |           profile: minimal
48 |           toolchain: stable
49 |           override: true
50 | 
51 |       - name: Run cargo test
52 |         uses: actions-rs/cargo@v1
53 |         with:
54 |           command: test
55 | 
56 |   lints:
57 |     name: Lints
58 |     runs-on: ubuntu-latest
59 |     steps:
60 |       - name: Checkout sources
61 |         uses: actions/checkout@v2
62 | 
63 |       - name: Install stable toolchain
64 |         uses: actions-rs/toolchain@v1
65 |         with:
66 |           profile: minimal
67 |           toolchain: stable
68 |           override: true
69 |           components: rustfmt, clippy
70 | 
71 |       - name: Run cargo fmt
72 |         uses: actions-rs/cargo@v1
73 |         with:
74 |           command: fmt
75 |           args: --all -- --check
76 | 
77 |       - name: Run cargo clippy
78 |         uses: actions-rs/cargo@v1
79 |         with:
80 |           command: clippy
81 | 


--------------------------------------------------------------------------------
/docs/sensitivity.md:
--------------------------------------------------------------------------------
 1 | # Sensitivity Levels in Gibberish Detection
 2 | 
 3 | ## Overview
 4 | 
 5 | ciphey uses the `gibberish_or_not` library to detect whether decoded text is meaningful English. This library provides three sensitivity levels to fine-tune gibberish detection:
 6 | 
 7 | ### Low Sensitivity
 8 | - Most strict classification
 9 | - Requires very high confidence to classify text as English
10 | - Best for detecting texts that appear English-like but are actually gibberish
11 | - Used by classical ciphers like Caesar cipher that produce more English-like results
12 | 
13 | ### Medium Sensitivity (Default)
14 | - Balanced approach for general use
15 | - Combines dictionary and n-gram analysis
16 | - Default mode suitable for most applications
17 | - Used by most decoders in ciphey
18 | 
19 | ### High Sensitivity
20 | - Most lenient classification
21 | - Favors classifying text as English
22 | - Best when input is mostly gibberish and any English-like patterns are significant
23 | 
24 | ## Implementation in ciphey
25 | 
26 | In ciphey, different decoders use different sensitivity levels based on their characteristics:
27 | 
28 | 1. **Caesar Cipher**: Uses Low sensitivity because classical ciphers often produce text that can appear English-like even when the shift is incorrect.
29 | 
30 | 2. **Other Decoders**: Use Medium sensitivity by default, which provides a balanced approach for most types of encoded text.
31 | 
32 | ## Customizing Sensitivity
33 | 
34 | Decoders can override the default sensitivity level when needed. The `CheckerTypes` enum provides a `with_sensitivity` method that allows changing the sensitivity level:
35 | 
36 | ```rust
37 | // Example: Using a checker with a custom sensitivity level
38 | let checker_with_sensitivity = checker.with_sensitivity(Sensitivity::High);
39 | let result = checker_with_sensitivity.check(text);
40 | ```
41 | 
42 | ## Technical Details
43 | 
44 | The sensitivity level affects the thresholds used for n-gram analysis and dictionary checks:
45 | 
46 | - **Low Sensitivity**: Stricter thresholds, requiring more evidence to classify text as English
47 | - **Medium Sensitivity**: Balanced thresholds suitable for most applications
48 | - **High Sensitivity**: Lenient thresholds, more likely to classify text as English
49 | 
50 | For more details on how the sensitivity levels work, see the [gibberish_or_not documentation](https://crates.io/crates/gibberish-or-not).


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-wait-athena-checker.md:
--------------------------------------------------------------------------------
 1 | # Change: Add WaitAthena Checker for Collecting Multiple Plaintexts
 2 | 
 3 | ## Purpose
 4 | Implement a variant of the Athena checker that collects all potential plaintexts found during the search instead of exiting immediately when the first plaintext is found. This allows users to see all possible interpretations of their ciphertext, which is particularly useful for ambiguous encodings or when multiple valid plaintexts might exist.
 5 | 
 6 | ## Trade-offs
 7 | ### Advantages
 8 | - Provides users with multiple potential plaintexts instead of just the first one found
 9 | - Allows for more comprehensive analysis of ambiguous ciphertexts
10 | - Maintains compatibility with all existing decoders and checkers
11 | - Simple to use via a single command-line flag (`--top-results`)
12 | - Automatically disables the human checker to avoid interrupting the search process
13 | - Continues searching until the timer expires, maximizing the number of potential plaintexts found
14 | 
15 | ### Disadvantages
16 | - May take longer to complete as it continues searching even after finding valid plaintexts
17 | - Could potentially return false positives along with true plaintexts
18 | - Increases memory usage as all results must be stored until the timer expires
19 | 
20 | ## Technical Implementation
21 | - Created a new `WaitAthena` checker that is a variant of `Athena` but stores results instead of returning immediately
22 | - Implemented a thread-safe storage mechanism using `Mutex` and `lazy_static` to store plaintext results
23 | - Modified the timer module to display all collected plaintext results when the timer expires
24 | - Added a new configuration option (`top_results`) to enable WaitAthena mode
25 | - Added a new command-line flag (`--top-results`) to enable WaitAthena mode
26 | - Updated the library interface to use WaitAthena when the `top_results` option is enabled
27 | - Automatically disabled the human checker when `--top-results` is specified to avoid interrupting the search process
28 | - Modified the search algorithm to continue searching until the timer expires when in top_results mode
29 | 
30 | ## Future Improvements
31 | - Add filtering options for WaitAthena results to reduce false positives
32 | - Implement sorting of results by confidence level or other metrics
33 | - Add an option to save results to a file for later analysis
34 | - Implement deduplication logic if duplicate plaintexts become an issue in practice 


--------------------------------------------------------------------------------
/src/storage/wait_athena_storage.rs:
--------------------------------------------------------------------------------
 1 | use lazy_static::lazy_static;
 2 | use log::{trace, warn};
 3 | use std::sync::Mutex;
 4 | 
 5 | /// Represents a plaintext result with its description, checker name, and decoder name
 6 | #[derive(Debug, Clone)]
 7 | pub struct PlaintextResult {
 8 |     /// The plaintext text
 9 |     pub text: String,
10 |     /// The description of the result
11 |     pub description: String,
12 |     /// The name of the checker used to generate the result
13 |     pub checker_name: String,
14 |     /// The name of the decoder used to generate the result
15 |     pub decoder_name: String,
16 | }
17 | 
18 | lazy_static! {
19 |     static ref PLAINTEXT_RESULTS: Mutex<Vec<PlaintextResult>> = Mutex::new(Vec::new());
20 | }
21 | 
22 | /// Adds a plaintext result to the storage
23 | pub fn add_plaintext_result(
24 |     text: String,
25 |     description: String,
26 |     checker_name: String,
27 |     decoder_name: String,
28 | ) {
29 |     let result = PlaintextResult {
30 |         text: text.clone(),
31 |         description: description.clone(),
32 |         checker_name: checker_name.clone(),
33 |         decoder_name: decoder_name.clone(),
34 |     };
35 | 
36 |     trace!(
37 |         "Adding plaintext result: [{}] {} (decoder: {})",
38 |         checker_name,
39 |         text,
40 |         decoder_name
41 |     );
42 | 
43 |     let mut results = match PLAINTEXT_RESULTS.lock() {
44 |         Ok(guard) => guard,
45 |         Err(poisoned) => {
46 |             warn!("Mutex was poisoned, recovering");
47 |             poisoned.into_inner()
48 |         }
49 |     };
50 | 
51 |     results.push(result);
52 |     trace!("Storage now has {} results", results.len());
53 | }
54 | 
55 | /// Retrieves all plaintext results from the storage
56 | pub fn get_plaintext_results() -> Vec<PlaintextResult> {
57 |     let results = match PLAINTEXT_RESULTS.lock() {
58 |         Ok(guard) => guard,
59 |         Err(poisoned) => {
60 |             warn!("Mutex was poisoned, recovering");
61 |             poisoned.into_inner()
62 |         }
63 |     };
64 | 
65 |     trace!("Retrieving {} plaintext results", results.len());
66 |     results.clone()
67 | }
68 | 
69 | /// Clears all plaintext results from the storage
70 | pub fn clear_plaintext_results() {
71 |     let mut results = match PLAINTEXT_RESULTS.lock() {
72 |         Ok(guard) => guard,
73 |         Err(poisoned) => {
74 |             warn!("Mutex was poisoned, recovering");
75 |             poisoned.into_inner()
76 |         }
77 |     };
78 | 
79 |     trace!("Clearing plaintext results (had {} results)", results.len());
80 |     results.clear();
81 | }
82 | 


--------------------------------------------------------------------------------
/docs/changes/2024-03-11-fix-duplicate-human-prompts.md:
--------------------------------------------------------------------------------
 1 | # Fix Duplicate Human Verification Prompts
 2 | 
 3 | ## Issue
 4 | When running ciphey in top_results mode with parallel A* search, users would sometimes see duplicate human verification prompts for the same plaintext. This occurred because:
 5 | 
 6 | 1. The parallel A* search could discover the same solution path multiple times
 7 | 2. Each discovery would trigger Athena's checker
 8 | 3. The human checker would prompt for verification each time, even for identical results
 9 | 
10 | Example of duplicated prompts:
11 | ```
12 | 🕵️ I think the plaintext is Words.
13 | Possible plaintext: 'hello this text...' (y/N):
14 | ...
15 | 🕵️ I think the plaintext is Words.
16 | Possible plaintext: 'hello this text...' (y/N):
17 | ```
18 | 
19 | ## Root Cause Analysis
20 | The issue stemmed from multiple factors:
21 | 1. Parallel processing in A* search allowing multiple threads to find the same solution
22 | 2. Top_results mode continuing the search after finding a valid result
23 | 3. No deduplication of human verification prompts
24 | 4. State being maintained separately in each Athena checker instance
25 | 
26 | ## Solution
27 | Added prompt deduplication to the human checker using a thread-safe cache:
28 | 
29 | ```rust
30 | use dashmap::DashSet;
31 | use std::sync::OnceLock;
32 | 
33 | static SEEN_PROMPTS: OnceLock<DashSet<String>> = OnceLock::new();
34 | 
35 | fn get_seen_prompts() -> &'static DashSet<String> {
36 |     SEEN_PROMPTS.get_or_init(|| DashSet::new())
37 | }
38 | ```
39 | 
40 | The human checker now checks if it has already prompted for a given plaintext:
41 | ```rust
42 | let prompt_key = format!("{}{}", input.description, input.text);
43 | if !get_seen_prompts().insert(prompt_key) {
44 |     println!("DEBUG: Skipping duplicate human verification prompt");
45 |     return true;  // Return true to allow the search to continue
46 | }
47 | ```
48 | 
49 | Benefits of this approach:
50 | 1. Thread-safe using DashSet
51 | 2. Minimal code changes required
52 | 3. Maintains existing functionality while eliminating duplicates
53 | 4. Works regardless of which code path triggered the verification
54 | 
55 | ## Alternative Approaches Considered
56 | 1. Result deduplication in A* search - Too late, prompts already shown
57 | 2. Modifying Athena checker - More complex, required state management
58 | 3. Disabling parallel processing - Would impact performance
59 | 4. Disabling top_results mode - Would limit functionality
60 | 
61 | The chosen solution provides the best balance of:
62 | - Minimal code changes
63 | - No performance impact
64 | - Preserved functionality
65 | - Clean user experience


--------------------------------------------------------------------------------
/src/timer/mod.rs:
--------------------------------------------------------------------------------
 1 | use crossbeam::channel::{bounded, Receiver};
 2 | use std::sync::atomic::Ordering::Relaxed;
 3 | use std::{
 4 |     sync::atomic::AtomicBool,
 5 |     thread::{self, sleep},
 6 |     time::Duration,
 7 | };
 8 | 
 9 | use crate::cli_pretty_printing::{countdown_until_program_ends, display_top_results};
10 | use crate::config::get_config;
11 | use crate::storage::wait_athena_storage;
12 | 
13 | /// Indicate whether timer is paused
14 | static PAUSED: AtomicBool = AtomicBool::new(false);
15 | 
16 | /// Start the timer with duration in seconds
17 | pub fn start(duration: u32) -> Receiver<()> {
18 |     let (sender, recv) = bounded(1);
19 |     thread::spawn(move || {
20 |         let mut time_spent = 0;
21 | 
22 |         while time_spent < duration {
23 |             if !PAUSED.load(Relaxed) {
24 |                 sleep(Duration::from_secs(1));
25 |                 time_spent += 1;
26 |                 // Some pretty printing support
27 |                 countdown_until_program_ends(time_spent, duration);
28 |             }
29 |         }
30 | 
31 |         // When the timer expires, display all collected plaintext results
32 |         // Only if we're in top_results mode
33 |         let config = get_config();
34 |         log::trace!("Timer expired. top_results mode: {}", config.top_results);
35 | 
36 |         if config.top_results {
37 |             log::info!("Displaying all collected plaintext results");
38 |             filter_and_display_results();
39 |         } else {
40 |             log::info!("Not in top_results mode, skipping display_wait_athena_results()");
41 |         }
42 | 
43 |         // Replace the existing expect with a match that logs errors in case of send failure
44 |         match sender.send(()) {
45 |             Ok(_) => log::debug!("Timer signal sent successfully"),
46 |             Err(e) => {
47 |                 // Just log the error instead of panicking
48 |                 log::warn!(
49 |                     "Failed to send timer signal: {:?}. This is expected in benchmarks.",
50 |                     e
51 |                 );
52 |             }
53 |         }
54 |     });
55 | 
56 |     recv
57 | }
58 | 
59 | /// Filter and display all plaintext results collected by WaitAthena
60 | fn filter_and_display_results() {
61 |     let results = wait_athena_storage::get_plaintext_results();
62 | 
63 |     log::trace!(
64 |         "Retrieved {} results from wait_athena_storage",
65 |         results.len()
66 |     );
67 | 
68 |     // Use the cli_pretty_printing function to display the results
69 |     display_top_results(&results);
70 | }
71 | 
72 | /// Pause timer
73 | pub fn pause() {
74 |     PAUSED.store(true, Relaxed);
75 | }
76 | 
77 | /// Resume timer
78 | pub fn resume() {
79 |     PAUSED.store(false, Relaxed);
80 | }
81 | 


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # ciphey Documentation
 2 | 
 3 | Welcome to the ciphey documentation! This repository contains comprehensive documentation for ciphey, the next-generation automatic decoding and cracking tool.
 4 | 
 5 | ## Table of Contents
 6 | 
 7 | ### General Documentation
 8 | 
 9 | - [ciphey Overview](ciphey_overview.md) - A high-level overview of ciphey, its features, and capabilities
10 | - [Using ciphey](using_ciphey.md) - A comprehensive guide on how to use ciphey, with examples and common use cases
11 | 
12 | ### Technical Documentation
13 | 
14 | - [ciphey Architecture](ciphey_architecture.md) - Detailed explanation of ciphey's internal architecture and components
15 | - [Plaintext Identification](plaintext_identification.md) - How ciphey identifies plaintext and determines when decoding is successful
16 | 
17 | ### Feature-Specific Documentation
18 | 
19 | - [Invisible Characters Detection](invisible_characters.md) - Information about ciphey's capability to detect and handle invisible Unicode characters
20 | - [Package Managers](package-managers.md) - Guidelines for packaging ciphey for different package managers
21 | 
22 | ## About ciphey
23 | 
24 | ciphey is the next generation of decoding tools, built by the same people that brought you [Ciphey](https://github.com/ciphey/ciphey). It's designed to automatically detect and decode various types of encoded or encrypted text, including (but not limited to) Base64, Hexadecimal, Caesar cipher, ROT13, URL encoding, and many more.
25 | 
26 | Key features include:
27 | 
28 | - Significantly faster performance (up to 700% faster than Ciphey)
29 | - Library-first architecture for easy integration
30 | - Advanced search algorithms for efficient decoding
31 | - Built-in timeout mechanism
32 | - Comprehensive documentation and testing
33 | - Support for multi-level encodings
34 | 
35 | ## Getting Started
36 | 
37 | The quickest way to get started with ciphey is to install it via Cargo:
38 | 
39 | ```bash
40 | cargo install ciphey
41 | ```
42 | 
43 | Then use it with the `ciphey` command:
44 | 
45 | ```bash
46 | ciphey "your encoded text here"
47 | ```
48 | 
49 | For more detailed instructions, see the [Using ciphey](using_ciphey.md) guide.
50 | 
51 | ## Contributing
52 | 
53 | Contributions to ciphey are welcome! Whether it's adding new decoders, improving existing ones, enhancing documentation, or fixing bugs, your help is appreciated. Check the [GitHub repository](https://github.com/bee-san/ciphey) for more information on how to contribute.
54 | 
55 | ## Additional Resources
56 | 
57 | - [GitHub Repository](https://github.com/bee-san/ciphey)
58 | - [Discord Server](http://discord.skerritt.blog)
59 | - [Blog Post: Introducing ciphey](https://skerritt.blog/introducing-ciphey/)
60 | - [Ciphey2 Documentation](https://broadleaf-angora-7db.notion.site/Ciphey2-32d5eea5d38b40c5b95a9442b4425710)


--------------------------------------------------------------------------------
/src/storage/mod.rs:
--------------------------------------------------------------------------------
 1 | use once_cell::sync::Lazy;
 2 | use std::collections::HashSet;
 3 | use std::fs;
 4 | use std::path::Path;
 5 | 
 6 | /// Module housing functions for managing SQLite database
 7 | pub mod database;
 8 | /// Module for storing WaitAthena results
 9 | pub mod wait_athena_storage;
10 | 
11 | /// English letter frequency distribution (A-Z)
12 | /// Used for frequency analysis in various decoders
13 | pub const ENGLISH_FREQS: [f64; 26] = [
14 |     0.08167, 0.01492, 0.02782, 0.04253, 0.12702, 0.02228, 0.02015, // A-G
15 |     0.06094, 0.06966, 0.00153, 0.00772, 0.04025, 0.02406, 0.06749, // H-N
16 |     0.07507, 0.01929, 0.00095, 0.05987, 0.06327, 0.09056, 0.02758, // O-U
17 |     0.00978, 0.02360, 0.00150, 0.01974, 0.00074, // V-Z
18 | ];
19 | 
20 | /// Loads invisible character list into a HashSet
21 | pub static INVISIBLE_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
22 |     let mut entries: HashSet<char> = HashSet::new();
23 | 
24 |     // Path to the invisible characters file
25 |     let chars_file_path = Path::new(env!("CARGO_MANIFEST_DIR"))
26 |         .join("src")
27 |         .join("storage")
28 |         .join("invisible_chars")
29 |         .join("chars.txt");
30 | 
31 |     // Read the file content
32 |     if let Ok(content) = fs::read_to_string(&chars_file_path) {
33 |         let content_lines = content.split('\n');
34 |         for line in content_lines {
35 |             if line.is_empty() {
36 |                 continue;
37 |             }
38 |             let unicode_line_split: Vec<&str> = line.split_ascii_whitespace().collect();
39 |             if unicode_line_split.is_empty() {
40 |                 continue;
41 |             }
42 |             let unicode_literal = unicode_line_split[0].trim_start_matches("U+");
43 |             if let Ok(unicode_value) = u32::from_str_radix(unicode_literal, 16) {
44 |                 if let Some(unicode_char) = char::from_u32(unicode_value) {
45 |                     entries.insert(unicode_char);
46 |                 }
47 |             }
48 |         }
49 |     }
50 | 
51 |     entries
52 | });
53 | 
54 | // Rust tests
55 | #[cfg(test)]
56 | mod tests {
57 |     use super::*;
58 | 
59 |     #[test]
60 |     fn test_invisible_chars_loaded() {
61 |         // Verify that the INVISIBLE_CHARS HashSet is not empty
62 |         assert!(!INVISIBLE_CHARS.is_empty());
63 |     }
64 | 
65 |     #[test]
66 |     fn test_invisible_chars_contains_space() {
67 |         // Verify that the space character (U+0020) is in the HashSet
68 |         assert!(INVISIBLE_CHARS.contains(&' '));
69 |     }
70 | 
71 |     #[test]
72 |     fn test_invisible_chars_contains_zero_width_space() {
73 |         // Verify that the zero width space (U+200B) is in the HashSet
74 |         // This is a common invisible character
75 |         let zero_width_space = char::from_u32(0x200B).unwrap();
76 |         assert!(INVISIBLE_CHARS.contains(&zero_width_space));
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
  1 | [package]
  2 | name = "ciphey"
  3 | repository = "https://github.com/bee-san/ciphey"
  4 | version = "0.12.0"
  5 | edition = "2021"
  6 | description = "Automated decoding tool, Ciphey but in Rust"
  7 | license = "MIT"
  8 | 
  9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 10 | 
 11 | [lib]
 12 | name = "ciphey"
 13 | path = "src/lib.rs"
 14 | bench = false
 15 | 
 16 | [[bin]]
 17 | name = "ciphey"
 18 | path = "src/main.rs"
 19 | bench = false
 20 | 
 21 | # Please keep this list in alphabetical order
 22 | [dependencies]
 23 | ansi_term = "0.12.1"
 24 | chrono = "0.4.42"
 25 | cipher_identifier = "0.2.0"
 26 | clap = {version = "4.5.53", features = ["derive"]}
 27 | colored = "3.0.0"
 28 | crossbeam = "0.8"
 29 | dirs = "6.0.0"
 30 | env_logger = "0.11.8"
 31 | gibberish-or-not = "5.0.7"
 32 | human-panic = "2.0.4"
 33 | include_dir = "0.7.3"
 34 | lazy-regex = "3.4.2"
 35 | lazy_static = "1.4.0"
 36 | lemmeknow = "0.8.0"
 37 | log = "0.4"
 38 | memmap2 = "0.9.9"
 39 | num = "0.4"
 40 | once_cell = "1.21.3"
 41 | proc-macro2 = "1.0.103" # Required due to https://github.com/rust-lang/rust/issues/113152
 42 | rayon = "1.11.0"
 43 | regex = "1.12.2"
 44 | rpassword = "7.4.0"
 45 | rusqlite = { version = "0.37", features = ["bundled"] }
 46 | serde = { version = "1.0.228", features = ["derive"] }
 47 | serde_derive = "1.0.197"
 48 | serde_json = "1.0"
 49 | serial_test = "3.2.0"
 50 | text_io = "0.1.13"
 51 | toml = "0.9.10"
 52 | uuid = "1.19.0"
 53 | rand = "0.9.2"  # For generating random values
 54 | 
 55 | # Dependencies used for decoding
 56 | base64 = "0.22.1"
 57 | base65536 = "1.0.1"
 58 | base91 = "0.1.0"
 59 | bs58 = "0.5.0"
 60 | data-encoding = "2.9.0"
 61 | urlencoding = "2.1.3"
 62 | z85 = "3.0.5"
 63 | brainfuck-exe = { version = "0.2.4", default-features = false }
 64 | dashmap = "6.1.0"
 65 | 
 66 | # Dev dependencies
 67 | [dev-dependencies]
 68 | cargo-nextest = "0.9.115"
 69 | criterion = "0.8.1"
 70 | 
 71 | [profile.release]
 72 | lto = "fat"
 73 | panic = "abort"
 74 | strip = "symbols"
 75 | codegen-units = 1
 76 | 
 77 | # The profile that 'cargo dist' will build with
 78 | [profile.dist]
 79 | inherits = "release"
 80 | 
 81 | [[bench]]
 82 | name = "benchmark_crackers"
 83 | harness = false
 84 | 
 85 | [[bench]]
 86 | name = "benchmark_decoders"
 87 | harness = false
 88 | 
 89 | [[bench]]
 90 | name = "benchmark_whole_program"
 91 | harness = false
 92 | 
 93 | # Config for 'cargo dist'
 94 | [workspace.metadata.dist]
 95 | # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax)
 96 | cargo-dist-version = "0.1.0"
 97 | # CI backends to support (see 'cargo dist generate-ci')
 98 | ci = ["github"]
 99 | # The installers to generate for each app
100 | installers = []
101 | # Target platforms to build apps for (Rust target-triple syntax)
102 | targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]
103 | 


--------------------------------------------------------------------------------
/src/checkers/human_checker.rs:
--------------------------------------------------------------------------------
 1 | use crate::checkers::checker_result::CheckResult;
 2 | use crate::cli_pretty_printing::human_checker_check;
 3 | use crate::config::get_config;
 4 | use crate::storage::database;
 5 | use crate::{cli_pretty_printing, timer};
 6 | use dashmap::DashSet;
 7 | use std::sync::atomic::{AtomicBool, Ordering};
 8 | use std::sync::OnceLock;
 9 | use text_io::read;
10 | 
11 | static SEEN_PROMPTS: OnceLock<DashSet<String>> = OnceLock::new();
12 | // if human checker is called, we set this to true
13 | // so we dont call it again
14 | static HUMAN_CONFIRMED: AtomicBool = AtomicBool::new(false);
15 | 
16 | fn get_seen_prompts() -> &'static DashSet<String> {
17 |     SEEN_PROMPTS.get_or_init(DashSet::new)
18 | }
19 | 
20 | /// The Human Checker asks humans if the expected plaintext is real plaintext
21 | /// We can use all the automated checkers in the world, but sometimes they get false positives
22 | /// Humans have the last say.
23 | /// TODO: Add a way to specify a list of checkers to use in the library. This checker is not library friendly!
24 | // compile this if we are not running tests
25 | pub fn human_checker(input: &CheckResult) -> bool {
26 |     // Check if a human has already confirmed a result
27 |     if HUMAN_CONFIRMED.load(Ordering::Acquire) {
28 |         return true;
29 |     }
30 |     timer::pause();
31 |     // wait instead of get so it waits for config being set
32 |     let config = get_config();
33 |     // We still call human checker, just if config is false we return True
34 |     if !config.human_checker_on || config.api_mode {
35 |         timer::resume();
36 |         return true;
37 |     }
38 | 
39 |     // Check if we've already prompted for this text
40 |     let prompt_key = format!("{}{}", input.description, input.text);
41 |     if !get_seen_prompts().insert(prompt_key) {
42 |         return true; // Return true to allow the search to continue
43 |     }
44 |     human_checker_check(&input.description, &input.text);
45 | 
46 |     let reply: String = read!("{}\n");
47 |     cli_pretty_printing::success(&format!("DEBUG: Human checker received reply: '{}'", reply));
48 |     let result = reply.to_ascii_lowercase().starts_with('y');
49 |     // If the user confirmed, set the atomic boolean to true
50 |     if result {
51 |         HUMAN_CONFIRMED.store(true, Ordering::Release);
52 |         cli_pretty_printing::success(
53 |             "DEBUG: Human confirmed a result, future checks will be skipped",
54 |         );
55 |     }
56 |     timer::resume();
57 | 
58 |     cli_pretty_printing::success(&format!("DEBUG: Human checker returning: {}", result));
59 | 
60 |     if !result {
61 |         let fd_result = database::insert_human_rejection(uuid::Uuid::new_v4(), &input.text, input);
62 |         match fd_result {
63 |             Ok(_) => (),
64 |             Err(e) => {
65 |                 cli_pretty_printing::warning(&format!(
66 |                     "DEBUG: Failed to write human checker rejection due to error: {}",
67 |                     e
68 |                 ));
69 |             }
70 |         }
71 |         return false;
72 |     }
73 |     true
74 | }
75 | 


--------------------------------------------------------------------------------
/src/checkers/password.rs:
--------------------------------------------------------------------------------
 1 | use crate::checkers::checker_result::CheckResult;
 2 | use gibberish_or_not::{is_password, Sensitivity};
 3 | use lemmeknow::Identifier;
 4 | 
 5 | use crate::checkers::checker_type::{Check, Checker};
 6 | 
 7 | /// Checks if the input matches a known common password.
 8 | pub struct PasswordChecker;
 9 | 
10 | /// Implementation of the Check trait for PasswordChecker
11 | impl Check for Checker<PasswordChecker> {
12 |     fn new() -> Self {
13 |         Checker {
14 |             name: "Password Checker",
15 |             description: "Checks if the input exactly matches a known common password",
16 |             link: "https://crates.io/crates/gibberish-or-not",
17 |             tags: vec!["password", "security"],
18 |             expected_runtime: 0.01,
19 |             popularity: 1.0,
20 |             lemmeknow_config: Identifier::default(),
21 |             sensitivity: Sensitivity::Medium,
22 |             enhanced_detector: None,
23 |             _phantom: std::marker::PhantomData,
24 |         }
25 |     }
26 | 
27 |     fn check(&self, text: &str) -> CheckResult {
28 |         CheckResult {
29 |             is_identified: is_password(text),
30 |             text: text.to_string(),
31 |             checker_name: self.name,
32 |             checker_description: self.description,
33 |             description: "Common Password".to_string(),
34 |             link: self.link,
35 |         }
36 |     }
37 | 
38 |     fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
39 |         self.sensitivity = sensitivity;
40 |         self
41 |     }
42 | 
43 |     fn get_sensitivity(&self) -> Sensitivity {
44 |         self.sensitivity
45 |     }
46 | }
47 | 
48 | #[cfg(test)]
49 | mod tests {
50 |     use super::*;
51 |     use gibberish_or_not::Sensitivity;
52 | 
53 |     #[test]
54 |     fn test_check_common_password() {
55 |         let checker = Checker::<PasswordChecker>::new();
56 |         assert!(checker.check("123456").is_identified);
57 |     }
58 | 
59 |     #[test]
60 |     fn test_check_not_password() {
61 |         let checker = Checker::<PasswordChecker>::new();
62 |         assert!(!checker.check("not-a-common-password").is_identified);
63 |     }
64 | 
65 |     #[test]
66 |     fn test_check_case_sensitive() {
67 |         let checker = Checker::<PasswordChecker>::new();
68 |         // Test exact matching with different cases
69 |         let original = checker.check("password").is_identified;
70 |         let uppercase = checker.check("PASSWORD").is_identified;
71 |         assert!(original != uppercase, "Case sensitivity test failed");
72 |     }
73 | 
74 |     #[test]
75 |     fn test_default_sensitivity_is_medium() {
76 |         let checker = Checker::<PasswordChecker>::new();
77 |         assert!(matches!(checker.get_sensitivity(), Sensitivity::Medium));
78 |     }
79 | 
80 |     #[test]
81 |     fn test_with_sensitivity_changes_sensitivity() {
82 |         let checker = Checker::<PasswordChecker>::new().with_sensitivity(Sensitivity::Low);
83 |         assert!(matches!(checker.get_sensitivity(), Sensitivity::Low));
84 | 
85 |         let checker = Checker::<PasswordChecker>::new().with_sensitivity(Sensitivity::High);
86 |         assert!(matches!(checker.get_sensitivity(), Sensitivity::High));
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/images/decoding.tape:
--------------------------------------------------------------------------------
 1 | # VHS documentation
 2 | #
 3 | # Output:
 4 | #   Output <path>.gif               Create a GIF output at the given <path>
 5 | #   Output <path>.mp4               Create an MP4 output at the given <path>
 6 | #   Output <path>.webm              Create a WebM output at the given <path>
 7 | #
 8 | # Require:
 9 | #   Require <string>                Ensure a program is on the $PATH to proceed
10 | #
11 | # Settings:
12 | #   Set FontSize <number>           Set the font size of the terminal
13 | #   Set FontFamily <string>         Set the font family of the terminal
14 | #   Set Height <number>             Set the height of the terminal
15 | #   Set Width <number>              Set the width of the terminal
16 | #   Set LetterSpacing <float>       Set the font letter spacing (tracking)
17 | #   Set LineHeight <float>          Set the font line height
18 | #   Set LoopOffset <float>%         Set the starting frame offset for the GIF loop
19 | #   Set Theme <json|string>         Set the theme of the terminal
20 | #   Set Padding <number>            Set the padding of the terminal
21 | #   Set Framerate <number>          Set the framerate of the recording
22 | #   Set PlaybackSpeed <float>       Set the playback speed of the recording
23 | #   Set MarginFill <file|#000000>   Set the file or color the margin will be filled with.
24 | #   Set Margin <number>             Set the size of the margin. Has no effect if MarginFill isn't set.
25 | #   Set BorderRadius <number>       Set terminal border radius, in pixels.
26 | #   Set WindowBar <string>          Set window bar type. (one of: Rings, RingsRight, Colorful, ColorfulRight)
27 | #   Set WindowBarSize <number>      Set window bar size, in pixels. Default is 40.
28 | #   Set TypingSpeed <time>          Set the typing speed of the terminal. Default is 50ms.
29 | #
30 | # Sleep:
31 | #   Sleep <time>                    Sleep for a set amount of <time> in seconds
32 | #
33 | # Type:
34 | #   Type[@<time>] "<characters>"    Type <characters> into the terminal with a
35 | #                                   <time> delay between each character
36 | #
37 | # Keys:
38 | #   Escape[@<time>] [number]        Press the Escape key
39 | #   Backspace[@<time>] [number]     Press the Backspace key
40 | #   Delete[@<time>] [number]        Press the Delete key
41 | #   Insert[@<time>] [number]        Press the Insert key
42 | #   Down[@<time>] [number]          Press the Down key
43 | #   Enter[@<time>] [number]         Press the Enter key
44 | #   Space[@<time>] [number]         Press the Space key
45 | #   Tab[@<time>] [number]           Press the Tab key
46 | #   Left[@<time>] [number]          Press the Left Arrow key
47 | #   Right[@<time>] [number]         Press the Right Arrow key
48 | #   Up[@<time>] [number]            Press the Up Arrow key
49 | #   Down[@<time>] [number]          Press the Down Arrow key
50 | #   PageUp[@<time>] [number]        Press the Page Up key
51 | #   PageDown[@<time>] [number]      Press the Page Down key
52 | #   Ctrl+<key>                      Press the Control key + <key> (e.g. Ctrl+C)
53 | #
54 | # Display:
55 | #   Hide                            Hide the subsequent commands from the output
56 | #   Show                            Show the subsequent commands in the output
57 | 
58 | Output decoding.gif
59 | 
60 | Require echo
61 | 
62 | Set Shell "bash"
63 | Set FontSize 32
64 | Set Width 1200
65 | Set Height 600
66 | 
67 | Type "echo 'Welcome to VHS!'" Sleep 500ms  Enter
68 | 
69 | Sleep 5s
70 | 


--------------------------------------------------------------------------------
/src/cli_pretty_printing/tests.rs:
--------------------------------------------------------------------------------
 1 | use crate::storage::INVISIBLE_CHARS;
 2 | 
 3 | /// Test that checks if the invisible character detection works correctly
 4 | #[test]
 5 | fn test_invisible_character_detection() {
 6 |     // Get a zero width space character
 7 |     let zero_width_space = char::from_u32(0x200B).unwrap();
 8 |     assert!(INVISIBLE_CHARS.contains(&zero_width_space));
 9 | 
10 |     // Create a string with 50% invisible characters (alternating normal and invisible)
11 |     let mut test_string = String::new();
12 |     for _ in 0..10 {
13 |         test_string.push('a');
14 |         test_string.push(zero_width_space);
15 |     }
16 | 
17 |     // Count the number of characters (not bytes)
18 |     let char_count = test_string.chars().count();
19 | 
20 |     // Count invisible characters
21 |     let mut invis_chars_found = 0.0;
22 |     for char in test_string.chars() {
23 |         if INVISIBLE_CHARS
24 |             .iter()
25 |             .any(|invis_chars| *invis_chars == char)
26 |         {
27 |             invis_chars_found += 1.0;
28 |         }
29 |     }
30 | 
31 |     // Calculate percentage based on character count, not byte length
32 |     let invis_char_percentage = invis_chars_found / char_count as f64;
33 | 
34 |     // Should be 50%
35 |     assert_eq!(invis_char_percentage, 0.5);
36 | }
37 | 
38 | /// Test with a string that has no invisible characters except spaces
39 | /// Note: Spaces are considered invisible characters in our implementation
40 | #[test]
41 | fn test_no_invisible_characters() {
42 |     // This string has no invisible characters except spaces
43 |     let test_string = "This is a normal string with no invisible characters.";
44 | 
45 |     // Count invisible characters
46 |     let mut invis_chars_found = 0.0;
47 |     for char in test_string.chars() {
48 |         if INVISIBLE_CHARS
49 |             .iter()
50 |             .any(|invis_chars| *invis_chars == char)
51 |         {
52 |             invis_chars_found += 1.0;
53 |         }
54 |     }
55 | 
56 |     // Calculate percentage based on character count, not byte length
57 |     let char_count = test_string.chars().count();
58 |     let invis_char_percentage = invis_chars_found / char_count as f64;
59 | 
60 |     // Count spaces
61 |     let space_count = test_string.chars().filter(|c| *c == ' ').count() as f64;
62 |     let expected_percentage = space_count / char_count as f64;
63 | 
64 |     // The invisible characters should be exactly the spaces
65 |     assert_eq!(invis_char_percentage, expected_percentage);
66 | 
67 |     // Verify that spaces are indeed counted as invisible
68 |     assert!(INVISIBLE_CHARS.contains(&' '));
69 | }
70 | 
71 | /// Test with a string that has spaces (which are considered invisible)
72 | #[test]
73 | fn test_spaces_as_invisible_characters() {
74 |     let test_string = "This string has spaces.";
75 | 
76 |     // Count invisible characters
77 |     let mut invis_chars_found = 0.0;
78 |     for char in test_string.chars() {
79 |         if INVISIBLE_CHARS
80 |             .iter()
81 |             .any(|invis_chars| *invis_chars == char)
82 |         {
83 |             invis_chars_found += 1.0;
84 |         }
85 |     }
86 | 
87 |     // Calculate percentage - should be the number of spaces divided by the total length
88 |     let space_count = test_string.chars().filter(|c| *c == ' ').count() as f64;
89 |     let expected_percentage = space_count / test_string.len() as f64;
90 |     let invis_char_percentage = invis_chars_found / test_string.len() as f64;
91 | 
92 |     assert_eq!(invis_char_percentage, expected_percentage);
93 | }
94 | 


--------------------------------------------------------------------------------
/docs/invisible_characters.md:
--------------------------------------------------------------------------------
 1 | # Invisible Characters Detection
 2 | 
 3 | ## Overview
 4 | 
 5 | ciphey now includes a feature to detect invisible characters in decoded plaintext and offer to save the result to a file. This is particularly useful when dealing with steganography or obfuscated text that uses invisible Unicode characters.
 6 | 
 7 | ## What are Invisible Characters?
 8 | 
 9 | Invisible characters are Unicode characters that don't display visibly in text but still take up space or affect text rendering. These include:
10 | 
11 | - Spaces and various space-like characters (U+0020, U+00A0, U+2000-U+200A, etc.)
12 | - Zero-width characters (U+200B, U+200C, U+200D, etc.)
13 | - Control characters
14 | - Formatting characters
15 | - Various other special Unicode characters
16 | 
17 | These characters are often used in steganography (hiding messages within other messages) or for obfuscation purposes.
18 | 
19 | ## How the Detection Works
20 | 
21 | When ciphey successfully decodes a message, it analyzes the resulting plaintext to determine what percentage of the characters are invisible. The detection process works as follows:
22 | 
23 | 1. The system maintains a list of known invisible characters in `src/storage/invisible_chars/chars.txt`
24 | 2. When plaintext is decoded, each character is checked against this list
25 | 3. If more than 30% of the characters in the plaintext are invisible, the user is prompted with options:
26 |    - Save the plaintext to a file (recommended for invisible character-heavy content)
27 |    - Display the plaintext in the terminal (which may not render invisible characters properly)
28 | 
29 | ## Why This Feature is Useful
30 | 
31 | Invisible characters can be difficult to work with in terminal output:
32 | 
33 | 1. They're hard to see (by definition)
34 | 2. They can break formatting or be lost when copying text
35 | 3. They might not render consistently across different terminals
36 | 
37 | By saving to a file, users can:
38 | - Preserve all characters exactly as decoded
39 | - Open the file in specialized editors that can visualize invisible characters
40 | - Process the file with other tools for further analysis
41 | 
42 | ## Implementation Details
43 | 
44 | The feature is implemented in the following components:
45 | 
46 | - `src/storage/mod.rs`: Defines the `INVISIBLE_CHARS` static variable that loads the list of invisible characters
47 | - `src/storage/invisible_chars/chars.txt`: Contains the list of Unicode invisible characters
48 | - `src/cli_pretty_printing/mod.rs`: Contains the logic to detect invisible characters and prompt the user
49 | 
50 | The detection threshold is set at 30% by default, which can be adjusted in the code if needed.
51 | 
52 | ## Example Usage
53 | 
54 | When a decoded message contains a significant number of invisible characters:
55 | 
56 | ```
57 | 75% of the plaintext is invisible characters, would you like to save to a file instead? (y/N)
58 | ```
59 | 
60 | If the user selects 'y':
61 | 
62 | ```
63 | Please enter a filename: (default: /home/user/ciphey_text.txt)
64 | ```
65 | 
66 | The user can then specify a custom filename or accept the default.
67 | 
68 | ```
69 | Outputting plaintext to file: /home/user/ciphey_text.txt
70 | 
71 | the decoders used are Base64 → Hex
72 | ```
73 | 
74 | ## Testing
75 | 
76 | The invisible characters detection feature includes comprehensive tests:
77 | 
78 | - Tests for loading the invisible characters list
79 | - Tests for detecting various percentages of invisible characters
80 | - Tests for handling edge cases
81 | 
82 | These tests ensure the feature works reliably across different scenarios.


--------------------------------------------------------------------------------
/images/better_demo.cast:
--------------------------------------------------------------------------------
 1 | {"version": 2, "width": 123, "height": 45, "timestamp": 1672151013, "env": {"SHELL": "/bin/zsh", "TERM": "xterm-256color"}}
 2 | [0.106523, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
 3 | [0.106621, "o", "\u001b[?1h\u001b=\u001b[?2004h"]
 4 | [1.029272, "o", "\u001b[7mecho 'ciphey supports file input, and regular expressions. If you know a part of the plaintext you can use regex as a cr\u001b[7mi\u001b[7mb'\u001b[27m\u001b[K"]
 5 | [1.70544, "o", "\u001b[A\u001b[2C\u001b[27me\u001b[27mc\u001b[27mh\u001b[27mo\u001b[27m \u001b[27m'\u001b[27mA\u001b[27mr\u001b[27me\u001b[27ms\u001b[27m \u001b[27ms\u001b[27mu\u001b[27mp\u001b[27mp\u001b[27mo\u001b[27mr\u001b[27mt\u001b[27ms\u001b[27m \u001b[27mf\u001b[27mi\u001b[27ml\u001b[27me\u001b[27m \u001b[27mi\u001b[27mn\u001b[27mp\u001b[27mu\u001b[27mt\u001b[27m,\u001b[27m \u001b[27ma\u001b[27mn\u001b[27md\u001b[27m \u001b[27mr\u001b[27me\u001b[27mg\u001b[27mu\u001b[27ml\u001b[27ma\u001b[27mr\u001b[27m \u001b[27me\u001b[27mx\u001b[27mp\u001b[27mr\u001b[27me\u001b[27ms\u001b[27ms\u001b[27mi\u001b[27mo\u001b[27mn\u001b[27ms\u001b[27m.\u001b[27m \u001b[27mI\u001b[27mf\u001b[27m \u001b[27my\u001b[27mo\u001b[27mu\u001b[27m \u001b[27mk\u001b[27mn\u001b[27mo\u001b[27mw\u001b[27m \u001b[27ma\u001b[27m \u001b[27mp\u001b[27ma\u001b[27mr\u001b[27mt\u001b[27m \u001b[27mo\u001b[27mf\u001b[27m \u001b[27mt\u001b[27mh\u001b[27me\u001b[27m \u001b[27mp\u001b[27ml\u001b[27ma\u001b[27mi\u001b[27mn\u001b[27mt\u001b[27me\u001b[27mx\u001b[27mt\u001b[27m \u001b[27my\u001b[27mo\u001b[27mu\u001b[27m \u001b[27mc\u001b[27ma\u001b[27mn\u001b[27m \u001b[27mu\u001b[27ms\u001b[27me\u001b[27m \u001b[27mr\u001b[27me\u001b[27mg\u001b[27me\u001b[27mx\u001b[27m \u001b[27ma\u001b[27ms\u001b[27m \u001b[27ma\u001b[27m \u001b[27mc\u001b[27mri\u001b[27mb\u001b[27m'"]
 6 | [1.705862, "o", "\u001b[?1l\u001b>\u001b[?2004l\r\r\n"]
 7 | [1.707432, "o", "\u001b]2;echo \u0007\u001b]1;echo\u0007"]
 8 | [1.707476, "o", "ciphey supports file input, and regular expressions. If you know a part of the plaintext you can use regex as a crib\r\n"]
 9 | [1.729224, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
10 | [1.729396, "o", "\u001b[?1h\u001b=\u001b[?2004h"]
11 | [8.159895, "o", "\u001b[7mciphey -f crack.txt -r hello\u001b[27m"]
12 | [8.972236, "o", "\u001b[26D\u001b[27ma\u001b[27mr\u001b[27me\u001b[27ms\u001b[27m \u001b[27m-\u001b[27mf\u001b[27m \u001b[27mc\u001b[27mr\u001b[27ma\u001b[27mc\u001b[27mk\u001b[27m.\u001b[27mt\u001b[27mx\u001b[27mt\u001b[27m \u001b[27m-\u001b[27mr\u001b[27m \u001b[27mh\u001b[27me\u001b[27ml\u001b[27ml\u001b[27mo"]
13 | [8.972428, "o", "\u001b[?1l\u001b>"]
14 | [8.972445, "o", "\u001b[?2004l\r\r\n"]
15 | [8.976107, "o", "\u001b]2;ciphey -f crack.txt -r hello\u0007\u001b]1;ciphey\u0007"]
16 | [9.574511, "o", "🕵️ I think the plaintext is \u001b[1;33mRegex matched: hello\u001b[0m.\r\nPossible plaintext: '\u001b[1;33mhello, world!\u001b[0m' (y/N): \r\n"]
17 | [10.491995, "o", "y"]
18 | [10.946224, "o", "\r\n"]
19 | [10.952582, "o", "\r\n🥳 ciphey has decoded 205 times times.\r\nIf you would have used Ciphey, it would have taken you 41 seconds\r\n\r\n"]
20 | [10.956232, "o", "The plaintext is: \r\n\u001b[1;33mhello, world!\u001b[0m\r\nand the decoders used are \u001b[1;33mAtbash → Caesar Cipher → Base64 → Caesar Cipher\u001b[0m\r\n"]
21 | [10.956748, "o", "\u001b]2;autumnskerritt@autumns-MacBook-Air:~\u0007\u001b]1;~\u0007"]
22 | [10.964685, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
23 | [10.964745, "o", "\u001b[?1h\u001b="]
24 | [10.964783, "o", "\u001b[?2004h"]
25 | [17.953008, "o", "\u001b[?2004l\r\r\n"]
26 | 


--------------------------------------------------------------------------------
/src/checkers/checker_type.rs:
--------------------------------------------------------------------------------
 1 | /// Checker_type is a type used to define checkers
 2 | /// This means that we can standardise the way we check for plaintext
 3 | use crate::checkers::checker_result::CheckResult;
 4 | use gibberish_or_not::Sensitivity;
 5 | use lemmeknow::Identifier;
 6 | 
 7 | /// Every checker is of type CheckerType
 8 | /// This will let us pick & choose which checkers to use
 9 | /// at runtime.
10 | pub struct Checker<Type> {
11 |     /// The name of the checker
12 |     pub name: &'static str,
13 |     /// The description of the checker
14 |     /// you can take the first line from Wikipedia
15 |     /// Sometimes our checkers do not exist on Wikipedia so we write our own.
16 |     pub description: &'static str,
17 |     /// The link to the checker's website
18 |     /// Wikipedia link, articles, github etc
19 |     pub link: &'static str,
20 |     /// The tags of the checker
21 |     pub tags: Vec<&'static str>,
22 |     /// The expected runtime of the checker
23 |     /// We get this by bench marking the code
24 |     pub expected_runtime: f32,
25 |     /// The popularity of the checker
26 |     pub popularity: f32,
27 |     /// lemmeknow config object
28 |     pub lemmeknow_config: Identifier,
29 |     /// The sensitivity level for gibberish detection
30 |     /// This is only used by checkers that implement the SensitivityAware trait
31 |     pub sensitivity: Sensitivity,
32 |     /// Enhanced gibberish detector using BERT model
33 |     /// This is only used when enhanced detection is enabled
34 |     pub enhanced_detector: Option<()>, // Changed from GibberishDetector to () since we don't have the actual type
35 |     /// https://doc.rust-lang.org/std/marker/struct.PhantomData.html
36 |     /// Let's us save memory by telling the compiler that our type
37 |     /// acts like a type <T> even though it doesn't.
38 |     /// Stops the compiler complaining, else we'd need to implement
39 |     /// some magic to make it work.
40 |     pub _phantom: std::marker::PhantomData<Type>,
41 | }
42 | 
43 | /// Helper trait for returning info from a Checker
44 | pub trait CheckInfo {
45 |     /// Returns the checker name
46 |     fn get_name(&self) -> &str;
47 |     /// Returns the checker description
48 |     fn get_description(&self) -> &str;
49 | }
50 | 
51 | impl<Type> CheckInfo for Checker<Type> {
52 |     /// Returns the checker name
53 |     fn get_name(&self) -> &str {
54 |         self.name
55 |     }
56 |     /// Returns the checker description
57 |     fn get_description(&self) -> &str {
58 |         self.description
59 |     }
60 | }
61 | 
62 | /// Every checker must implement this trait
63 | /// Which checks the given text to see if its plaintext
64 | /// and returns CheckResult, which is our results object.
65 | pub trait Check {
66 |     /// Returns a new struct of type CheckerType
67 |     fn new() -> Self
68 |     where
69 |         Self: Sized;
70 |     /// Checks the given text to see if its plaintext
71 |     fn check(&self, text: &str) -> CheckResult;
72 |     /// Sets the sensitivity level for gibberish detection
73 |     fn with_sensitivity(self, sensitivity: Sensitivity) -> Self
74 |     where
75 |         Self: Sized;
76 |     /// Gets the current sensitivity level
77 |     fn get_sensitivity(&self) -> Sensitivity;
78 | }
79 | 
80 | /// Optional trait for checkers that use sensitivity for gibberish detection
81 | /// Not all checkers need to implement this trait
82 | /// This is a future improvement - not currently used
83 | pub trait SensitivityAware {
84 |     /// Sets the sensitivity level for gibberish detection
85 |     fn with_sensitivity(self, sensitivity: Sensitivity) -> Self
86 |     where
87 |         Self: Sized;
88 |     /// Gets the current sensitivity level
89 |     fn get_sensitivity(&self) -> Sensitivity;
90 | }
91 | 


--------------------------------------------------------------------------------
/images/first_run.tape:
--------------------------------------------------------------------------------
  1 | # VHS documentation
  2 | #
  3 | # Output:
  4 | #   Output <path>.gif               Create a GIF output at the given <path>
  5 | #   Output <path>.mp4               Create an MP4 output at the given <path>
  6 | #   Output <path>.webm              Create a WebM output at the given <path>
  7 | #
  8 | # Require:
  9 | #   Require <string>                Ensure a program is on the $PATH to proceed
 10 | #
 11 | # Settings:
 12 | #   Set FontSize <number>           Set the font size of the terminal
 13 | #   Set FontFamily <string>         Set the font family of the terminal
 14 | #   Set Height <number>             Set the height of the terminal
 15 | #   Set Width <number>              Set the width of the terminal
 16 | #   Set LetterSpacing <float>       Set the font letter spacing (tracking)
 17 | #   Set LineHeight <float>          Set the font line height
 18 | #   Set LoopOffset <float>%         Set the starting frame offset for the GIF loop
 19 | #   Set Theme <json|string>         Set the theme of the terminal
 20 | #   Set Padding <number>            Set the padding of the terminal
 21 | #   Set Framerate <number>          Set the framerate of the recording
 22 | #   Set PlaybackSpeed <float>       Set the playback speed of the recording
 23 | #   Set MarginFill <file|#000000>   Set the file or color the margin will be filled with.
 24 | #   Set Margin <number>             Set the size of the margin. Has no effect if MarginFill isn't set.
 25 | #   Set BorderRadius <number>       Set terminal border radius, in pixels.
 26 | #   Set WindowBar <string>          Set window bar type. (one of: Rings, RingsRight, Colorful, ColorfulRight)
 27 | #   Set WindowBarSize <number>      Set window bar size, in pixels. Default is 40.
 28 | #   Set TypingSpeed <time>          Set the typing speed of the terminal. Default is 50ms.
 29 | #
 30 | # Sleep:
 31 | #   Sleep <time>                    Sleep for a set amount of <time> in seconds
 32 | #
 33 | # Type:
 34 | #   Type[@<time>] "<characters>"    Type <characters> into the terminal with a
 35 | #                                   <time> delay between each character
 36 | #
 37 | # Keys:
 38 | #   Escape[@<time>] [number]        Press the Escape key
 39 | #   Backspace[@<time>] [number]     Press the Backspace key
 40 | #   Delete[@<time>] [number]        Press the Delete key
 41 | #   Insert[@<time>] [number]        Press the Insert key
 42 | #   Down[@<time>] [number]          Press the Down key
 43 | #   Enter[@<time>] [number]         Press the Enter key
 44 | #   Space[@<time>] [number]         Press the Space key
 45 | #   Tab[@<time>] [number]           Press the Tab key
 46 | #   Left[@<time>] [number]          Press the Left Arrow key
 47 | #   Right[@<time>] [number]         Press the Right Arrow key
 48 | #   Up[@<time>] [number]            Press the Up Arrow key
 49 | #   Down[@<time>] [number]          Press the Down Arrow key
 50 | #   PageUp[@<time>] [number]        Press the Page Up key
 51 | #   PageDown[@<time>] [number]      Press the Page Down key
 52 | #   Ctrl+<key>                      Press the Control key + <key> (e.g. Ctrl+C)
 53 | #
 54 | # Display:
 55 | #   Hide                            Hide the subsequent commands from the output
 56 | #   Show                            Show the subsequent commands in the output
 57 | 
 58 | Output first_run.gif
 59 | 
 60 | Require ciphey
 61 | 
 62 | Set Shell "fish"
 63 | Set FontSize 32
 64 | Set Width 1200
 65 | Set Height 600
 66 | 
 67 | Type "ciphey -t 'K5XXOIJAKRUGC5BAO5QXGIDBEBRW633MEBRW63TGNFTXK4TBORUW63RB"
 68 | Enter
 69 | 
 70 | # Do you want a tutorial?
 71 | Type "n" Enter
 72 | 
 73 | # Do you want a custom colour scheme?
 74 | Type "y" Enter
 75 | Sleep 1s
 76 | # GirlyPop
 77 | Type "3" Enter
 78 | 
 79 | # What sounds better to you?
 80 | Sleep 2s
 81 | Type "1" Enter
 82 | 
 83 | # Wordlist or not?
 84 | Sleep 1s
 85 | Enter
 86 | 
 87 | # Enhanced Plaintext
 88 | Sleep 4s
 89 | Enter
 90 | 
 91 | # Cute cat
 92 | Sleep 500ms
 93 | Type "y" Enter
 94 | 
 95 | Sleep 500ms
 96 | 
 97 | Type "y" Enter
 98 | 
 99 | Sleep 5s
100 | 


--------------------------------------------------------------------------------
/src/checkers/lemmeknow_checker.rs:
--------------------------------------------------------------------------------
  1 | use super::checker_type::{Check, Checker};
  2 | use crate::checkers::checker_result::CheckResult;
  3 | use gibberish_or_not::Sensitivity;
  4 | use lemmeknow::{Data, Identifier};
  5 | 
  6 | /// The LemmeKnow Checker checks if the text matches a known Regex pattern.
  7 | /// This is the struct for it.
  8 | pub struct LemmeKnow;
  9 | 
 10 | impl Check for Checker<LemmeKnow> {
 11 |     fn new() -> Self {
 12 |         Checker {
 13 |             // TODO: Update fields with proper values
 14 |             name: "LemmeKnow Checker",
 15 |             description: "Uses LemmeKnow to check for regex matches",
 16 |             link: "https://swanandx.github.io/lemmeknow-frontend/",
 17 |             tags: vec!["lemmeknow", "regex"],
 18 |             expected_runtime: 0.01,
 19 |             popularity: 1.0,
 20 |             lemmeknow_config: Identifier::default().min_rarity(0.1),
 21 |             sensitivity: Sensitivity::Medium, // Default to Medium sensitivity
 22 |             enhanced_detector: None,
 23 |             _phantom: std::marker::PhantomData,
 24 |         }
 25 |     }
 26 | 
 27 |     fn check(&self, text: &str) -> CheckResult {
 28 |         let lemmeknow_result = self.lemmeknow_config.identify(text);
 29 |         let mut is_identified = false;
 30 |         let mut description = "".to_string();
 31 |         if !lemmeknow_result.is_empty() {
 32 |             is_identified = true;
 33 |             description = format_data_result(&lemmeknow_result[0].data)
 34 |         }
 35 | 
 36 |         CheckResult {
 37 |             is_identified,
 38 |             text: text.to_owned(),
 39 |             checker_name: self.name,
 40 |             checker_description: self.description,
 41 |             // Returns a vector of matches
 42 |             description,
 43 |             link: self.link,
 44 |         }
 45 |     }
 46 | 
 47 |     fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
 48 |         self.sensitivity = sensitivity;
 49 |         self
 50 |     }
 51 | 
 52 |     fn get_sensitivity(&self) -> Sensitivity {
 53 |         self.sensitivity
 54 |     }
 55 | }
 56 | 
 57 | /// Formats the data result to a string
 58 | /// This is used to display the result in the UI
 59 | fn format_data_result(input: &Data) -> String {
 60 |     input.name.to_string()
 61 | }
 62 | 
 63 | #[cfg(test)]
 64 | mod tests {
 65 |     use super::*;
 66 |     use crate::checkers::checker_type::{Check, Checker};
 67 |     use gibberish_or_not::Sensitivity;
 68 | 
 69 |     #[test]
 70 |     fn test_url_exact_match() {
 71 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
 72 |         assert!(checker.check("https://google.com").is_identified);
 73 |     }
 74 | 
 75 |     #[test]
 76 |     fn test_url_with_extra_text_fails() {
 77 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
 78 |         assert!(
 79 |             !checker
 80 |                 .check("https://google.com and some text")
 81 |                 .is_identified
 82 |         );
 83 |     }
 84 | 
 85 |     #[test]
 86 |     fn test_ip_exact_match() {
 87 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
 88 |         assert!(checker.check("192.168.1.1").is_identified);
 89 |     }
 90 | 
 91 |     #[test]
 92 |     fn test_ip_with_extra_text_fails() {
 93 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
 94 |         assert!(!checker.check("IP is 192.168.1.1").is_identified);
 95 |     }
 96 | 
 97 |     #[test]
 98 |     fn test_s3_path() {
 99 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
100 |         assert!(checker.check("s3://bucket/path/key").is_identified);
101 |     }
102 | 
103 |     // Lemmeknow can only match if its an EXACT match
104 |     // So this should fail
105 |     #[test]
106 |     fn test_bitcoin_with_extra_text_fails() {
107 |         let checker = Checker::<LemmeKnow>::new().with_sensitivity(Sensitivity::Low);
108 |         assert!(
109 |             !checker
110 |                 .check("BTC address: 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2")
111 |                 .is_identified
112 |         );
113 |     }
114 | }
115 | 


--------------------------------------------------------------------------------
/src/decoders/reverse_decoder.rs:
--------------------------------------------------------------------------------
  1 | //! Reverses the input string
  2 | //! Performs error handling and returns a string
  3 | //! Call reverse_decoder.crack to use. It returns option<String> and check with
  4 | //! `result.is_some()` to see if it returned okay.
  5 | 
  6 | use crate::checkers::CheckerTypes;
  7 | 
  8 | use super::crack_results::CrackResult;
  9 | use super::interface::Crack;
 10 | use super::interface::Decoder;
 11 | 
 12 | use log::trace;
 13 | /// The Reverse decoder is a decoder that reverses the input string.
 14 | /// ```rust
 15 | /// use ciphey::decoders::reverse_decoder::ReverseDecoder;
 16 | /// use ciphey::decoders::interface::{Crack, Decoder};
 17 | /// use ciphey::config::{set_global_config, Config};
 18 | /// use ciphey::checkers::{athena::Athena, CheckerTypes, checker_type::{Check, Checker}};
 19 | ///
 20 | /// let reversedecoder = Decoder::<ReverseDecoder>::new();
 21 | /// let athena_checker = Checker::<Athena>::new();
 22 | /// let checker = CheckerTypes::CheckAthena(athena_checker);
 23 | ///
 24 | /// let result = reversedecoder.crack("stac", &checker).unencrypted_text;
 25 | /// assert!(result.is_some());
 26 | /// assert_eq!(result.unwrap()[0], "cats");
 27 | /// ```
 28 | pub struct ReverseDecoder;
 29 | 
 30 | impl Crack for Decoder<ReverseDecoder> {
 31 |     fn new() -> Decoder<ReverseDecoder> {
 32 |         Decoder {
 33 |             name: "Reverse",
 34 |             description: "Reverses a string. stac -> cats",
 35 |             link: "http://string-functions.com/reverse.aspx",
 36 |             tags: vec!["reverse", "decoder", "reciprocal"],
 37 |             // I have never seen a reversed string in a CTF
 38 |             // or otherwise
 39 |             popularity: 0.2,
 40 |             phantom: std::marker::PhantomData,
 41 |         }
 42 |     }
 43 | 
 44 |     /// This function does the actual decoding
 45 |     /// It returns an Option<string> if it was successful
 46 |     /// Else the Option returns nothing and the error is logged in Trace
 47 |     fn crack(&self, text: &str, checker: &CheckerTypes) -> CrackResult {
 48 |         trace!("Running reverse string");
 49 |         let mut result = CrackResult::new(self, text.to_string());
 50 |         if text.is_empty() {
 51 |             return result;
 52 |         }
 53 |         let rev_str: String = text.chars().rev().collect();
 54 |         let checker_res = checker.check(&rev_str);
 55 | 
 56 |         result.unencrypted_text = Some(vec![rev_str]);
 57 |         result.update_checker(&checker_res);
 58 |         result
 59 |     }
 60 |     /// Gets all tags for this decoder
 61 |     fn get_tags(&self) -> &Vec<&str> {
 62 |         &self.tags
 63 |     }
 64 |     /// Gets the name for the current decoder
 65 |     fn get_name(&self) -> &str {
 66 |         self.name
 67 |     }
 68 |     /// Gets the description for the current decoder
 69 |     fn get_description(&self) -> &str {
 70 |         self.description
 71 |     }
 72 |     /// Gets the link for the current decoder
 73 |     fn get_link(&self) -> &str {
 74 |         self.link
 75 |     }
 76 | }
 77 | 
 78 | #[cfg(test)]
 79 | mod tests {
 80 |     use super::*;
 81 |     use crate::{
 82 |         checkers::{
 83 |             athena::Athena,
 84 |             checker_type::{Check, Checker},
 85 |         },
 86 |         decoders::interface::Crack,
 87 |     };
 88 | 
 89 |     // helper for tests
 90 |     fn get_athena_checker() -> CheckerTypes {
 91 |         let athena_checker = Checker::<Athena>::new();
 92 |         CheckerTypes::CheckAthena(athena_checker)
 93 |     }
 94 | 
 95 |     #[test]
 96 |     fn returns_success() {
 97 |         let reverse_decoder = Decoder::<ReverseDecoder>::new();
 98 |         let result = reverse_decoder
 99 |             .crack("stac", &get_athena_checker())
100 |             .unencrypted_text
101 |             .expect("No unencrypted string for reverse decoder");
102 |         assert_eq!(result[0], "cats");
103 |     }
104 | 
105 |     #[test]
106 |     fn returns_nothing() {
107 |         let reverse_decoder = Decoder::<ReverseDecoder>::new();
108 |         let result = reverse_decoder
109 |             .crack("", &get_athena_checker())
110 |             .unencrypted_text;
111 |         assert!(result.is_none());
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/src/decoders/interface.rs:
--------------------------------------------------------------------------------
  1 | use crate::checkers::CheckerTypes;
  2 | 
  3 | use super::crack_results::CrackResult;
  4 | 
  5 | /// The Interface defines what the struct for each decoder looks like
  6 | //TODO: rename this file
  7 | pub struct Decoder<Type> {
  8 |     /// The English name of the decoder.
  9 |     pub name: &'static str,
 10 |     /// A description, you can take the first line from Wikipedia
 11 |     /// Sometimes our decoders do not exist on Wikipedia so we write our own.
 12 |     pub description: &'static str,
 13 |     /// Wikipedia Link
 14 |     pub link: &'static str,
 15 |     /// The tags it has. See other decoders. Think of it as a "category"
 16 |     /// This is used to filter decoders.
 17 |     /// For example, if you want to filter decoders that are "base64"
 18 |     /// you would use the tag "base64" or "base".
 19 |     /// You can also add tags like "online" to filter decoders that are online.
 20 |     pub tags: Vec<&'static str>,
 21 |     /// We get popularity by eye-balling it or using the API's data
 22 |     pub popularity: f32,
 23 |     /// we don't use the Type, so we use PhantomData to mark it!
 24 |     pub phantom: std::marker::PhantomData<Type>,
 25 | }
 26 | 
 27 | /// The default implementation for a decoder
 28 | pub struct DefaultDecoder;
 29 | impl Default for Decoder<DefaultDecoder> {
 30 |     fn default() -> Decoder<DefaultDecoder> {
 31 |         Decoder {
 32 |             name: "Default decoder",
 33 |             description: "N/A",
 34 |             link: "N/A",
 35 |             tags: vec!["N/A"],
 36 |             popularity: 0.0,
 37 |             phantom: std::marker::PhantomData,
 38 |         }
 39 |     }
 40 | }
 41 | 
 42 | impl Crack for Decoder<DefaultDecoder> {
 43 |     fn new() -> Decoder<DefaultDecoder> {
 44 |         Decoder::default()
 45 |     }
 46 |     /// Returns a dummy CrackResult
 47 |     fn crack(&self, text: &str, _checker: &CheckerTypes) -> CrackResult {
 48 |         CrackResult::new(self, text.to_string())
 49 |     }
 50 |     /// Gets all tags for this decoder
 51 |     fn get_tags(&self) -> &Vec<&str> {
 52 |         &self.tags
 53 |     }
 54 |     /// Gets the name for the current decoder
 55 |     fn get_name(&self) -> &str {
 56 |         self.name
 57 |     }
 58 |     /// Gets the description for the current decoder
 59 |     fn get_description(&self) -> &str {
 60 |         self.description
 61 |     }
 62 |     /// Gets the link for the current decoder
 63 |     fn get_link(&self) -> &str {
 64 |         self.link
 65 |     }
 66 | }
 67 | 
 68 | /// All decoders will share the same Crack trait
 69 | /// Which let's us put them into a vector and iterate over them,
 70 | /// Running `.crack()` on each of them.
 71 | /// Relevant docs: https://docs.rs/crack/0.3.0/crack/trait.Crack.html
 72 | pub trait Crack {
 73 |     /// This function generates a new crack trait
 74 |     fn new() -> Self
 75 |     where
 76 |         Self: Sized;
 77 |     /// Crack is the function that actually does the decoding
 78 |     fn crack(&self, text: &str, checker: &CheckerTypes) -> CrackResult;
 79 |     /// Get all tags for the current decoder
 80 |     fn get_tags(&self) -> &Vec<&str>;
 81 |     /// Get the name of the current decoder
 82 |     fn get_name(&self) -> &str;
 83 |     /// Gets the description of the current deocder
 84 |     fn get_description(&self) -> &str;
 85 |     /// Gets the link for the current decoder
 86 |     fn get_link(&self) -> &str;
 87 |     /// Get the popularity of the decoder (a value between 0.0 and 1.0)
 88 |     /// Default implementation returns the decoder's popularity field
 89 |     fn get_popularity(&self) -> f32 {
 90 |         // This is a default implementation that will be used if the decoder
 91 |         // doesn't override this method. In a real implementation, each decoder
 92 |         // would override this to return its actual popularity value.
 93 |         0.5
 94 |     }
 95 | }
 96 | 
 97 | /// Returns a boolean of True if the string is successfully changed
 98 | /// So empty strings fail, but non-empty strings succeed
 99 | /// and only if the string is different from the original text.
100 | pub fn check_string_success(decoded_text: &str, original_text: &str) -> bool {
101 |     if decoded_text.is_empty() {
102 |         return false;
103 |     } else if decoded_text != original_text {
104 |         return true;
105 |     }
106 |     false
107 | }
108 | 


--------------------------------------------------------------------------------
/benches/benchmark_whole_program.rs:
--------------------------------------------------------------------------------
  1 | use ciphey::config::Config;
  2 | use ciphey::perform_cracking;
  3 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
  4 | use env_logger::Builder;
  5 | use log::LevelFilter;
  6 | use std::time::Duration;
  7 | 
  8 | // Test cases with different encodings/encryptions and varying complexity
  9 | const TEST_CASES: &[(&str, &str)] = &[
 10 |     // Format: (encoded_text, description)
 11 |     // Base64 encoded text (simple)
 12 |     (
 13 |         "aGVsbG8gd29ybGQ=",
 14 |         "base64_simple",
 15 |     ),
 16 |     // Base64 encoded longer text
 17 |     (
 18 |         "TXV0bGV5LCB5b3Ugc25pY2tlcmluZywgZmxvcHB5IGVhcmVkIGhvdW5kLiBXaGVuIGNvdXJhZ2UgaXMgbmVlZGVkLCB5b3XigJlyZSBuZXZlciBhcm91bmQu",
 19 |         "base64_medium",
 20 |     ),
 21 |     // Long Base64 encoded text (from integration test)
 22 |     (
 23 |         "TXV0bGV5LCB5b3Ugc25pY2tlcmluZywgZmxvcHB5IGVhcmVkIGhvdW5kLiBXaGVuIGNvdXJhZ2UgaXMgbmVlZGVkLCB5b3XigJlyZSBuZXZlciBhcm91bmQuIFRob3NlIG1lZGFscyB5b3Ugd2VhciBvbiB5b3VyIG1vdGgtZWF0ZW4gY2hlc3Qgc2hvdWxkIGJlIHRoZXJlIGZvciBidW5nbGluZyBhdCB3aGljaCB5b3UgYXJlIGJlc3QuIFNvLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLiBIb3d3d3chIE5hYiBoaW0sIGphYiBoaW0sIHRhYiBoaW0sIGdyYWIgaGltLCBzdG9wIHRoYXQgcGlnZW9uIG5vdy4g",
 24 |         "base64_long",
 25 |     ),
 26 |     // Base32 encoded text
 27 |     (
 28 |         "NBSWY3DPEB3W64TMMQ======",
 29 |         "base32",
 30 |     ),
 31 |     // Hex encoded text
 32 |     (
 33 |         "68656c6c6f20776f726c64",
 34 |         "hex",
 35 |     ),
 36 |     // Binary encoded text
 37 |     (
 38 |         "01101000 01100101 01101100 01101100 01101111 00100000 01110111 01101111 01110010 01101100 01100100",
 39 |         "binary",
 40 |     ),
 41 |     // Plain text (early exit case)
 42 |     (
 43 |         "This is just plain text 123",
 44 |         "plaintext",
 45 |     ),
 46 |     // Empty string (failure case)
 47 |     (
 48 |         "",
 49 |         "empty_string",
 50 |     ),
 51 | ];
 52 | 
 53 | pub fn criterion_benchmark(c: &mut Criterion) {
 54 |     // Initialize logger with only error level to suppress debug messages
 55 |     let mut builder = Builder::new();
 56 |     builder.filter_level(LevelFilter::Error);
 57 |     builder.init();
 58 | 
 59 |     // Create a benchmark group with longer measurement times for more accurate results
 60 |     let mut group = c.benchmark_group("program_performance");
 61 | 
 62 |     // Configure the benchmark group for better statistics
 63 |     group.measurement_time(Duration::from_secs(10));
 64 |     group.sample_size(30);
 65 | 
 66 |     // Run benchmarks with different configurations
 67 |     benchmark_with_config(&mut group, false, 5); // Default config
 68 |     benchmark_with_config(&mut group, false, 1); // Fast config
 69 | 
 70 |     group.finish();
 71 | }
 72 | 
 73 | fn benchmark_with_config(
 74 |     group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
 75 |     top_results: bool,
 76 |     timeout: u32,
 77 | ) {
 78 |     let config_name = if top_results {
 79 |         "top_results"
 80 |     } else {
 81 |         "default"
 82 |     };
 83 |     let timeout_str = format!("timeout_{}", timeout);
 84 | 
 85 |     for (text, description) in TEST_CASES {
 86 |         let id = BenchmarkId::new(
 87 |             format!("{}_{}_{}", config_name, timeout_str, description),
 88 |             text.len(),
 89 |         );
 90 |         group.bench_with_input(id, text, |b, text| {
 91 |             b.iter_batched_ref(
 92 |                 || {
 93 |                     let _test_db = ciphey::TestDatabase::default();
 94 |                     ciphey::set_test_db_path();
 95 |                 },
 96 |                 |_| {
 97 |                     // Create config and set necessary parameters
 98 |                     let mut config = Config::default();
 99 |                     config.timeout = timeout;
100 |                     config.top_results = top_results;
101 |                     config.verbose = 0;
102 |                     config.human_checker_on = false;
103 |                     config.api_mode = true; // Set to true to suppress output
104 | 
105 |                     // Use perform_cracking with the configuration
106 |                     perform_cracking(black_box(text), config)
107 |                 },
108 |                 criterion::BatchSize::SmallInput,
109 |             );
110 |         });
111 |     }
112 | }
113 | 
114 | criterion_group!(benches, criterion_benchmark);
115 | criterion_main!(benches);
116 | 


--------------------------------------------------------------------------------
/images/lemmeknow.cast:
--------------------------------------------------------------------------------
 1 | {"version": 2, "width": 123, "height": 45, "timestamp": 1672151368, "env": {"SHELL": "/bin/zsh", "TERM": "xterm-256color"}}
 2 | [0.108416, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
 3 | [0.108493, "o", "\u001b[?1h\u001b=\u001b[?2004h"]
 4 | [3.342422, "o", "\u001b[7mecho 'ciphey uses LemmeKnow to identify 300+ different types of plaintext include API keys, IP addresses, and URLs. This\u001b[7m \u001b[7mis 3300% faster than PyWhat written in Python for Ciphey'\u001b[27m\u001b[K"]
 5 | [3.696046, "o", "\u001b[A\u001b[53D\u001b[27me\u001b[27mc\u001b[27mh\u001b[27mo\u001b[27m \u001b[27m'\u001b[27mA\u001b[27mr\u001b[27me\u001b[27ms\u001b[27m \u001b[27mu\u001b[27ms\u001b[27me\u001b[27ms\u001b[27m \u001b[27mL\u001b[27me\u001b[27mm\u001b[27mm\u001b[27me\u001b[27mK\u001b[27mn\u001b[27mo\u001b[27mw\u001b[27m \u001b[27mt\u001b[27mo\u001b[27m \u001b[27mi\u001b[27md\u001b[27me\u001b[27mn\u001b[27mt\u001b[27mi\u001b[27mf\u001b[27my\u001b[27m \u001b[27m3\u001b[27m0\u001b[27m0\u001b[27m+\u001b[27m \u001b[27md\u001b[27mi\u001b[27mf\u001b[27mf\u001b[27me\u001b[27mr\u001b[27me\u001b[27mn\u001b[27mt\u001b[27m \u001b[27mt\u001b[27my\u001b[27mp\u001b[27me\u001b[27ms\u001b[27m \u001b[27mo\u001b[27mf\u001b[27m \u001b[27mp\u001b[27ml\u001b[27ma\u001b[27mi\u001b[27mn\u001b[27mt\u001b[27me\u001b[27mx\u001b[27mt\u001b[27m \u001b[27mi\u001b[27mn\u001b[27mc\u001b[27ml\u001b[27mu\u001b[27md\u001b[27me\u001b[27m \u001b[27mA\u001b[27mP\u001b[27mI\u001b[27m \u001b[27mk\u001b[27me\u001b[27my\u001b[27ms\u001b[27m,\u001b[27m \u001b[27mI\u001b[27mP\u001b[27m \u001b[27ma\u001b[27md\u001b[27md\u001b[27mr\u001b[27me\u001b[27ms\u001b[27ms\u001b[27me\u001b[27ms\u001b[27m,\u001b[27m \u001b[27ma\u001b[27mn\u001b[27md\u001b[27m \u001b[27mU\u001b[27mR\u001b[27mL\u001b[27ms\u001b[27m.\u001b[27m \u001b[27mT\u001b[27mh\u001b[27mi\u001b[27ms \u001b[27mi\u001b[27ms\u001b[27m \u001b[27m3\u001b[27m3\u001b[27m0\u001b[27m0\u001b[27m%\u001b[27m \u001b[27mf\u001b[27ma\u001b[27ms\u001b[27mt\u001b[27me\u001b[27mr\u001b[27m \u001b[27mt\u001b[27mh\u001b[27ma\u001b[27mn\u001b[27m \u001b[27mP\u001b[27my\u001b[27mW\u001b[27mh\u001b[27ma\u001b[27mt\u001b[27m \u001b[27mw\u001b[27mr\u001b[27mi\u001b[27mt\u001b[27mt\u001b[27me\u001b[27mn\u001b[27m \u001b[27mi\u001b[27mn\u001b[27m \u001b[27mP\u001b[27my\u001b[27mt\u001b[27mh\u001b[27mo\u001b[27mn\u001b[27m \u001b[27mf\u001b[27mo\u001b[27mr\u001b[27m \u001b[27mC\u001b"]
 6 | [3.696275, "o", "[27mi\u001b[27mp\u001b[27mh\u001b[27me\u001b[27my\u001b[27m'"]
 7 | [3.696297, "o", "\u001b[?1l\u001b>"]
 8 | [3.69645, "o", "\u001b[?2004l\r\r\n"]
 9 | [3.69795, "o", "\u001b]2;echo \u0007\u001b]1;echo\u0007"]
10 | [3.697977, "o", "ciphey uses LemmeKnow to identify 300+ different types of plaintext include API keys, IP addresses, and URLs. This is 3300% faster than PyWhat written in Python for Ciphey\r\n"]
11 | [3.6981, "o", "\u001b]2;autumnskerritt@autumns-MacBook-Air:~\u0007\u001b]1;~\u0007"]
12 | [3.718559, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
13 | [3.718714, "o", "\u001b[?1h\u001b=\u001b[?2004h"]
14 | [8.78795, "o", "\u001b[7mciphey -t '7Vqt2YuAvPvTXQTHVLjPvp4BM2ZJpZWYL'\u001b[27m"]
15 | [9.29994, "o", "\u001b[43D\u001b[27ma\u001b[27mr\u001b[27me\u001b[27ms\u001b[27m \u001b[27m-\u001b[27mt\u001b[27m \u001b[27m'\u001b[27m7\u001b[27mV\u001b[27mq\u001b[27mt\u001b[27m2\u001b[27mY\u001b[27mu\u001b[27mA\u001b[27mv\u001b[27mP\u001b[27mv\u001b[27mT\u001b[27mX\u001b[27mQ\u001b[27mT\u001b[27mH\u001b[27mV\u001b[27mL\u001b[27mj\u001b[27mP\u001b[27mv\u001b[27mp\u001b[27m4\u001b[27mB\u001b[27mM\u001b[27m2\u001b[27mZ\u001b[27mJ\u001b[27mp\u001b[27mZ\u001b[27mW\u001b[27mY\u001b[27mL\u001b[27m'"]
16 | [9.300166, "o", "\u001b[?1l\u001b>\u001b[?2004l"]
17 | [9.300189, "o", "\r\r\n"]
18 | [9.303124, "o", "\u001b]2;ciphey -t '7Vqt2YuAvPvTXQTHVLjPvp4BM2ZJpZWYL'\u0007\u001b]1;ciphey\u0007"]
19 | [9.400731, "o", "🕵️ I think the plaintext is \u001b[1;33mInternet Protocol (IP) Address Version 4\u001b[0m.\r\nPossible plaintext: '\u001b[1;33m192.168.0.1\u001b[0m' (y/N): \r\n"]
20 | [11.126176, "o", "y"]
21 | [12.293594, "o", "\r\n"]
22 | [12.293858, "o", "\r\n🥳 ciphey has decoded 123 times times.\r\nIf you would have used Ciphey, it would have taken you 24 seconds\r\n\r\nThe plaintext is: \r\n\u001b[1;33m192.168.0.1\u001b[0m\r\nand the decoders used are \u001b[1;33mBase58 Bitcoin → Base32\u001b[0m\r\n"]
23 | [12.296495, "o", "\u001b]2;autumnskerritt@autumns-MacBook-Air:~\u0007\u001b]1;~\u0007"]
24 | [12.303612, "o", "\r\u001b[0m\u001b[27m\u001b[24m\u001b[J\u001b[01;32m➜  \u001b[36m~\u001b[00m \u001b[K"]
25 | [12.303662, "o", "\u001b[?1h\u001b="]
26 | [12.303701, "o", "\u001b[?2004h"]
27 | [15.847661, "o", "\u001b[?2004l\r\r\n"]
28 | 


--------------------------------------------------------------------------------
/docs/parallel_astar_implementation_clarifications.md:
--------------------------------------------------------------------------------
  1 | # Clarifications for Parallel A* Implementation
  2 | 
  3 | This document provides additional clarifications for implementing the parallel node expansion in the A* search algorithm.
  4 | 
  5 | ## Important Implementation Details
  6 | 
  7 | ### 1. Atomic Types
  8 | 
  9 | When implementing the atomic counters and flags, make sure to import the correct atomic types:
 10 | 
 11 | ```rust
 12 | use std::sync::atomic::{AtomicBool, AtomicU32, AtomicUsize, Ordering};
 13 | ```
 14 | 
 15 | The `AtomicU32` and `AtomicUsize` types are used for `curr_depth` and `seen_count` respectively.
 16 | 
 17 | ### 2. DecoderResult Clone Implementation
 18 | 
 19 | The implementation assumes that `DecoderResult` implements `Clone`. If it doesn't, you'll need to add a clone implementation:
 20 | 
 21 | ```rust
 22 | impl Clone for DecoderResult {
 23 |     fn clone(&self) -> Self {
 24 |         DecoderResult {
 25 |             text: self.text.clone(),
 26 |             path: self.path.clone(),
 27 |         }
 28 |     }
 29 | }
 30 | ```
 31 | 
 32 | ### 3. Thread-Safe Priority Queue Usage
 33 | 
 34 | The `ThreadSafePriorityQueue` is a custom wrapper around `BinaryHeap`. Make sure to use it consistently:
 35 | 
 36 | ```rust
 37 | // Don't do this:
 38 | open_set.push(node);  // This assumes open_set is a BinaryHeap
 39 | 
 40 | // Do this instead:
 41 | open_set.push(node);  // This uses the push method of ThreadSafePriorityQueue
 42 | ```
 43 | 
 44 | ### 4. Handling the Special Result Node
 45 | 
 46 | The implementation uses a special marker `"__RESULT__"` to identify result nodes. Make sure to check for this marker when processing nodes:
 47 | 
 48 | ```rust
 49 | if let Some(decoder_name) = &node.next_decoder_name {
 50 |     if decoder_name == "__RESULT__" {
 51 |         // This is a result node, handle it accordingly
 52 |     }
 53 | }
 54 | ```
 55 | 
 56 | ### 5. Calculating Hash
 57 | 
 58 | The `calculate_hash` function uses Rust's standard hashing mechanism. Make sure to import the necessary types:
 59 | 
 60 | ```rust
 61 | use std::collections::hash_map::DefaultHasher;
 62 | use std::hash::{Hash, Hasher};
 63 | ```
 64 | 
 65 | ### 6. Static Atomic Variables
 66 | 
 67 | When defining static atomic variables, make sure to initialize them correctly:
 68 | 
 69 | ```rust
 70 | // This is correct:
 71 | static DECODED_COUNT: AtomicUsize = AtomicUsize::new(0);
 72 | 
 73 | // This is incorrect:
 74 | static DECODED_COUNT: AtomicUsize;  // Missing initialization
 75 | ```
 76 | 
 77 | ## Common Pitfalls to Avoid
 78 | 
 79 | 1. **Race Conditions**: Be careful when accessing shared data. Always use atomic operations or locks.
 80 | 
 81 | 2. **Deadlocks**: Avoid holding multiple locks at the same time to prevent deadlocks.
 82 | 
 83 | 3. **Thread Safety**: Make sure all shared data structures are thread-safe.
 84 | 
 85 | 4. **Memory Leaks**: Be careful with cloning large data structures in parallel code.
 86 | 
 87 | 5. **Performance Bottlenecks**: Avoid excessive locking, which can negate the benefits of parallelism.
 88 | 
 89 | ## Testing Tips
 90 | 
 91 | 1. Start with a small batch size (e.g., 2-3) to test the parallel implementation.
 92 | 
 93 | 2. Use simple test cases with known solutions to verify correctness.
 94 | 
 95 | 3. Add logging to track the parallel execution and identify any issues.
 96 | 
 97 | 4. Test with different input sizes to ensure the implementation scales well.
 98 | 
 99 | 5. Verify that the results are the same as the sequential implementation.
100 | 
101 | ## Debugging Parallel Code
102 | 
103 | 1. Use `println!` or logging to track the execution flow.
104 | 
105 | 2. Add assertions to verify invariants.
106 | 
107 | 3. Use atomic counters to track progress.
108 | 
109 | 4. Check for deadlocks by adding timeouts to lock acquisitions.
110 | 
111 | 5. Use thread-safe data structures consistently.
112 | 
113 | ## Example: Debugging Race Conditions
114 | 
115 | If you suspect a race condition, add logging to track the state of shared variables:
116 | 
117 | ```rust
118 | // Before:
119 | seen_strings.insert(text_hash);
120 | 
121 | // After:
122 | let inserted = seen_strings.insert(text_hash);
123 | debug!("Inserted {} into seen_strings: {}", text_hash, inserted);
124 | ```
125 | 
126 | ## Example: Verifying Thread Safety
127 | 
128 | To verify that a data structure is being accessed safely, add assertions:
129 | 
130 | ```rust
131 | // Before:
132 | open_set.push(node);
133 | 
134 | // After:
135 | open_set.push(node);
136 | debug_assert!(open_set.len() > 0, "Open set should not be empty after pushing a node");
137 | ```
138 | 
139 | ## Final Notes
140 | 
141 | - The parallel implementation should be a drop-in replacement for the sequential one.
142 | - The core A* algorithm remains the same; only the node expansion is parallelized.
143 | - The implementation uses Rayon for parallel processing, which handles the thread pool automatically.
144 | - The thread-safe data structures ensure that the algorithm remains correct when parallelized.
145 | - The special result node mechanism ensures that results are processed correctly in parallel. 


--------------------------------------------------------------------------------
/benches/benchmark_decoders.rs:
--------------------------------------------------------------------------------
  1 | use ciphey::checkers::athena::Athena;
  2 | use ciphey::checkers::checker_type::{Check, Checker};
  3 | use ciphey::checkers::CheckerTypes;
  4 | use ciphey::config::{set_global_config, Config};
  5 | use ciphey::decoders::{
  6 |     base32_decoder::Base32Decoder,
  7 |     base58_bitcoin_decoder::Base58BitcoinDecoder,
  8 |     base58_flickr_decoder::Base58FlickrDecoder,
  9 |     base64_decoder::Base64Decoder,
 10 |     binary_decoder::BinaryDecoder,
 11 |     hexadecimal_decoder::HexadecimalDecoder,
 12 |     interface::{Crack, Decoder},
 13 | };
 14 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
 15 | use env_logger::Builder;
 16 | use log::LevelFilter;
 17 | use std::time::Duration;
 18 | 
 19 | // Test cases for different decoders
 20 | struct DecoderTestCase<'a> {
 21 |     encoded: &'a str,
 22 |     expected: &'a str,
 23 |     description: &'a str,
 24 | }
 25 | 
 26 | // Test data for benchmarking each decoder
 27 | const BASE64_TESTS: &[DecoderTestCase] = &[
 28 |     DecoderTestCase {
 29 |         encoded: "aGVsbG8gd29ybGQ=",
 30 |         expected: "hello world",
 31 |         description: "simple",
 32 |     },
 33 |     DecoderTestCase {
 34 |         encoded: "TXV0bGV5LCB5b3Ugc25pY2tlcmluZywgZmxvcHB5IGVhcmVkIGhvdW5kLiBXaGVuIGNvdXJhZ2UgaXMgbmVlZGVkLCB5b3XigJlyZSBuZXZlciBhcm91bmQu",
 35 |         expected: "Mutley, you snickering, floppy eared hound. When courage is needed, you're never around.",
 36 |         description: "medium",
 37 |     },
 38 | ];
 39 | 
 40 | const BASE32_TESTS: &[DecoderTestCase] = &[DecoderTestCase {
 41 |     encoded: "NBSWY3DPEB3W64TMMQ======",
 42 |     expected: "hello world",
 43 |     description: "simple",
 44 | }];
 45 | 
 46 | const HEX_TESTS: &[DecoderTestCase] = &[DecoderTestCase {
 47 |     encoded: "68656c6c6f20776f726c64",
 48 |     expected: "hello world",
 49 |     description: "simple",
 50 | }];
 51 | 
 52 | const BINARY_TESTS: &[DecoderTestCase] = &[
 53 |     DecoderTestCase {
 54 |         encoded: "01101000 01100101 01101100 01101100 01101111 00100000 01110111 01101111 01110010 01101100 01100100",
 55 |         expected: "hello world",
 56 |         description: "simple",
 57 |     },
 58 | ];
 59 | 
 60 | const BASE58_BITCOIN_TESTS: &[DecoderTestCase] = &[DecoderTestCase {
 61 |     encoded: "StV1DL6CwTryKyV",
 62 |     expected: "hello world",
 63 |     description: "simple",
 64 | }];
 65 | 
 66 | const BASE58_FLICKR_TESTS: &[DecoderTestCase] = &[DecoderTestCase {
 67 |     encoded: "rTu1dk6cWsRYjYu",
 68 |     expected: "hello world",
 69 |     description: "simple",
 70 | }];
 71 | 
 72 | pub fn benchmark_decoders(c: &mut Criterion) {
 73 |     // Initialize logger with only error level to suppress debug messages
 74 |     let mut builder = Builder::new();
 75 |     builder.filter_level(LevelFilter::Error);
 76 |     builder.init();
 77 | 
 78 |     // Setup global config to suppress output
 79 |     let mut config = Config::default();
 80 |     config.api_mode = true;
 81 |     config.verbose = 0;
 82 |     set_global_config(config);
 83 | 
 84 |     // Create a benchmark group with appropriate measurement time
 85 |     let mut group = c.benchmark_group("decoder_performance");
 86 |     group.measurement_time(Duration::from_secs(5));
 87 |     group.sample_size(50); // More samples for better statistical significance
 88 | 
 89 |     // Create a checker to use for all decoders
 90 |     let athena_checker = Checker::<Athena>::new();
 91 |     let checker = CheckerTypes::CheckAthena(athena_checker);
 92 | 
 93 |     // Base64 decoder benchmarks
 94 |     benchmark_decoder::<Base64Decoder>(&mut group, "base64", BASE64_TESTS, &checker);
 95 | 
 96 |     // Base32 decoder benchmarks
 97 |     benchmark_decoder::<Base32Decoder>(&mut group, "base32", BASE32_TESTS, &checker);
 98 | 
 99 |     // Hex decoder benchmarks
100 |     benchmark_decoder::<HexadecimalDecoder>(&mut group, "hexadecimal", HEX_TESTS, &checker);
101 | 
102 |     // Binary decoder benchmarks
103 |     benchmark_decoder::<BinaryDecoder>(&mut group, "binary", BINARY_TESTS, &checker);
104 | 
105 |     // Base58 Bitcoin decoder benchmarks
106 |     benchmark_decoder::<Base58BitcoinDecoder>(
107 |         &mut group,
108 |         "base58_bitcoin",
109 |         BASE58_BITCOIN_TESTS,
110 |         &checker,
111 |     );
112 | 
113 |     // Base58 Flickr decoder benchmarks
114 |     benchmark_decoder::<Base58FlickrDecoder>(
115 |         &mut group,
116 |         "base58_flickr",
117 |         BASE58_FLICKR_TESTS,
118 |         &checker,
119 |     );
120 | 
121 |     group.finish();
122 | }
123 | 
124 | // Generic function to benchmark any decoder with its test cases
125 | fn benchmark_decoder<T>(
126 |     group: &mut criterion::BenchmarkGroup<criterion::measurement::WallTime>,
127 |     decoder_name: &str,
128 |     test_cases: &[DecoderTestCase],
129 |     checker: &CheckerTypes,
130 | ) where
131 |     Decoder<T>: Crack,
132 | {
133 |     let decoder = Decoder::<T>::new();
134 | 
135 |     for test in test_cases {
136 |         let id = BenchmarkId::new(
137 |             format!("{}_{}", decoder_name, test.description),
138 |             test.encoded.len(),
139 |         );
140 | 
141 |         group.bench_with_input(id, test.encoded, |b, encoded| {
142 |             b.iter_batched_ref(
143 |                 || {
144 |                     let _test_db = ciphey::TestDatabase::default();
145 |                     ciphey::set_test_db_path();
146 |                 },
147 |                 |_| decoder.crack(black_box(encoded), checker),
148 |                 criterion::BatchSize::SmallInput,
149 |             )
150 |         });
151 |     }
152 | }
153 | 
154 | criterion_group!(benches, benchmark_decoders);
155 | criterion_main!(benches);
156 | 


--------------------------------------------------------------------------------
/docs/astar_decoder_specific_nodes.md:
--------------------------------------------------------------------------------
  1 | # A* Search Algorithm with Decoder-Specific Nodes
  2 | 
  3 | ## Overview
  4 | 
  5 | This document describes the implementation of the A* search algorithm with decoder-specific nodes in ciphey. This enhancement makes the search more efficient by making nodes more specific about which decoder to try next.
  6 | 
  7 | ## Background
  8 | 
  9 | The A* search algorithm is a best-first search algorithm that uses a heuristic function to prioritize which paths to explore. In the context of ciphey, the A* algorithm is used to find the correct sequence of decoders to decode encrypted or encoded text.
 10 | 
 11 | In the original implementation, each node in the search tree represented a state with a decoded text and a path of decoders used to reach that state. When expanding a node, the algorithm would try all available decoders on the decoded text, which could be inefficient.
 12 | 
 13 | ## Decoder-Specific Nodes
 14 | 
 15 | The enhanced implementation adds a `next_decoder_name` field to the `AStarNode` struct, which specifies which decoder to try when expanding the node. This makes the search more focused and efficient by avoiding the need to try all decoders for each state.
 16 | 
 17 | ### Node Structure
 18 | 
 19 | ```rust
 20 | struct AStarNode {
 21 |     /// Current state containing the decoded text and path of decoders used
 22 |     state: DecoderResult,
 23 | 
 24 |     /// Cost so far (g) - represents the depth in the search tree
 25 |     /// This increases by 1 for each decoder applied
 26 |     cost: u32,
 27 | 
 28 |     /// Heuristic value (h) - estimated cost to reach the goal
 29 |     heuristic: f32,
 30 | 
 31 |     /// Total cost (f = g + h) used for prioritization in the queue
 32 |     /// Nodes with lower total_cost are explored first
 33 |     total_cost: f32,
 34 |     
 35 |     /// The name of the next decoder to try when this node is expanded
 36 |     next_decoder_name: Option<String>,
 37 | }
 38 | ```
 39 | 
 40 | ### Node Creation
 41 | 
 42 | When creating new nodes, we set the `next_decoder_name` field to the name of the decoder that produced the result:
 43 | 
 44 | ```rust
 45 | let new_node = AStarNode {
 46 |     state: DecoderResult {
 47 |         text,
 48 |         path: decoders_used,
 49 |     },
 50 |     cost,
 51 |     heuristic,
 52 |     total_cost,
 53 |     next_decoder_name: Some(r.decoder.to_string()),
 54 | };
 55 | ```
 56 | 
 57 | This creates a node that specifies which decoder to try next. When this node is expanded, the algorithm will only try the specified decoder, rather than all available decoders.
 58 | 
 59 | ### Node Expansion
 60 | 
 61 | When expanding a node, we check if it has a specific decoder name, and if so, we filter the available decoders to only include that one:
 62 | 
 63 | ```rust
 64 | // Determine which decoders to use based on next_decoder_name
 65 | let mut decoders;
 66 | if let Some(decoder_name) = &current_node.next_decoder_name {
 67 |     // If we have a specific decoder name, filter all decoders to only include that one
 68 |     trace!("Using specific decoder: {}", decoder_name);
 69 |     let mut all_decoders = get_all_decoders();
 70 |     all_decoders.components.retain(|d| d.get_name() == decoder_name);
 71 |     
 72 |     // Update stats for the decoder
 73 |     if !all_decoders.components.is_empty() {
 74 |         update_decoder_stats(decoder_name, true);
 75 |     }
 76 |     decoders = all_decoders;
 77 | } else {
 78 |     decoders = get_decoder_tagged_decoders(&current_node.state);
 79 | }
 80 | ```
 81 | 
 82 | This approach avoids the need to clone trait objects, which would be required if we stored the decoder itself in the node.
 83 | 
 84 | ## Benefits
 85 | 
 86 | The decoder-specific nodes approach has several benefits:
 87 | 
 88 | 1. **More Efficient Search**: By specifying which decoder to try next, the algorithm can focus on the most promising paths and avoid wasting time on unpromising ones.
 89 | 
 90 | 2. **Better Heuristic Guidance**: The heuristic function can now guide not just which node to expand next, but also which decoder to try from that node.
 91 | 
 92 | 3. **Reduced Computational Overhead**: The algorithm no longer needs to try all decoders for each state, which can significantly reduce the computational overhead.
 93 | 
 94 | 4. **Improved Scalability**: As the number of decoders increases, the efficiency gains from decoder-specific nodes become more significant.
 95 | 
 96 | ## Limitations and Future Work
 97 | 
 98 | While the decoder-specific nodes approach improves the efficiency of the A* search algorithm, there are still some limitations and areas for future work:
 99 | 
100 | 1. **Trait Limitations**: The current implementation uses the decoder's name to identify it, rather than storing the decoder itself, due to limitations with cloning trait objects. A more elegant solution would be to modify the `Crack` trait to support cloning.
101 | 
102 | 2. **Heuristic Function**: The heuristic function has been improved to use the decoder's popularity, an adaptive depth penalty, and a string quality component. It could be further enhanced to learn from past successes.
103 | 
104 | 3. **Parallel Expansion**: The algorithm could be modified to expand multiple nodes in parallel, which would take advantage of multi-core processors.
105 | 
106 | 4. **Memory Optimization**: As the search progresses, the number of nodes in the open set can grow significantly. Techniques like beam search or pruning could be used to limit the memory usage.
107 | 
108 | ## Conclusion
109 | 
110 | The A* search algorithm with decoder-specific nodes is a significant improvement over the original implementation. By making nodes more specific about which decoder to try next, the algorithm can more efficiently find the correct sequence of decoders to decode encrypted or encoded text.


--------------------------------------------------------------------------------
/docs/database_implementation.md:
--------------------------------------------------------------------------------
  1 | # SQLite Database Implementation for ciphey
  2 | 
  3 | ## Database Location
  4 | 
  5 | The SQLite database will be stored at: `$HOME_DIR/ciphey/database.sqlite`
  6 | 
  7 | ```rust
  8 | use std::env;
  9 | use std::path::PathBuf;
 10 | 
 11 | fn get_database_path() -> PathBuf {
 12 |     let mut path = env::home_dir().expect("Could not find home directory");
 13 |     path.push("ciphey");
 14 |     path.push("database.sqlite");
 15 |     path
 16 | }
 17 | ```
 18 | 
 19 | ## Schema Design
 20 | 
 21 | ### Cache Table
 22 | ```sql
 23 | CREATE TABLE IF NOT EXISTS cache (
 24 |     id INTEGER PRIMARY KEY AUTOINCREMENT,
 25 |     encoded_text TEXT NOT NULL,
 26 |     decoded_text TEXT NOT NULL,
 27 |     path JSON NOT NULL,        -- Stores Vec<CrackResult> as JSON
 28 |     successful BOOLEAN NOT NULL DEFAULT true,
 29 |     execution_time_ms INTEGER NOT NULL,
 30 |     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
 31 | );
 32 | 
 33 | CREATE INDEX IF NOT EXISTS idx_cache_encoded_text ON cache(encoded_text);
 34 | ```
 35 | 
 36 | ### Statistics Table
 37 | ```sql
 38 | CREATE TABLE IF NOT EXISTS statistics (
 39 |     id INTEGER PRIMARY KEY AUTOINCREMENT,
 40 |     run_id TEXT NOT NULL,      -- UUID for grouping stats from same run
 41 |     decoder_name TEXT NOT NULL,
 42 |     success_count INTEGER NOT NULL,
 43 |     total_attempts INTEGER NOT NULL,
 44 |     search_depth INTEGER NOT NULL,
 45 |     seen_strings_count INTEGER NOT NULL,
 46 |     prune_threshold INTEGER NOT NULL,
 47 |     max_memory_kb INTEGER NOT NULL,
 48 |     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
 49 | );
 50 | 
 51 | CREATE INDEX IF NOT EXISTS idx_stats_run_id ON statistics(run_id);
 52 | CREATE INDEX IF NOT EXISTS idx_stats_decoder ON statistics(decoder_name);
 53 | ```
 54 | 
 55 | ## Operation Flow
 56 | 
 57 | ### 1. Startup Operations
 58 | 
 59 | ```mermaid
 60 | sequenceDiagram
 61 |     participant Main as Main Program
 62 |     participant DB as Database
 63 |     participant A* as A* Search
 64 |     
 65 |     Main->>DB: Check cache for encoded text
 66 |     alt Found in cache
 67 |         DB-->>Main: Return cached result
 68 |         Main->>Main: Use cached solution
 69 |     else Not found
 70 |         DB->>DB: Calculate average statistics
 71 |         DB-->>A*: Initialize with historical stats
 72 |         A*->>A*: Begin search
 73 |     end
 74 | ```
 75 | 
 76 | The program performs two important database operations at startup:
 77 | 1. Cache lookup to check if we've seen this encoded string before
 78 | 2. Statistics calculation to inform A* search parameters:
 79 |    - Average success rates per decoder
 80 |    - Typical search depths
 81 |    - Common successful decoder sequences
 82 | 
 83 | ### 2. Database Writes
 84 | 
 85 | Database writes occur only after A* search completes successfully:
 86 | 1. When `astar()` returns with a successful result
 87 | 2. Before the program exits
 88 | 3. Inside a transaction to ensure both cache and statistics are written atomically
 89 | 4. Only successful runs are recorded to maintain data quality
 90 | 
 91 | ## Implementation Steps
 92 | 
 93 | ### 1. Add Dependencies
 94 | ```toml
 95 | [dependencies]
 96 | rusqlite = { version = "0.29", features = ["bundled"] }
 97 | serde_json = "1.0"
 98 | uuid = { version = "1.0", features = ["v4"] }
 99 | directories = "5.0"        # For managing app directories
100 | ```
101 | 
102 | ### 2. Create Database Module
103 | - Location: `src/storage/database.rs`
104 | - Responsibilities:
105 |   * Database initialization
106 |   * Connection management
107 |   * Schema creation
108 |   * CRUD operations
109 | 
110 | ### 3. Modify Main Program
111 | - Add startup database operations
112 | - Add run_id generation
113 | - Add JSON serialization for paths
114 | - Write results only after successful completion
115 | 
116 | ### 4. Database Operations Flow
117 | 
118 | ```mermaid
119 | sequenceDiagram
120 |     participant Main as Main Program
121 |     participant A* as A* Search
122 |     participant DB as Database Module
123 |     participant SQLite as SQLite DB
124 |     
125 |     Main->>DB: Check cache
126 |     DB-->>Main: Cache results
127 |     Main->>DB: Get historical stats
128 |     DB-->>A*: Initialize with stats
129 |     A*->>A*: Start search
130 |     A*->>A*: Generate run_id
131 |     
132 |     loop During Search
133 |         A*->>A*: Track statistics
134 |     end
135 |     alt Successful Decode
136 |         A*->>DB: Write to cache
137 |         A*->>DB: Write statistics
138 |         DB->>SQLite: Insert records
139 |     end
140 | ```
141 | 
142 | ### 6. Testing Strategy
143 | 
144 | 1. Unit Tests
145 |    - Database connection
146 |    - Schema creation 
147 |    - CRUD operations
148 |    - Error handling
149 | 
150 | 2. Integration Tests
151 |    - Full A* search with database writes
152 |    - Data persistence verification
153 |    - Concurrent access handling
154 | 
155 | ### 7. Migration Plan
156 | 
157 | 1. Create database directory if not exists
158 | 2. Initialize schema on first run
159 | 3. Add version table for future schema migrations
160 | 
161 | ## Usage Example
162 | 
163 | ```rust
164 | // In main.rs
165 | 
166 | let run_id = Uuid::new_v4().to_string();
167 | let db = Database::new()?;
168 | 
169 | // After successful decode
170 | if let Some(result) = final_result {
171 |     db.insert_cache(&CacheEntry {
172 |         encoded_text: input,
173 |         decoded_text: result.text,
174 |         path: serde_json::to_string(&result.path)?,
175 |         execution_time_ms: duration.as_millis() as i64,
176 |     })?;
177 | 
178 |     db.insert_statistics(&StatisticsEntry {
179 |         run_id: &run_id,
180 |         decoder_stats: &decoder_stats,
181 |         search_depth: curr_depth,
182 |         seen_strings_count: seen_count,
183 |         prune_threshold,
184 |         max_memory_kb: get_memory_usage()?,
185 |     })?;
186 | }
187 | 


--------------------------------------------------------------------------------
/src/checkers/wordlist.rs:
--------------------------------------------------------------------------------
  1 | use crate::checkers::checker_result::CheckResult;
  2 | use crate::checkers::checker_type::{Check, Checker};
  3 | use crate::config::get_config;
  4 | use gibberish_or_not::Sensitivity;
  5 | use lemmeknow::Identifier;
  6 | use log::trace;
  7 | #[cfg(test)]
  8 | use std::collections::HashSet;
  9 | 
 10 | /// WordlistChecker checks if the input text exactly matches any word in a user-provided wordlist
 11 | pub struct WordlistChecker;
 12 | 
 13 | impl Check for Checker<WordlistChecker> {
 14 |     fn new() -> Self {
 15 |         Checker {
 16 |             name: "Wordlist Checker",
 17 |             description:
 18 |                 "Checks if the input text exactly matches any word in a user-provided wordlist",
 19 |             link: "",
 20 |             tags: vec!["wordlist", "exact-match"],
 21 |             expected_runtime: 0.01,
 22 |             popularity: 1.0,
 23 |             lemmeknow_config: Identifier::default(),
 24 |             sensitivity: Sensitivity::Medium, // Dummy value - not used by this checker
 25 |             enhanced_detector: None,
 26 |             _phantom: std::marker::PhantomData,
 27 |         }
 28 |     }
 29 | 
 30 |     fn check(&self, text: &str) -> CheckResult {
 31 |         let config = get_config();
 32 | 
 33 |         // Only run this checker if a wordlist is provided
 34 |         if let Some(wordlist) = &config.wordlist {
 35 |             trace!("Running wordlist checker with {} entries", wordlist.len());
 36 | 
 37 |             // Perform exact matching against the wordlist
 38 |             let is_match = wordlist.contains(text);
 39 | 
 40 |             if is_match {
 41 |                 trace!("Found exact match in wordlist for: {}", text);
 42 |                 let mut result = CheckResult::new(self);
 43 |                 result.is_identified = true;
 44 |                 result.text = text.to_string();
 45 |                 result.description =
 46 |                     "text which matches an entry in the provided wordlist".to_string();
 47 |                 return result;
 48 |             }
 49 | 
 50 |             trace!("No match found in wordlist for: {}", text);
 51 |         } else {
 52 |             trace!("Wordlist checker skipped - no wordlist provided");
 53 |         }
 54 | 
 55 |         // No match found or no wordlist provided
 56 |         CheckResult::new(self)
 57 |     }
 58 | 
 59 |     fn with_sensitivity(mut self, sensitivity: Sensitivity) -> Self {
 60 |         // Wordlist checker doesn't use sensitivity, but we need to implement this method
 61 |         // to satisfy the Check trait. The sensitivity value is stored but not used.
 62 |         self.sensitivity = sensitivity;
 63 |         self
 64 |     }
 65 | 
 66 |     fn get_sensitivity(&self) -> Sensitivity {
 67 |         // Return the stored sensitivity value, though it's not used for checking
 68 |         self.sensitivity
 69 |     }
 70 | }
 71 | 
 72 | // Extension methods for testing
 73 | #[cfg(test)]
 74 | impl Checker<WordlistChecker> {
 75 |     /// Check with a directly provided wordlist (for testing)
 76 |     fn check_with_wordlist(&self, text: &str, wordlist: &HashSet<String>) -> CheckResult {
 77 |         trace!("Running wordlist checker with {} entries", wordlist.len());
 78 | 
 79 |         // Perform exact matching against the wordlist
 80 |         let is_match = wordlist.contains(text);
 81 | 
 82 |         if is_match {
 83 |             trace!("Found exact match in wordlist for: {}", text);
 84 |             let mut result = CheckResult::new(self);
 85 |             result.is_identified = true;
 86 |             result.text = text.to_string();
 87 |             result.description = "Text matches an entry in the provided wordlist".to_string();
 88 |             return result;
 89 |         }
 90 | 
 91 |         trace!("No match found in wordlist for: {}", text);
 92 | 
 93 |         // No match found
 94 |         CheckResult::new(self)
 95 |     }
 96 | }
 97 | 
 98 | #[cfg(test)]
 99 | mod tests {
100 |     use super::*;
101 |     use std::collections::HashSet;
102 | 
103 |     #[test]
104 |     fn test_wordlist_match() {
105 |         // Create a test wordlist
106 |         let mut wordlist = HashSet::new();
107 |         wordlist.insert("password123".to_string());
108 |         wordlist.insert("hello".to_string());
109 |         wordlist.insert("test".to_string());
110 | 
111 |         // Create checker and test
112 |         let checker = Checker::<WordlistChecker>::new();
113 | 
114 |         // Print debug info to help diagnose the issue
115 |         println!("Testing with wordlist containing: password123, hello, test");
116 | 
117 |         // Should match
118 |         let result = checker.check_with_wordlist("hello", &wordlist);
119 |         println!("Result for 'hello': is_identified={}", result.is_identified);
120 |         assert!(result.is_identified);
121 | 
122 |         // Should not match
123 |         let result = checker.check_with_wordlist("goodbye", &wordlist);
124 |         println!(
125 |             "Result for 'goodbye': is_identified={}",
126 |             result.is_identified
127 |         );
128 |         assert!(!result.is_identified);
129 |     }
130 | 
131 |     #[test]
132 |     fn test_no_wordlist() {
133 |         // Create an empty wordlist
134 |         let wordlist = HashSet::new();
135 | 
136 |         // Create checker and test
137 |         let checker = Checker::<WordlistChecker>::new();
138 | 
139 |         // Print debug info
140 |         println!("Testing with empty wordlist");
141 | 
142 |         // Should not match anything when no wordlist is provided
143 |         let result = checker.check_with_wordlist("hello", &wordlist);
144 |         println!(
145 |             "Result for 'hello' with empty wordlist: is_identified={}",
146 |             result.is_identified
147 |         );
148 |         assert!(!result.is_identified);
149 |     }
150 | }
151 | 


--------------------------------------------------------------------------------
/src/decoders/braille_decoder.rs:
--------------------------------------------------------------------------------
  1 | use super::crack_results::CrackResult;
  2 | use super::interface::Crack;
  3 | use super::interface::Decoder;
  4 | use crate::checkers::CheckerTypes;
  5 | 
  6 | use log::trace;
  7 | use std::collections::HashMap;
  8 | 
  9 | /// Braille Decoder
 10 | pub struct BrailleDecoder;
 11 | 
 12 | impl Crack for Decoder<BrailleDecoder> {
 13 |     fn new() -> Decoder<BrailleDecoder> {
 14 |         Decoder {
 15 |             name: "Braille",
 16 |             description: "Braille is a tactile writing system used by people who are visually impaired. It consists of raised dots arranged in cells of up to six dots in a 3×2 pattern.",
 17 |             link: "https://en.wikipedia.org/wiki/Braille",
 18 |             tags: vec!["braille", "substitution", "decoder"],
 19 |             popularity: 0.8,
 20 |             phantom: std::marker::PhantomData,
 21 |         }
 22 |     }
 23 | 
 24 |     fn crack(&self, text: &str, checker: &CheckerTypes) -> CrackResult {
 25 |         trace!("Trying braille with text {:?}", text);
 26 |         let mut results = CrackResult::new(self, text.to_string());
 27 | 
 28 |         if text.is_empty() {
 29 |             return results;
 30 |         }
 31 | 
 32 |         let decoded_text = braille_to_text(text);
 33 | 
 34 |         // check if the decoder transformed the input
 35 |         if decoded_text == text {
 36 |             return results; // unencrypted text is already None by default
 37 |         }
 38 | 
 39 |         let checker_result = checker.check(&decoded_text);
 40 |         if checker_result.is_identified {
 41 |             trace!("Found a match with braille");
 42 |             results.unencrypted_text = Some(vec![decoded_text]);
 43 |             results.update_checker(&checker_result);
 44 |             return results;
 45 |         }
 46 | 
 47 |         results.unencrypted_text = Some(vec![decoded_text]);
 48 |         results
 49 |     }
 50 | 
 51 |     fn get_tags(&self) -> &Vec<&str> {
 52 |         &self.tags
 53 |     }
 54 | 
 55 |     fn get_name(&self) -> &str {
 56 |         self.name
 57 |     }
 58 | 
 59 |     /// Gets the description for the current decoder
 60 |     fn get_description(&self) -> &str {
 61 |         self.description
 62 |     }
 63 | 
 64 |     /// Gets the link for the current decoder
 65 |     fn get_link(&self) -> &str {
 66 |         self.link
 67 |     }
 68 | }
 69 | 
 70 | /// Converts Braille Unicode characters to their corresponding Latin alphabet characters
 71 | ///
 72 | /// This function maps each Braille character to its corresponding Latin letter
 73 | /// and returns the decoded text as a String.
 74 | fn braille_to_text(text: &str) -> String {
 75 |     let mut mapping = HashMap::new();
 76 |     mapping.insert('⠁', 'a');
 77 |     mapping.insert('⠃', 'b');
 78 |     mapping.insert('⠉', 'c');
 79 |     mapping.insert('⠙', 'd');
 80 |     mapping.insert('⠑', 'e');
 81 |     mapping.insert('⠋', 'f');
 82 |     mapping.insert('⠛', 'g');
 83 |     mapping.insert('⠓', 'h');
 84 |     mapping.insert('⠊', 'i');
 85 |     mapping.insert('⠚', 'j');
 86 |     mapping.insert('⠅', 'k');
 87 |     mapping.insert('⠇', 'l');
 88 |     mapping.insert('⠍', 'm');
 89 |     mapping.insert('⠝', 'n');
 90 |     mapping.insert('⠕', 'o');
 91 |     mapping.insert('⠏', 'p');
 92 |     mapping.insert('⠟', 'q');
 93 |     mapping.insert('⠗', 'r');
 94 |     mapping.insert('⠎', 's');
 95 |     mapping.insert('⠞', 't');
 96 |     mapping.insert('⠥', 'u');
 97 |     mapping.insert('⠧', 'v');
 98 |     mapping.insert('⠺', 'w');
 99 |     mapping.insert('⠭', 'x');
100 |     mapping.insert('⠽', 'y');
101 |     mapping.insert('⠵', 'z');
102 |     mapping.insert('⠀', ' ');
103 | 
104 |     text.chars()
105 |         .map(|c| *mapping.get(&c).unwrap_or(&c))
106 |         .collect::<String>()
107 | }
108 | 
109 | #[cfg(test)]
110 | mod tests {
111 |     use super::BrailleDecoder;
112 |     use crate::{
113 |         checkers::{
114 |             athena::Athena,
115 |             checker_type::{Check, Checker},
116 |             CheckerTypes,
117 |         },
118 |         decoders::interface::{Crack, Decoder},
119 |     };
120 | 
121 |     // helper for tests
122 |     fn get_athena_checker() -> CheckerTypes {
123 |         let athena_checker = Checker::<Athena>::new();
124 |         CheckerTypes::CheckAthena(athena_checker)
125 |     }
126 | 
127 |     #[test]
128 |     fn braille_decodes_successfully() {
129 |         let braille_decoder = Decoder::<BrailleDecoder>::new();
130 |         let result = braille_decoder.crack("⠓⠑⠇⠇⠕⠀⠺⠕⠗⠇⠙", &get_athena_checker());
131 |         assert_eq!(result.unencrypted_text.unwrap()[0], "hello world");
132 |     }
133 | 
134 |     #[test]
135 |     fn braille_handles_panic_if_empty_string() {
136 |         let braille_decoder = Decoder::<BrailleDecoder>::new();
137 |         let result = braille_decoder
138 |             .crack("", &get_athena_checker())
139 |             .unencrypted_text;
140 |         assert!(result.is_none());
141 |     }
142 | 
143 |     #[test]
144 |     fn test_braille_long_sentence() {
145 |         let braille_decoder = Decoder::<BrailleDecoder>::new();
146 |         let test_string = "⠓⠑⠇⠇⠕⠀⠍⠽⠀⠝⠁⠍⠑⠀⠊⠎⠀⠃⠑⠑⠀⠁⠝⠙⠀⠊⠀⠇⠊⠅⠑⠀⠙⠕⠛⠀⠁⠝⠙⠀⠁⠏⠏⠇⠑⠀⠁⠝⠙⠀⠞⠗⠑⠑";
147 |         let expected = "hello my name is bee and i like dog and apple and tree";
148 | 
149 |         let result = braille_decoder.crack(test_string, &get_athena_checker());
150 | 
151 |         assert!(result.unencrypted_text.is_some());
152 |         assert_eq!(result.unencrypted_text.unwrap()[0].to_lowercase(), expected);
153 |     }
154 | 
155 |     #[test]
156 |     fn test_braille_handles_invalid_chars() {
157 |         let braille_decoder = Decoder::<BrailleDecoder>::new();
158 |         let result = braille_decoder
159 |             .crack("123ABC", &get_athena_checker())
160 |             .unencrypted_text;
161 |         assert!(result.is_none());
162 |     }
163 | 
164 |     #[test]
165 |     fn test_braille_handles_mixed_content() {
166 |         let braille_decoder = Decoder::<BrailleDecoder>::new();
167 |         let result = braille_decoder
168 |             .crack("⠓⠑⠇⠇⠕123", &get_athena_checker())
169 |             .unencrypted_text;
170 |         assert!(result.is_some());
171 |         assert_eq!(result.unwrap()[0], "hello123");
172 |     }
173 | }
174 | 


--------------------------------------------------------------------------------
/docs/changes/2024-07-10-astar-simplified-heuristic-rewrite.md:
--------------------------------------------------------------------------------
  1 | # Plan: A* Search Algorithm Heuristic Rewrite for Ciphey
  2 | 
  3 | ## Overview
  4 | 
  5 | This document outlines the plan to rewrite the heuristic function in the A* search algorithm for ciphey. The goal is to improve the algorithm's efficiency by focusing on more promising decoding paths through:
  6 | 
  7 | 1. Using a positive heuristic for decoder popularity
  8 | 2. Adding a penalty for search depth
  9 | 3. Maintaining the penalty for uncommon decoder sequences
 10 | 4. Removing the CipherIdentifier dependency
 11 | 
 12 | ## Current Implementation Analysis
 13 | 
 14 | The current heuristic function in `src/searchers/helper_functions.rs` uses a complex system of multipliers:
 15 | - Gets a base score from the cipher identifier
 16 | - Applies penalties for uncommon sequences (1.75x multiplier)
 17 | - Applies penalties for low success rates
 18 | - Applies penalties for low popularity
 19 | - Applies penalties for low quality strings
 20 | - Applies penalties for non-printable characters
 21 | 
 22 | This approach has several drawbacks:
 23 | - The multipliers compound, leading to exponential growth in the heuristic value
 24 | - It's difficult to understand the contribution of each component
 25 | - It doesn't explicitly account for search depth
 26 | - It relies on CipherIdentifier, which adds complexity
 27 | 
 28 | ## Proposed Heuristic Function
 29 | 
 30 | We'll simplify the heuristic to focus on three key components:
 31 | 
 32 | ```rust
 33 | fn generate_heuristic(text: &str, path: &[CrackResult], next_decoder: &Option<Box<dyn Crack + Sync>>) -> f32 {
 34 |     let mut base_score = 0.0;
 35 | 
 36 |     // 1. Popularity component - directly use (1.0 - popularity)
 37 |     if let Some(decoder) = next_decoder {
 38 |         base_score += (1.0 - decoder.popularity);
 39 |     } else {
 40 |         // If next decoder is None, add a moderate penalty
 41 |         base_score += 0.5;
 42 |     }
 43 | 
 44 |     // 2. Depth penalty - exponential growth but not too aggressive
 45 |     base_score += (0.05 * path.len() as f32).powi(2);
 46 | 
 47 |     // 3. Penalty for uncommon pairings
 48 |     if path.len() > 1 {
 49 |         if let Some(previous_decoder) = path.last() {
 50 |             if let Some(next_decoder) = next_decoder {
 51 |                 if !is_common_sequence(previous_decoder.decoder, next_decoder.get_name()) {
 52 |                     base_score += 0.25;
 53 |                 }
 54 |             }
 55 |         }
 56 |     }
 57 | 
 58 |     base_score
 59 | }
 60 | ```
 61 | 
 62 | ## Component Analysis
 63 | 
 64 | ### 1. Popularity Component
 65 | 
 66 | We'll use `(1.0 - decoder.popularity)` directly:
 67 | - A decoder with popularity 1.0 (most popular) would add 0.0 to the heuristic
 68 | - A decoder with popularity 0.0 (least popular) would add 1.0 to the heuristic
 69 | 
 70 | This makes more popular decoders more likely to be explored first.
 71 | 
 72 | ### 2. Depth Penalty
 73 | 
 74 | We'll use a moderate exponential function: `(0.05 * path.len() as f32).powi(2)`
 75 | 
 76 | This results in:
 77 | - Depth 5: 0.0625 penalty
 78 | - Depth 10: 0.25 penalty
 79 | - Depth 20: 1.0 penalty
 80 | - Depth 30: 2.25 penalty
 81 | 
 82 | This exponential growth ensures that very deep paths are significantly penalized, but it's not so aggressive that it prevents exploring moderately deep paths that might be valid solutions.
 83 | 
 84 | ### 3. Uncommon Sequence Penalty
 85 | 
 86 | We'll maintain the existing penalty approach, adding 0.25 to the heuristic for uncommon sequences:
 87 | - This makes common sequences relatively more attractive
 88 | - It maintains non-negative heuristic values (important for A* correctness)
 89 | - It's consistent with the rest of the heuristic design
 90 | 
 91 | ## Visual Representation
 92 | 
 93 | ```mermaid
 94 | graph TD
 95 |     A[Heuristic Score] --> B[Popularity Component]
 96 |     A --> C[Depth Penalty]
 97 |     A --> D[Uncommon Sequence Penalty]
 98 |     
 99 |     B --> B1[1.0 - decoder.popularity]
100 |     B --> B2[No decoder: +0.5 penalty]
101 |     
102 |     C --> C1["(0.05 * depth)²"]
103 |     
104 |     D --> D1[Common sequence: No penalty]
105 |     D --> D2[Uncommon sequence: +0.25]
106 | ```
107 | 
108 | ## Implementation Steps
109 | 
110 | 1. Update the `generate_heuristic` function signature to include the `next_decoder` parameter:
111 |    ```rust
112 |    fn generate_heuristic(text: &str, path: &[CrackResult], next_decoder: &Option<Box<dyn Crack + Sync>>) -> f32
113 |    ```
114 | 
115 | 2. Implement the simplified heuristic logic as outlined above
116 | 
117 | 3. Update all calls to `generate_heuristic` in `src/searchers/astar.rs` to pass the appropriate `next_decoder` parameter
118 | 
119 | 4. Remove any CipherIdentifier-related code that's no longer needed
120 | 
121 | 5. Update tests to reflect the new heuristic behavior
122 | 
123 | ## Expected Benefits
124 | 
125 | 1. **Improved Efficiency**: By prioritizing more popular decoders, we'll likely find solutions faster in common cases
126 | 
127 | 2. **Better Path Exploration**: The depth penalty will prevent the algorithm from going too deep in unproductive paths
128 | 
129 | 3. **Simplified Logic**: The new heuristic is more straightforward and easier to understand/maintain
130 | 
131 | 4. **More Predictable Behavior**: The additive approach makes it easier to understand how each component affects the overall heuristic
132 | 
133 | ## Potential Risks and Mitigations
134 | 
135 | 1. **Risk**: The depth penalty might be too aggressive or too lenient
136 |    **Mitigation**: The coefficient (0.05) can be adjusted based on testing
137 | 
138 | 2. **Risk**: Removing CipherIdentifier might reduce accuracy for certain cipher types
139 |    **Mitigation**: The popularity-based approach should still work well for most cases, and we can revisit if needed
140 | 
141 | 3. **Risk**: The penalty for uncommon sequences might be too high or too low
142 |    **Mitigation**: The value (0.25) can be adjusted based on testing
143 | 
144 | ## Conclusion
145 | 
146 | This simplified heuristic approach focuses on the most important factors for efficient A* search in the context of decoding:
147 | - Popularity of decoders
148 | - Depth of the search tree
149 | - Common decoder sequences
150 | 
151 | By removing the complexity of the current implementation and focusing on these key factors, we expect to improve both the efficiency and maintainability of the A* search algorithm in ciphey.


--------------------------------------------------------------------------------
/src/searchers/mod.rs:
--------------------------------------------------------------------------------
  1 | //! The search algorithm decides what encryptions to do next
  2 | //! And also runs the decryption modules
  3 | //! Click here to find out more:
  4 | //! https://broadleaf-angora-7db.notion.site/Search-Nodes-Edges-What-should-they-look-like-b74c43ca7ac341a1a5cfdbeb84a7eef0
  5 | 
  6 | use std::sync::atomic::AtomicBool;
  7 | use std::sync::Arc;
  8 | use std::thread;
  9 | 
 10 | use crossbeam::channel::bounded;
 11 | 
 12 | use crate::checkers::athena::Athena;
 13 | use crate::checkers::checker_type::{Check, Checker};
 14 | use crate::checkers::CheckerTypes;
 15 | use crate::config::get_config;
 16 | use crate::filtration_system::{filter_and_get_decoders, MyResults};
 17 | use crate::{timer, DecoderResult};
 18 | /// This module provides access to the A* search algorithm
 19 | /// which uses a heuristic to prioritize decoders.
 20 | mod astar;
 21 | /// This module provides access to the breadth first search
 22 | /// which searches for the plaintext.
 23 | mod bfs;
 24 | /// This module contains helper functions used by the A* search algorithm.
 25 | mod helper_functions;
 26 | 
 27 | /*pub struct Tree <'a> {
 28 |     // Wrap in a box because
 29 |     // https://doc.rust-lang.org/error-index.html#E0072
 30 |     parent: &'a Box<Option<Tree<'a>>>,
 31 |     value: String
 32 | }*/
 33 | 
 34 | /// Performs the search algorithm.
 35 | ///
 36 | /// When we perform the decryptions, we will get a vector of Some<String>
 37 | /// We need to loop through these and determine:
 38 | /// 1. Did we reach our exit condition?
 39 | /// 2. If not, create new nodes out of them and add them to the queue.
 40 | ///
 41 | ///    We can return an Option? An Enum? And then match on that
 42 | ///    So if we return CrackSuccess we return
 43 | ///    Else if we return an array, we add it to the children and go again.
 44 | pub fn search_for_plaintext(input: String) -> Option<DecoderResult> {
 45 |     let config = get_config();
 46 |     let timeout = config.timeout;
 47 |     let timer = timer::start(timeout);
 48 | 
 49 |     let (result_sender, result_recv) = bounded::<Option<DecoderResult>>(1);
 50 |     // For stopping the thread
 51 |     let stop = Arc::new(AtomicBool::new(false));
 52 |     let s = stop.clone();
 53 |     // Use A* search algorithm instead of BFS
 54 |     let handle = thread::spawn(move || astar::astar(input, result_sender, s));
 55 | 
 56 |     // In top_results mode, we don't need to return a result immediately
 57 |     // as the timer will display all results when it expires
 58 |     let top_results_mode = config.top_results;
 59 | 
 60 |     // If we're in top_results mode, we'll store the first result to return
 61 |     // at the end of the timer
 62 |     let mut first_result = None;
 63 | 
 64 |     loop {
 65 |         if let Ok(res) = result_recv.try_recv() {
 66 |             log::info!("Found potential plaintext result");
 67 |             log::trace!("Result details: {:?}", res);
 68 | 
 69 |             // In top_results mode, we store the first result but don't stop the search
 70 |             if top_results_mode {
 71 |                 if first_result.is_none() {
 72 |                     first_result = res;
 73 |                 }
 74 |                 // Continue searching for more results
 75 |             } else {
 76 |                 // In normal mode, we stop the search and return the result
 77 |                 stop.store(true, std::sync::atomic::Ordering::Relaxed);
 78 |                 // Wait for the thread to finish
 79 |                 handle.join().unwrap();
 80 |                 return res;
 81 |             }
 82 |         }
 83 | 
 84 |         if timer.try_recv().is_ok() {
 85 |             stop.store(true, std::sync::atomic::Ordering::Relaxed);
 86 |             log::info!("Search timer expired");
 87 |             // Wait for the thread to finish to ensure any ongoing human checker interaction completes
 88 |             handle.join().unwrap();
 89 | 
 90 |             // In top_results mode, return the first result we found (if any)
 91 |             if top_results_mode {
 92 |                 return first_result;
 93 |             }
 94 | 
 95 |             return None;
 96 |         }
 97 | 
 98 |         // Small sleep to prevent CPU spinning
 99 |         std::thread::sleep(std::time::Duration::from_millis(10));
100 |     }
101 | }
102 | 
103 | /// Performs the decodings by getting all of the decoders
104 | /// and calling `.run` which in turn loops through them and calls
105 | /// `.crack()`.
106 | #[allow(dead_code)]
107 | fn perform_decoding(text: &DecoderResult) -> MyResults {
108 |     let decoders = filter_and_get_decoders(text);
109 |     let athena_checker = Checker::<Athena>::new();
110 |     let checker = CheckerTypes::CheckAthena(athena_checker);
111 |     decoders.run(&text.text[0], checker)
112 | }
113 | 
114 | #[cfg(test)]
115 | mod tests {
116 |     use super::*;
117 | 
118 |     // https://github.com/bee-san/ciphey/pull/14/files#diff-b8829c7e292562666c7fa5934de7b478c4a5de46d92e42c46215ac4d9ff89db2R37
119 |     // Only used for tests!
120 |     fn exit_condition(input: &str) -> bool {
121 |         // use Athena Checker from checkers module
122 |         // call check(input)
123 |         let athena_checker = Checker::<Athena>::new();
124 |         let checker = CheckerTypes::CheckAthena(athena_checker);
125 |         checker.check(input).is_identified
126 |     }
127 | 
128 |     #[test]
129 |     fn exit_condition_succeeds() {
130 |         let result = exit_condition("https://www.google.com");
131 |         assert!(result);
132 |     }
133 |     #[test]
134 |     fn exit_condition_fails() {
135 |         let result = exit_condition("vjkrerkdnxhrfjekfdjexk");
136 |         assert!(!result);
137 |     }
138 | 
139 |     #[test]
140 |     fn perform_decoding_succeeds() {
141 |         let dc = DecoderResult::_new("aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbQ==");
142 |         let result = perform_decoding(&dc);
143 |         assert!(
144 |             result
145 |                 ._break_value()
146 |                 .expect("expected successful value, none found")
147 |                 .success
148 |         );
149 |         //TODO assert that the plaintext is correct by looping over the vector
150 |     }
151 |     #[test]
152 |     fn perform_decoding_succeeds_empty_string() {
153 |         // Some decoders like base64 return even when the string is empty.
154 |         let dc = DecoderResult::_new("");
155 |         let result = perform_decoding(&dc);
156 |         assert!(result._break_value().is_none());
157 |     }
158 | }
159 | 


--------------------------------------------------------------------------------
/tests/integration_test.rs:
--------------------------------------------------------------------------------
  1 | use ciphey::checkers::checker_result::CheckResult;
  2 | use ciphey::checkers::checker_type::{Check, Checker};
  3 | use ciphey::checkers::english::EnglishChecker;
  4 | use ciphey::config::Config;
  5 | use ciphey::decoders::base64_decoder::Base64Decoder;
  6 | use ciphey::decoders::crack_results::CrackResult;
  7 | use ciphey::decoders::interface::{Crack, Decoder};
  8 | use ciphey::perform_cracking;
  9 | use ciphey::storage::database;
 10 | use ciphey::{set_test_db_path, TestDatabase};
 11 | use serial_test::{parallel, serial};
 12 | use uuid::Uuid;
 13 | 
 14 | // TODO Below fails because Library API is broken.
 15 | // https://github.com/bee-san/ciphey/issues/48
 16 | #[test]
 17 | #[parallel]
 18 | fn test_it_works() {
 19 |     // It will panic if it doesn't work!
 20 |     // Plaintext is `Mutley, you snickering, floppy eared hound. When courage is needed, you’re never around. Those m...	`
 21 |     let config = Config::default();
 22 |     perform_cracking("TXV0bGV5LCB5b3Ugc25pY2tlcmluZywgZmxvcHB5IGVhcmVkIGhvdW5kLiBXaGVuIGNvdXJhZ2UgaXMgbmVlZGVkLCB5b3XigJlyZSBuZXZlciBhcm91bmQuIFRob3NlIG1lZGFscyB5b3Ugd2VhciBvbiB5b3VyIG1vdGgtZWF0ZW4gY2hlc3Qgc2hvdWxkIGJlIHRoZXJlIGZvciBidW5nbGluZyBhdCB3aGljaCB5b3UgYXJlIGJlc3QuIFNvLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLCBzdG9wIHRoYXQgcGlnZW9uLiBIb3d3d3chIE5hYiBoaW0sIGphYiBoaW0sIHRhYiBoaW0sIGdyYWIgaGltLCBzdG9wIHRoYXQgcGlnZW9uIG5vdy4g", config);
 23 |     assert_eq!(true, true);
 24 | }
 25 | 
 26 | #[test]
 27 | #[parallel]
 28 | fn test_no_panic_if_empty_string() {
 29 |     // It will panic if it doesn't work!
 30 |     let config = Config::default();
 31 |     perform_cracking("", config);
 32 |     assert_eq!(true, true);
 33 | }
 34 | 
 35 | /*
 36 | #[test]
 37 | fn test_program_parses_files_and_cracks() {
 38 |     // It should be able to open and crack this file
 39 |     let file_path = "tests/test_fixtures/base64_3_times_with_no_new_line";
 40 |     let config = Config::default();
 41 |     let to_crack = read_and_parse_file(file_path.to_string());
 42 |     let result = perform_cracking(&to_crack, config);
 43 |     assert_eq!(true, true);
 44 |     // The base64 string decodes to "VFoW2RHbHdiR1VndXMUdlbHBVV1RCSlIxWjFXVEk1YTJGWE5XNWpkejA5"
 45 |     let result = result.unwrap();
 46 |     assert!(
 47 |         !result.text.is_empty(),
 48 |         "Decoding should produce some result"
 49 |     );
 50 | }
 51 | */
 52 | /*
 53 | #[test]
 54 | #[ignore]
 55 | fn test_program_parses_files_with_new_line_and_cracks() {
 56 |     // It should be able to open and crack this file
 57 |     let file_path = "tests/test_fixtures/rot13_base64_hex_with_newline";
 58 |     let config = Config::default();
 59 |     let to_crack = read_and_parse_file(file_path.to_string());
 60 |     let result = perform_cracking(&to_crack, config);
 61 |     assert_eq!(true, true);
 62 |     assert!(result.unwrap().text[0] == "This is a test!");
 63 | }
 64 | */
 65 | 
 66 | #[test]
 67 | #[serial]
 68 | fn test_cache_miss_simple_base64() {
 69 |     let _test_db = TestDatabase::default();
 70 |     set_test_db_path();
 71 | 
 72 |     let encoded_text_1 = String::from("aGVsbG8gd29ybGQK");
 73 |     let decoded_text_1 = String::from("hello world\n");
 74 | 
 75 |     let config = Config::default();
 76 |     let result = perform_cracking(encoded_text_1.as_str(), config);
 77 |     assert!(result.is_some());
 78 |     assert!(result.unwrap().path.last().unwrap().success);
 79 | 
 80 |     let row_result = database::read_cache(&encoded_text_1);
 81 |     assert!(row_result.is_ok());
 82 |     let row_result = row_result.unwrap();
 83 |     assert!(row_result.is_some());
 84 | 
 85 |     let row: database::CacheRow = row_result.unwrap();
 86 | 
 87 |     let base64_decoder = Decoder::<Base64Decoder>::new();
 88 |     let mut expected_crack_result: CrackResult =
 89 |         CrackResult::new(&base64_decoder, encoded_text_1.clone());
 90 |     expected_crack_result.unencrypted_text = Some(vec![decoded_text_1.clone()]);
 91 |     let expected_checker = Checker::<EnglishChecker>::new();
 92 |     let mut expected_check_result = CheckResult::new(&expected_checker);
 93 |     expected_check_result.is_identified = true;
 94 |     expected_crack_result.update_checker(&expected_check_result);
 95 |     let expected_path = vec![expected_crack_result.get_json().unwrap()];
 96 | 
 97 |     assert_eq!(row.encoded_text, encoded_text_1);
 98 |     assert_eq!(row.decoded_text, decoded_text_1);
 99 |     assert_eq!(row.path, expected_path);
100 |     assert!(row.successful);
101 | }
102 | 
103 | #[test]
104 | #[serial]
105 | fn test_cache_hit_simple_base64() {
106 |     let _test_db = TestDatabase::default();
107 |     set_test_db_path();
108 | 
109 |     let encoded_text_1 = String::from("aGVsbG8gd29ybGQK");
110 |     let decoded_text_1 = String::from("hello world\n");
111 | 
112 |     let base64_decoder = Decoder::<Base64Decoder>::new();
113 |     let mut expected_crack_result: CrackResult =
114 |         CrackResult::new(&base64_decoder, encoded_text_1.clone());
115 |     expected_crack_result.unencrypted_text = Some(vec![decoded_text_1.clone()]);
116 |     let expected_checker = Checker::<EnglishChecker>::new();
117 |     let mut expected_check_result = CheckResult::new(&expected_checker);
118 |     expected_check_result.is_identified = true;
119 |     expected_crack_result.update_checker(&expected_check_result);
120 |     let expected_path = vec![expected_crack_result.get_json().unwrap()];
121 | 
122 |     let _result = database::insert_cache(&database::CacheEntry {
123 |         uuid: Uuid::new_v4(),
124 |         encoded_text: encoded_text_1.clone(),
125 |         decoded_text: decoded_text_1.clone(),
126 |         path: vec![expected_crack_result],
127 |         execution_time_ms: 100,
128 |     });
129 | 
130 |     let config = Config::default();
131 |     let result = perform_cracking(encoded_text_1.as_str(), config);
132 |     assert!(result.is_some());
133 |     assert!(result.unwrap().path.last().unwrap().success);
134 | 
135 |     let row_result = database::read_cache(&encoded_text_1);
136 |     assert!(row_result.is_ok());
137 |     let row_result = row_result.unwrap();
138 |     assert!(row_result.is_some());
139 | 
140 |     let row: database::CacheRow = row_result.unwrap();
141 |     assert_eq!(row.encoded_text, encoded_text_1);
142 |     assert_eq!(row.decoded_text, decoded_text_1);
143 |     assert_eq!(row.path, expected_path);
144 |     assert!(row.successful);
145 | }
146 | 


--------------------------------------------------------------------------------
/src/decoders/atbash_decoder.rs:
--------------------------------------------------------------------------------
  1 | use crate::checkers::CheckerTypes;
  2 | use crate::decoders::interface::check_string_success;
  3 | 
  4 | use super::crack_results::CrackResult;
  5 | use super::interface::Crack;
  6 | use super::interface::Decoder;
  7 | 
  8 | use log::{info, trace};
  9 | 
 10 | /// Atbash Decoder
 11 | pub struct AtbashDecoder;
 12 | 
 13 | impl Crack for Decoder<AtbashDecoder> {
 14 |     fn new() -> Decoder<AtbashDecoder> {
 15 |         Decoder {
 16 |             name: "atbash",
 17 |             description: "Atbash is a monoalphabetic substitution cipher originally used to encrypt the Hebrew alphabet. It can be modified for use with any known writing system with a standard collating order.",
 18 |             link: "https://en.wikipedia.org/wiki/Atbash",
 19 |             tags: vec!["atbash", "substitution", "decoder", "reciprocal"],
 20 |             popularity: 0.6,
 21 |             phantom: std::marker::PhantomData,
 22 |         }
 23 |     }
 24 | 
 25 |     /// This function does the actual decoding
 26 |     /// It returns an Option<string> if it was successful
 27 |     /// Else the Option returns nothing and the error is logged in Trace
 28 |     fn crack(&self, text: &str, checker: &CheckerTypes) -> CrackResult {
 29 |         trace!("Trying atbash with text {:?}", text);
 30 |         let decoded_text = atbash_to_alphabet(text);
 31 | 
 32 |         trace!("Decoded text for atbash: {:?}", decoded_text);
 33 |         let mut results = CrackResult::new(self, text.to_string());
 34 | 
 35 |         if !check_string_success(&decoded_text, text) {
 36 |             info!(
 37 |                 "Failed to decode atbash because check_string_success returned false on string {}",
 38 |                 decoded_text
 39 |             );
 40 |             return results;
 41 |         }
 42 | 
 43 |         let checker_result = checker.check(&decoded_text);
 44 |         results.unencrypted_text = Some(vec![decoded_text]);
 45 | 
 46 |         results.update_checker(&checker_result);
 47 | 
 48 |         results
 49 |     }
 50 |     /// Gets all tags for this decoder
 51 |     fn get_tags(&self) -> &Vec<&str> {
 52 |         &self.tags
 53 |     }
 54 |     /// Gets the name for the current decoder
 55 |     fn get_name(&self) -> &str {
 56 |         self.name
 57 |     }
 58 | 
 59 |     /// Gets the description for the current decoder
 60 |     fn get_description(&self) -> &str {
 61 |         self.description
 62 |     }
 63 | 
 64 |     /// Gets the link for the current decoder
 65 |     fn get_link(&self) -> &str {
 66 |         self.link
 67 |     }
 68 | }
 69 | 
 70 | /// Maps atbash to the alphabet
 71 | fn atbash_to_alphabet(text: &str) -> String {
 72 |     text.chars()
 73 |         .map(|char| match char {
 74 |             letter @ 'a'..='z' => (b'a' + b'z' - letter as u8) as char,
 75 |             letter @ 'A'..='Z' => (b'A' + b'Z' - letter as u8) as char,
 76 |             other => other,
 77 |         })
 78 |         .collect()
 79 | }
 80 | 
 81 | #[cfg(test)]
 82 | mod tests {
 83 |     use super::AtbashDecoder;
 84 |     use crate::{
 85 |         checkers::{
 86 |             athena::Athena,
 87 |             checker_type::{Check, Checker},
 88 |             CheckerTypes,
 89 |         },
 90 |         decoders::interface::{Crack, Decoder},
 91 |     };
 92 | 
 93 |     // helper for tests
 94 |     fn get_athena_checker() -> CheckerTypes {
 95 |         let athena_checker = Checker::<Athena>::new();
 96 |         CheckerTypes::CheckAthena(athena_checker)
 97 |     }
 98 | 
 99 |     #[test]
100 |     fn test_atbash() {
101 |         let decoder = Decoder::<AtbashDecoder>::new();
102 |         let result = decoder.crack("svool dliow", &get_athena_checker());
103 |         assert_eq!(result.unencrypted_text.unwrap()[0], "hello world");
104 |     }
105 | 
106 |     #[test]
107 |     fn test_atbash_capitalization() {
108 |         let decoder = Decoder::<AtbashDecoder>::new();
109 |         let result = decoder.crack(
110 |             "Zgyzhs Hslfow Pvvk Xzkrgzorazgrlm orpv GSRH",
111 |             &get_athena_checker(),
112 |         );
113 |         assert_eq!(
114 |             result.unencrypted_text.unwrap()[0],
115 |             "Atbash Should Keep Capitalization like THIS"
116 |         );
117 |     }
118 | 
119 |     #[test]
120 |     fn test_atbash_non_alphabetic_characters() {
121 |         let decoder = Decoder::<AtbashDecoder>::new();
122 |         let result = decoder.crack(
123 |             "Zgyzhs hslfow ovzev xszizxgvih orpv gsvhv: ',.39=_#%^ rmgzxg zugvi wvxlwrmt!",
124 |             &get_athena_checker(),
125 |         );
126 |         assert_eq!(
127 |             result.unencrypted_text.unwrap()[0],
128 |             "Atbash should leave characters like these: ',.39=_#%^ intact after decoding!"
129 |         );
130 |     }
131 | 
132 |     #[test]
133 |     fn atbash_decode_empty_string() {
134 |         // Atbash returns an empty string, this is a valid atbash string
135 |         // but returns False on check_string_success
136 |         let atbash_decoder = Decoder::<AtbashDecoder>::new();
137 |         let result = atbash_decoder
138 |             .crack("", &get_athena_checker())
139 |             .unencrypted_text;
140 |         assert!(result.is_none());
141 |     }
142 | 
143 |     #[test]
144 |     fn atbash_decode_handles_panics() {
145 |         let atbash_decoder = Decoder::<AtbashDecoder>::new();
146 |         let result = atbash_decoder
147 |             .crack("583920482058430191", &get_athena_checker())
148 |             .unencrypted_text;
149 |         assert!(result.is_none());
150 |     }
151 | 
152 |     #[test]
153 |     fn atbash_handle_panic_if_empty_string() {
154 |         let atbash_decoder = Decoder::<AtbashDecoder>::new();
155 |         let result = atbash_decoder
156 |             .crack("", &get_athena_checker())
157 |             .unencrypted_text;
158 |         assert!(result.is_none());
159 |     }
160 | 
161 |     #[test]
162 |     fn atbash_work_if_string_not_atbash() {
163 |         let atbash_decoder = Decoder::<AtbashDecoder>::new();
164 |         let result = atbash_decoder
165 |             .crack("hello good day!", &get_athena_checker())
166 |             .unencrypted_text;
167 |         assert!(result.is_some());
168 |     }
169 | 
170 |     #[test]
171 |     fn atbash_handle_panic_if_emoji() {
172 |         let atbash_decoder = Decoder::<AtbashDecoder>::new();
173 |         let result = atbash_decoder
174 |             .crack("😂", &get_athena_checker())
175 |             .unencrypted_text;
176 |         assert!(result.is_none());
177 |     }
178 | }
179 | 


--------------------------------------------------------------------------------
/src/searchers/bfs.rs:
--------------------------------------------------------------------------------
  1 | use crate::cli_pretty_printing::decoded_how_many_times;
  2 | use crate::filtration_system::MyResults;
  3 | use crossbeam::channel::Sender;
  4 | 
  5 | use log::trace;
  6 | use std::collections::HashSet;
  7 | use std::sync::atomic::AtomicBool;
  8 | use std::sync::Arc;
  9 | 
 10 | use crate::DecoderResult;
 11 | 
 12 | /// Breadth first search is our search algorithm
 13 | /// https://en.wikipedia.org/wiki/Breadth-first_search
 14 | #[allow(dead_code)]
 15 | pub fn bfs(input: String, result_sender: Sender<Option<DecoderResult>>, stop: Arc<AtomicBool>) {
 16 |     let initial = DecoderResult {
 17 |         text: vec![input],
 18 |         path: vec![],
 19 |     };
 20 |     let mut seen_strings = HashSet::new();
 21 |     // all strings to search through
 22 |     let mut current_strings = vec![initial];
 23 | 
 24 |     let mut curr_depth: u32 = 1; // as we have input string, so we start from 1
 25 | 
 26 |     // loop through all of the strings in the vec
 27 |     while !current_strings.is_empty() && !stop.load(std::sync::atomic::Ordering::Relaxed) {
 28 |         trace!("Number of potential decodings: {}", current_strings.len());
 29 |         trace!("Current depth is {:?}", curr_depth);
 30 | 
 31 |         let mut new_strings: Vec<DecoderResult> = vec![];
 32 | 
 33 |         current_strings.into_iter().try_for_each(|current_string| {
 34 |             let res = super::perform_decoding(&current_string);
 35 | 
 36 |             match res {
 37 |                 // if it's Break variant, we have cracked the text successfully
 38 |                 // so just stop processing further.
 39 |                 MyResults::Break(res) => {
 40 |                     let mut decoders_used = current_string.path;
 41 |                     let text = res.unencrypted_text.clone().unwrap_or_default();
 42 |                     decoders_used.push(res);
 43 |                     let result_text = DecoderResult {
 44 |                         text,
 45 |                         path: decoders_used,
 46 |                     };
 47 | 
 48 |                     decoded_how_many_times(curr_depth);
 49 |                     result_sender
 50 |                         .send(Some(result_text))
 51 |                         .expect("Should succesfully send the result");
 52 | 
 53 |                     // stop further iterations
 54 |                     stop.store(true, std::sync::atomic::Ordering::Relaxed);
 55 |                     None // short-circuits the iterator
 56 |                 }
 57 |                 MyResults::Continue(results_vec) => {
 58 |                     new_strings.extend(results_vec.into_iter().flat_map(|mut r| {
 59 |                         let mut decoders_used = current_string.path.clone();
 60 |                         // text is a vector of strings
 61 |                         let mut text = r.unencrypted_text.take().unwrap_or_default();
 62 | 
 63 |                         text.retain(|s| {
 64 |                             !check_if_string_cant_be_decoded(s) && seen_strings.insert(s.clone())
 65 |                         });
 66 | 
 67 |                         if text.is_empty() {
 68 |                             return None;
 69 |                         }
 70 | 
 71 |                         decoders_used.push(r);
 72 |                         Some(DecoderResult {
 73 |                             // and this is a vector of strings
 74 |                             // TODO we should probably loop through all `text` and create Text structs for each one
 75 |                             // and append those structs
 76 |                             // I think we should keep text as a single string
 77 |                             // and just create more of them....
 78 |                             text,
 79 |                             path: decoders_used.to_vec(),
 80 |                         })
 81 |                     }));
 82 |                     Some(()) // indicate we want to continue processing
 83 |                 }
 84 |             }
 85 |         });
 86 | 
 87 |         current_strings = new_strings;
 88 |         curr_depth += 1;
 89 | 
 90 |         trace!("Refreshed the vector, {:?}", current_strings);
 91 |     }
 92 |     result_sender.try_send(None).ok();
 93 | }
 94 | 
 95 | /// If this returns False it will not attempt to decode that string
 96 | #[allow(dead_code)]
 97 | fn check_if_string_cant_be_decoded(text: &str) -> bool {
 98 |     text.len() <= 2
 99 | }
100 | 
101 | #[cfg(test)]
102 | mod tests {
103 |     use crossbeam::channel::bounded;
104 | 
105 |     use super::*;
106 | 
107 |     #[test]
108 |     fn bfs_succeeds() {
109 |         // this will work after english checker can identify "CANARY: hello"
110 |         let (tx, rx) = bounded::<Option<DecoderResult>>(1);
111 |         let stopper = Arc::new(AtomicBool::new(false));
112 |         bfs("b2xsZWg=".into(), tx, stopper);
113 |         let result = rx.recv().unwrap();
114 |         assert!(result.is_some());
115 |     }
116 | 
117 |     // Vector storing the strings to perform decoding in next iteraion
118 |     // had strings only from result of last decoding it performed.
119 |     // This was due to reassignment in try_for_each block
120 |     // which lead to unintended behaviour.
121 |     // We want strings from all results, so to fix it,
122 |     // we call .extend() to extend the vector.
123 |     // Link to forum https://discord.com/channels/754001738184392704/1002135076034859068
124 |     // This also tests the bug whereby each iteration of caesar was not passed to the next decoder
125 |     // So in Ciphey only Rot1(X) was passed to base64, not Rot13(X)
126 |     #[test]
127 |     fn non_deterministic_like_behaviour_regression_test() {
128 |         // Caesar Cipher (Rot13) -> Base64
129 |         let (tx, rx) = bounded::<Option<DecoderResult>>(1);
130 |         let stopper = Arc::new(AtomicBool::new(false));
131 |         bfs("MTkyLjE2OC4wLjE=".into(), tx, stopper);
132 |         let result = rx.recv().unwrap();
133 |         assert!(result.is_some());
134 |         assert_eq!(result.unwrap().text[0], "192.168.0.1");
135 |     }
136 | 
137 |     #[test]
138 |     fn string_size_checker_returns_bad_if_string_cant_be_decoded() {
139 |         // Should return true because it cant decode it
140 |         let text = "12";
141 |         assert!(check_if_string_cant_be_decoded(text));
142 |     }
143 | 
144 |     #[test]
145 |     fn string_size_checker_returns_ok_if_string_can_be_decoded() {
146 |         // Should return true because it cant decode it
147 |         let text = "123";
148 |         assert!(!check_if_string_cant_be_decoded(text));
149 |     }
150 | }
151 | 


--------------------------------------------------------------------------------
/docs/first_run_implementation_plan.md:
--------------------------------------------------------------------------------
  1 | # ciphey First-Run Configuration Experience Implementation Plan
  2 | 
  3 | ## Overview
  4 | 
  5 | When a user runs ciphey for the first time (detected by the absence of a config file), the program will display a TUI (Text User Interface) that:
  6 | 1. Welcomes the user
  7 | 2. Asks if they want a custom color scheme
  8 | 3. Offers predefined color schemes (Capptucin, Darcula, Default) or a custom option
  9 | 4. Updates the config file with the user's choices
 10 | 
 11 | ## Implementation Plan
 12 | 
 13 | ### 1. Add Required Dependencies
 14 | 
 15 | Add the following dependencies to `Cargo.toml`:
 16 | - `ratatui`: For creating the TUI
 17 | - `crossterm`: For terminal handling (required by Ratatui)
 18 | 
 19 | ```toml
 20 | [dependencies]
 21 | # Existing dependencies...
 22 | ratatui = "0.26.1"
 23 | crossterm = "0.27.0"
 24 | ```
 25 | 
 26 | ### 2. Create First-Run Module
 27 | 
 28 | Create a new file `first_run.rs` in the `src/cli` directory that will contain the TUI implementation for the first-run experience.
 29 | 
 30 | ### 3. Update Config Module
 31 | 
 32 | Modify the `get_config_file_into_struct()` function in `src/config/mod.rs` to call the first-run function when the config file doesn't exist.
 33 | 
 34 | ### 4. Implement Color Scheme Definitions
 35 | 
 36 | Define the color schemes mentioned in the requirements:
 37 | - Capptucin
 38 | - Darcula
 39 | - Default (already implemented)
 40 | 
 41 | ### 5. Implement First-Run TUI
 42 | 
 43 | Implement the TUI with the following components:
 44 | - Welcome screen
 45 | - Color scheme selection
 46 | - Custom RGB input (if selected)
 47 | 
 48 | ## Detailed Implementation Steps
 49 | 
 50 | ### 1. First-Run Module Structure
 51 | 
 52 | ```rust
 53 | // src/cli/first_run.rs
 54 | pub struct ColorScheme {
 55 |     informational: String, // RGB format "r,g,b"
 56 |     warning: String,
 57 |     success: String,
 58 |     question: String,
 59 |     statement: String,
 60 | }
 61 | 
 62 | pub enum PredefinedColorScheme {
 63 |     Capptucin,
 64 |     Darcula,
 65 |     Default,
 66 |     Custom,
 67 | }
 68 | 
 69 | pub fn run_first_time_setup() -> std::collections::HashMap<String, String> {
 70 |     // TUI implementation
 71 | }
 72 | 
 73 | fn get_capptucin_scheme() -> ColorScheme {
 74 |     // Return Capptucin color scheme
 75 | }
 76 | 
 77 | fn get_darcula_scheme() -> ColorScheme {
 78 |     // Return Darcula color scheme
 79 | }
 80 | 
 81 | fn get_default_scheme() -> ColorScheme {
 82 |     // Return Default color scheme
 83 | }
 84 | 
 85 | fn get_custom_scheme() -> ColorScheme {
 86 |     // Prompt user for custom RGB values
 87 | }
 88 | 
 89 | fn color_scheme_to_hashmap(scheme: ColorScheme) -> std::collections::HashMap<String, String> {
 90 |     // Convert ColorScheme to HashMap for Config
 91 | }
 92 | ```
 93 | 
 94 | ### 2. Config Module Updates
 95 | 
 96 | ```rust
 97 | // Modify in src/config/mod.rs
 98 | pub fn get_config_file_into_struct() -> Config {
 99 |     let path = get_config_file_path();
100 |     if !path.exists() {
101 |         // This is the first run, show the TUI
102 |         let colors = crate::cli::first_run::run_first_time_setup();
103 |         
104 |         // Create a default config with the selected colors
105 |         let mut config = Config::default();
106 |         config.colourscheme = colors;
107 |         
108 |         // Save the config to file
109 |         let toml_string = toml::to_string_pretty(&config).expect("Could not serialize config");
110 |         let mut file = File::create(&path).expect("Could not create config file");
111 |         file.write_all(toml_string.as_bytes()).expect("Could not write to config file");
112 |         
113 |         return config;
114 |     }
115 | 
116 |     // Existing code for reading config file...
117 | }
118 | ```
119 | 
120 | ### 3. CLI Module Updates
121 | 
122 | Update `mod.rs` to include the new first_run module:
123 | 
124 | ```rust
125 | // src/cli/mod.rs
126 | mod first_run;
127 | pub use first_run::run_first_time_setup;
128 | ```
129 | 
130 | ## Flow Diagram
131 | 
132 | ```mermaid
133 | flowchart TD
134 |     A[Program Start] --> B{Config File Exists?}
135 |     B -->|No| C[Launch First-Run TUI]
136 |     C --> D[Display Welcome Message]
137 |     D --> E{Want Custom Colors?}
138 |     E -->|No| F[Use Default Colors]
139 |     E -->|Yes| G[Show Color Scheme Options]
140 |     G --> H{Choose Scheme}
141 |     H -->|Capptucin| I[Use Capptucin Colors]
142 |     H -->|Darcula| J[Use Darcula Colors]
143 |     H -->|Default| K[Use Default Colors]
144 |     H -->|Custom| L[Prompt for RGB Values]
145 |     I --> M[Create Config File]
146 |     J --> M
147 |     K --> M
148 |     L --> M
149 |     F --> M
150 |     M --> N[Continue Program]
151 |     B -->|Yes| N
152 | ```
153 | 
154 | ## TUI Design
155 | 
156 | The TUI will have the following screens:
157 | 
158 | 1. **Welcome Screen**:
159 |    ```
160 |    🤠 Howdy! This is your first time running ciphey.
161 |    
162 |    I need to ask you some questions to make it work better for you.
163 |    
164 |    Do you want a custom colour scheme? (y/N)
165 |    ```
166 | 
167 | 2. **Color Scheme Selection Screen** (if user wants custom colors):
168 |    ```
169 |    What colour scheme looks best to you?
170 |    PS: Please think about how this will look with your Terminal background 🙈
171 |    
172 |    1. Capptucin
173 |    Informational Alert Success Questions Statements
174 |    
175 |    2. Darcula
176 |    Informational Alert Success Questions Statements
177 |    
178 |    3. Default
179 |    Informational Alert Success Questions Statements
180 |    
181 |    4. Custom
182 |    ```
183 | 
184 | 3. **Custom RGB Input Screen** (if user selects Custom):
185 |    ```
186 |    Enter 5 RGB values, space seperated. An example is 255,0,0 0,255,0.
187 |    Informational Alert Success Questions Statements
188 |    ```
189 | 
190 | ## Color Scheme Definitions
191 | 
192 | 1. **Capptucin**:
193 |    - Informational: rgb(238, 212, 159)
194 |    - Alert/Warning: rgb(237, 135, 150)
195 |    - Success: rgb(166, 218, 149)
196 |    - Questions: rgb(244, 219, 214)
197 |    - Statements: rgb(202, 211, 245)
198 | 
199 | 2. **Darcula**:
200 |    - Informational: rgb(241, 250, 140)
201 |    - Alert/Warning: rgb(255, 85, 85)
202 |    - Success: rgb(80, 250, 123)
203 |    - Questions: rgb(139, 233, 253)
204 |    - Statements: rgb(248, 248, 242)
205 | 
206 | 3. **Default**: The existing default color scheme in the Config struct.
207 | 
208 | ## Testing Plan
209 | 
210 | 1. Test the TUI interface with different terminal sizes
211 | 2. Test color scheme selection and application
212 | 3. Test custom RGB input validation
213 | 4. Test the complete flow from first run to using the configuration


--------------------------------------------------------------------------------
/docs/parallel_astar_search.md:
--------------------------------------------------------------------------------
  1 | # Parallel A* Search Implementation
  2 | 
  3 | ## Overview
  4 | 
  5 | This document describes the implementation of parallel node expansion in the A* search algorithm in Ares. This enhancement significantly improves performance by processing multiple nodes simultaneously, taking advantage of multi-core processors.
  6 | 
  7 | ## Background
  8 | 
  9 | The A* search algorithm is a best-first search algorithm that uses a heuristic function to prioritize which paths to explore. In the context of Ares, the A* algorithm is used to find the correct sequence of decoders to decode encrypted or encoded text.
 10 | 
 11 | In the original implementation, nodes were processed one at a time, which could be inefficient on modern multi-core systems. The parallel implementation expands multiple nodes simultaneously, significantly improving performance.
 12 | 
 13 | ## Implementation Details
 14 | 
 15 | ### Thread-Safe Data Structures
 16 | 
 17 | The parallel implementation uses thread-safe data structures to ensure correctness:
 18 | 
 19 | 1. **Thread-Safe Priority Queue**:
 20 |    ```rust
 21 |    struct ThreadSafePriorityQueue {
 22 |        queue: Mutex<BinaryHeap<AStarNode>>,
 23 |    }
 24 |    ```
 25 |    This wrapper around `BinaryHeap` ensures thread-safe access to the priority queue.
 26 | 
 27 | 2. **Concurrent Hash Set**:
 28 |    ```rust
 29 |    let seen_strings = DashSet::new();
 30 |    ```
 31 |    `DashSet` from the `dashmap` crate provides a thread-safe hash set for tracking visited states.
 32 | 
 33 | 3. **Atomic Counters**:
 34 |    ```rust
 35 |    let curr_depth = Arc::new(AtomicU32::new(1));
 36 |    let seen_count = Arc::new(AtomicUsize::new(0));
 37 |    ```
 38 |    Atomic counters ensure thread-safe updates to shared counters.
 39 | 
 40 | ### Batch Processing
 41 | 
 42 | The core of the parallel implementation is batch processing of nodes:
 43 | 
 44 | ```rust
 45 | // Extract a batch of nodes to process in parallel
 46 | let batch_size = std::cmp::min(PARALLEL_BATCH_SIZE, open_set.len());
 47 | let batch = open_set.extract_batch(batch_size);
 48 | 
 49 | // Process nodes in parallel
 50 | let new_nodes: Vec<AStarNode> = batch.par_iter()
 51 |     .flat_map(|node| {
 52 |         expand_node(
 53 |             node, 
 54 |             &seen_strings, 
 55 |             &stop, 
 56 |             prune_threshold.load(AtomicOrdering::Relaxed)
 57 |         )
 58 |     })
 59 |     .collect();
 60 | ```
 61 | 
 62 | This code extracts a batch of nodes from the priority queue and processes them in parallel using Rayon's parallel iterator.
 63 | 
 64 | ### Node Expansion
 65 | 
 66 | Node expansion is performed by a separate function that takes a node and returns a vector of new nodes:
 67 | 
 68 | ```rust
 69 | fn expand_node(
 70 |     current_node: &AStarNode,
 71 |     seen_strings: &DashSet<String>,
 72 |     stop: &Arc<AtomicBool>,
 73 |     prune_threshold: usize,
 74 | ) -> Vec<AStarNode> {
 75 |     // Node expansion logic...
 76 | }
 77 | ```
 78 | 
 79 | This function encapsulates the logic for expanding a node, making it easier to parallelize.
 80 | 
 81 | ### Result Handling
 82 | 
 83 | To handle successful decoding results in a thread-safe manner, the implementation uses special "result" nodes:
 84 | 
 85 | ```rust
 86 | // Create a special "result" node with a very low total_cost to ensure it's processed first
 87 | let result_node = AStarNode {
 88 |     state: DecoderResult {
 89 |         text: text.clone(),
 90 |         path: decoders_used,
 91 |     },
 92 |     cost: current_node.cost + 1,
 93 |     heuristic: -1000.0, // Very negative to ensure highest priority
 94 |     total_cost: -1000.0, // Very negative to ensure highest priority
 95 |     next_decoder_name: Some("__RESULT__".to_string()), // Special marker
 96 | };
 97 | ```
 98 | 
 99 | These nodes are identified by a special marker in the `next_decoder_name` field and are given very high priority (negative cost) to ensure they're processed immediately.
100 | 
101 | ## Performance Considerations
102 | 
103 | ### Batch Size
104 | 
105 | The batch size determines how many nodes are processed in parallel. It can be tuned based on the system's capabilities:
106 | 
107 | ```rust
108 | const PARALLEL_BATCH_SIZE: usize = 10;
109 | ```
110 | 
111 | For systems with more cores, increasing this value may improve performance.
112 | 
113 | ### Memory Usage
114 | 
115 | The parallel implementation may use more memory due to multiple nodes being processed simultaneously. The implementation includes pruning mechanisms to manage memory usage:
116 | 
117 | ```rust
118 | // Prune seen strings if we've accumulated too many
119 | let current_seen_count = seen_strings.len();
120 | if current_seen_count > prune_threshold.load(AtomicOrdering::Relaxed) {
121 |     // Pruning logic...
122 | }
123 | ```
124 | 
125 | ### Load Balancing
126 | 
127 | Rayon handles load balancing automatically, distributing work evenly across available threads.
128 | 
129 | ## Benefits
130 | 
131 | The parallel A* search implementation offers several benefits:
132 | 
133 | 1. **Improved Performance**: By processing multiple nodes in parallel, the algorithm can take full advantage of multi-core processors.
134 | 
135 | 2. **Scalability**: The implementation scales with the number of available cores, providing better performance on more powerful systems.
136 | 
137 | 3. **Responsiveness**: The algorithm can continue making progress even if some nodes take longer to process.
138 | 
139 | 4. **Efficient Resource Utilization**: The implementation makes better use of available computing resources.
140 | 
141 | ## Limitations and Future Work
142 | 
143 | While the parallel implementation significantly improves performance, there are some limitations and areas for future work:
144 | 
145 | 1. **Lock Contention**: The thread-safe priority queue uses a mutex, which can become a bottleneck if contention is high. A lock-free priority queue could further improve performance.
146 | 
147 | 2. **Memory Overhead**: The parallel implementation has higher memory overhead due to batch processing. More sophisticated pruning strategies could help manage memory usage.
148 | 
149 | 3. **Dynamic Batch Sizing**: The current implementation uses a fixed batch size. Dynamic batch sizing based on system load and queue size could improve efficiency.
150 | 
151 | 4. **Heuristic Improvements**: The heuristic function could be further improved to better guide the search, potentially reducing the number of nodes that need to be expanded.
152 | 
153 | ## Conclusion
154 | 
155 | The parallel A* search implementation significantly improves the performance of the Ares decoder by taking advantage of multi-core processors. By processing multiple nodes in parallel, the algorithm can explore the search space more efficiently, leading to faster decoding times. 


--------------------------------------------------------------------------------
/docs/parallelization.md:
--------------------------------------------------------------------------------
  1 | # Parallelization in ciphey
  2 | 
  3 | This document describes how parallelization is implemented in the ciphey project, with a focus on the decoder execution system and its relationship to search algorithms.
  4 | 
  5 | ## Overview
  6 | 
  7 | ciphey uses the [Rayon](https://github.com/rayon-rs/rayon) library to implement data parallelism for computationally intensive operations. Rayon provides a simple API for converting sequential iterators into parallel ones, making it straightforward to parallelize operations across multiple CPU cores.
  8 | 
  9 | ## Decoder Parallelization
 10 | 
 11 | ### Implementation
 12 | 
 13 | The primary parallelization in ciphey occurs in the filtration system, specifically in the `run` method of the `Decoders` struct in `src/filtration_system/mod.rs`:
 14 | 
 15 | ```rust
 16 | pub fn run(&self, text: &str, checker: CheckerTypes) -> MyResults {
 17 |     trace!("Running .crack() on all decoders");
 18 |     let (sender, receiver) = channel();
 19 |     self.components
 20 |         .into_par_iter()
 21 |         .try_for_each_with(sender, |s, i| {
 22 |             let results = i.crack(text, &checker);
 23 |             if results.success {
 24 |                 s.send(results).expect("expected no send error!");
 25 |                 // returning None short-circuits the iterator
 26 |                 // we don't process any further as we got success
 27 |                 return None;
 28 |             }
 29 |             s.send(results).expect("expected no send error!");
 30 |             // return Some(()) to indicate that continue processing
 31 |             Some(())
 32 |         });
 33 | 
 34 |     let mut all_results: Vec<CrackResult> = Vec::new();
 35 | 
 36 |     while let Ok(result) = receiver.recv() {
 37 |         // if we recv success, break.
 38 |         if result.success {
 39 |             return MyResults::Break(result);
 40 |         }
 41 |         all_results.push(result)
 42 |     }
 43 | 
 44 |     MyResults::Continue(all_results)
 45 | }
 46 | ```
 47 | 
 48 | ### Key Components
 49 | 
 50 | 1. **Parallel Iterator**: The `into_par_iter()` method converts the sequential iterator over decoders into a parallel one, allowing multiple decoders to be executed concurrently.
 51 | 
 52 | 2. **Channel-based Communication**: A channel (`sender`, `receiver`) is used to collect results from parallel decoder executions.
 53 | 
 54 | 3. **Early Termination**: The `try_for_each_with` method allows for early termination of the parallel iteration when a successful decoding is found, using the `None` return value to short-circuit the iterator.
 55 | 
 56 | 4. **Result Collection**: Results are collected from the channel and either returned immediately (on success) or aggregated into a vector for further processing.
 57 | 
 58 | ## CPU Utilization and Performance Considerations
 59 | 
 60 | ### Saturation Point
 61 | 
 62 | The decoder execution is typically the most computationally intensive part of the ciphey workflow. By parallelizing this operation, ciphey can effectively utilize multiple CPU cores to speed up the decoding process. However, there is a saturation point beyond which adding more parallelism may not improve performance:
 63 | 
 64 | 1. **CPU Core Utilization**: If all available CPU cores are already fully utilized by the parallel decoder execution, adding additional layers of parallelism (such as processing multiple nodes in parallel in the search algorithm) may not provide significant performance benefits.
 65 | 
 66 | 2. **Overhead**: Each layer of parallelism introduces some overhead for thread management, synchronization, and context switching. If this overhead exceeds the benefits of parallelization, performance may actually degrade.
 67 | 
 68 | 3. **Memory Bandwidth**: In some cases, the limiting factor may be memory bandwidth rather than CPU processing power. Multiple threads competing for memory access can lead to contention and reduced performance.
 69 | 
 70 | ### Amdahl's Law
 71 | 
 72 | Amdahl's Law provides a theoretical limit to the speedup that can be achieved through parallelization:
 73 | 
 74 | ```
 75 | Speedup = 1 / ((1 - P) + P/N)
 76 | ```
 77 | 
 78 | Where:
 79 | - P is the proportion of the program that can be parallelized
 80 | - N is the number of processors
 81 | 
 82 | This means that even if we parallelize the decoder execution perfectly, the overall speedup is limited by the sequential portions of the algorithm.
 83 | 
 84 | ## Relationship to Search Algorithms
 85 | 
 86 | ### A* Search Algorithm
 87 | 
 88 | The A* search algorithm in ciphey (`src/searchers/astar.rs`) uses the parallelized decoder execution system but maintains a sequential approach to node processing:
 89 | 
 90 | 1. **Sequential Node Processing**: Nodes are processed one at a time from the priority queue, in order of their f-score (f = g + h, where g is the cost so far and h is the heuristic value).
 91 | 
 92 | 2. **Parallel Decoder Execution**: For each node, the decoder execution is parallelized as described above.
 93 | 
 94 | 3. **Priority Queue Bottleneck**: The priority queue introduces a sequential bottleneck, as nodes must be processed in order of their f-score to maintain the optimality of the A* algorithm.
 95 | 
 96 | ### Optimization Opportunities
 97 | 
 98 | While the core node processing in A* is inherently sequential, there are still opportunities for optimization:
 99 | 
100 | 1. **Pruning Operations**: The quality scoring and sorting during pruning operations could be parallelized, as these operations are independent of the decoder execution.
101 | 
102 | 2. **Heuristic Calculations**: Heuristic calculations for multiple nodes could potentially be parallelized.
103 | 
104 | 3. **Memory Usage Patterns**: Improving memory usage patterns for better cache utilization could provide performance benefits without adding additional parallelism.
105 | 
106 | 4. **Load Balancing**: Ensuring that the workload is evenly distributed across threads can improve overall performance.
107 | 
108 | ## Conclusion
109 | 
110 | The parallelization of decoder execution in ciphey provides significant performance benefits by utilizing multiple CPU cores. However, there are limits to the benefits of parallelization, and adding additional layers of parallelism may not always improve performance.
111 | 
112 | When optimizing the performance of ciphey, it's important to consider the entire system and identify the true bottlenecks. In some cases, optimizing memory usage, improving algorithms, or reducing overhead may provide better performance improvements than adding more parallelism.
113 | 
114 | ## References
115 | 
116 | - [Rayon Documentation](https://docs.rs/rayon/latest/rayon/)
117 | - [Amdahl's Law](https://en.wikipedia.org/wiki/Amdahl%27s_law)
118 | - [A* Search Algorithm](https://en.wikipedia.org/wiki/A*_search_algorithm)


--------------------------------------------------------------------------------
/src/decoders/url_decoder.rs:
--------------------------------------------------------------------------------
  1 | //! Decode a url encoded string
  2 | //! Performs error handling and returns a string
  3 | //! Call url_decoder.crack to use. It returns option<String> and check with
  4 | //! `result.is_some()` to see if it returned okay.
  5 | 
  6 | use crate::checkers::CheckerTypes;
  7 | use crate::decoders::interface::check_string_success;
  8 | 
  9 | use super::crack_results::CrackResult;
 10 | use super::interface::Crack;
 11 | use super::interface::Decoder;
 12 | 
 13 | use log::{debug, info, trace};
 14 | 
 15 | /// The url decoder, call:
 16 | /// `let url_decoder = Decoder::<URLDecoder>::new()` to create a new instance
 17 | /// And then call:
 18 | /// `result = url_decoder.crack(input)` to decode a url string
 19 | /// The struct generated by new() comes from interface.rs
 20 | /// ```
 21 | /// use ciphey::decoders::url_decoder::{URLDecoder};
 22 | /// use ciphey::decoders::interface::{Crack, Decoder};
 23 | /// use ciphey::checkers::{athena::Athena, CheckerTypes, checker_type::{Check, Checker}};
 24 | ///
 25 | /// let decode_url = Decoder::<URLDecoder>::new();
 26 | /// let athena_checker = Checker::<Athena>::new();
 27 | /// let checker = CheckerTypes::CheckAthena(athena_checker);
 28 | ///
 29 | /// let result = decode_url.crack("This%20is%20an%20example%20of%20a%20URL%20encoded%20string%20%3C%3E%3F%3D%7B%7D%7C", &checker).unencrypted_text;
 30 | /// assert!(result.is_some());
 31 | /// assert_eq!(result.unwrap()[0], "This is an example of a URL encoded string <>?={}|");
 32 | /// ```
 33 | pub struct URLDecoder;
 34 | 
 35 | impl Crack for Decoder<URLDecoder> {
 36 |     fn new() -> Decoder<URLDecoder> {
 37 |         Decoder {
 38 |             name: "URL",
 39 |             description: "URL encoding, officially known as percent-encoding, is a method to encode arbitrary data in a Uniform Resource Identifier (URI) using only the limited US-ASCII characters legal within a URI.",
 40 |             link: "https://en.wikipedia.org/wiki/URL_encoding",
 41 |             tags: vec!["url", "web", "decoder", "base"],
 42 |             popularity: 0.6,
 43 |             phantom: std::marker::PhantomData,
 44 |         }
 45 |     }
 46 | 
 47 |     /// This function does the actual decoding
 48 |     /// It returns an Option<string> if it was successful
 49 |     /// Else the Option returns nothing and the error is logged in Trace
 50 |     fn crack(&self, text: &str, checker: &CheckerTypes) -> CrackResult {
 51 |         trace!("Trying url with text {:?}", text);
 52 |         let decoded_text: Option<String> = decode_url_no_error_handling(text);
 53 | 
 54 |         trace!("Decoded text for url: {:?}", decoded_text);
 55 |         let mut results = CrackResult::new(self, text.to_string());
 56 | 
 57 |         if decoded_text.is_none() {
 58 |             debug!("Failed to decode url because URLDecoder::decode_url_no_error_handling returned None");
 59 |             return results;
 60 |         }
 61 | 
 62 |         let decoded_text = decoded_text.unwrap();
 63 |         if !check_string_success(&decoded_text, text) {
 64 |             info!(
 65 |                 "Failed to decode url because check_string_success returned false on string {}",
 66 |                 decoded_text
 67 |             );
 68 |             return results;
 69 |         }
 70 | 
 71 |         let checker_result = checker.check(&decoded_text);
 72 |         results.unencrypted_text = Some(vec![decoded_text]);
 73 | 
 74 |         results.update_checker(&checker_result);
 75 | 
 76 |         results
 77 |     }
 78 |     /// Gets all tags for this decoder
 79 |     fn get_tags(&self) -> &Vec<&str> {
 80 |         &self.tags
 81 |     }
 82 |     /// Gets the name for the current decoder
 83 |     fn get_name(&self) -> &str {
 84 |         self.name
 85 |     }
 86 |     /// Gets the description for the current decoder
 87 |     fn get_description(&self) -> &str {
 88 |         self.description
 89 |     }
 90 |     /// Gets the link for the current decoder
 91 |     fn get_link(&self) -> &str {
 92 |         self.link
 93 |     }
 94 | }
 95 | 
 96 | /// helper function
 97 | fn decode_url_no_error_handling(text: &str) -> Option<String> {
 98 |     // Runs the code to decode url
 99 |     // Doesn't perform error handling, call from_url
100 |     if let Ok(decoded_text) = urlencoding::decode(text) {
101 |         return Some(decoded_text.into_owned());
102 |     }
103 |     None
104 | }
105 | 
106 | #[cfg(test)]
107 | mod tests {
108 |     use super::URLDecoder;
109 |     use crate::{
110 |         checkers::{
111 |             athena::Athena,
112 |             checker_type::{Check, Checker},
113 |             CheckerTypes,
114 |         },
115 |         decoders::interface::{Crack, Decoder},
116 |     };
117 | 
118 |     // helper for tests
119 |     fn get_athena_checker() -> CheckerTypes {
120 |         let athena_checker = Checker::<Athena>::new();
121 |         CheckerTypes::CheckAthena(athena_checker)
122 |     }
123 | 
124 |     #[test]
125 |     fn url_decodes_successfully() {
126 |         // This tests if URL can decode URL successfully
127 |         let url_decoder = Decoder::<URLDecoder>::new();
128 |         let result = url_decoder.crack(
129 |             "This%20is%20an%20example%20of%20a%20URL%20encoded%20string%20%3C%3E%3F%3D%7B%7D%7C",
130 |             &get_athena_checker(),
131 |         );
132 |         assert_eq!(
133 |             result.unencrypted_text.unwrap()[0],
134 |             "This is an example of a URL encoded string <>?={}|"
135 |         );
136 |     }
137 | 
138 |     #[test]
139 |     fn url_handles_panics() {
140 |         // This tests if URL can handle panics
141 |         // It should return None
142 |         let url_decoder = Decoder::<URLDecoder>::new();
143 |         let result = url_decoder
144 |             .crack(
145 |                 "hello my name is panicky mc panic face!",
146 |                 &get_athena_checker(),
147 |             )
148 |             .unencrypted_text;
149 |         assert!(result.is_none());
150 |     }
151 | 
152 |     #[test]
153 |     fn url_handles_panic_if_empty_string() {
154 |         // This tests if URL can handle an empty string
155 |         // It should return None
156 |         let url_decoder = Decoder::<URLDecoder>::new();
157 |         let result = url_decoder
158 |             .crack("", &get_athena_checker())
159 |             .unencrypted_text;
160 |         assert!(result.is_none());
161 |     }
162 | 
163 |     #[test]
164 |     fn url_handles_panic_if_emoji() {
165 |         // This tests if URL can handle an emoji
166 |         // It should return None
167 |         let url_decoder = Decoder::<URLDecoder>::new();
168 |         let result = url_decoder
169 |             .crack("😂", &get_athena_checker())
170 |             .unencrypted_text;
171 |         assert!(result.is_none());
172 |     }
173 | }
174 | 


--------------------------------------------------------------------------------
/.github/build.yml:
--------------------------------------------------------------------------------
  1 | name: CD Pipeline
  2 | 
  3 | on: [push]
  4 | 
  5 | jobs:
  6 |   build-nix:
  7 |     env:
  8 |       IN_PIPELINE: true
  9 |     runs-on: ${{ matrix.os }}
 10 |     if: github.ref == 'refs/heads/master'
 11 |     strategy:
 12 |       matrix:
 13 |         type: [ubuntu-x64, ubuntu-x86, armv7, aarch64]
 14 |         include:
 15 |           - type: ubuntu-x64
 16 |             os: ubuntu-latest
 17 |             target: x86_64-unknown-linux-musl
 18 |             name: x86_64-linux-ares
 19 |             path: target/x86_64-unknown-linux-musl/release/ares
 20 |             pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
 21 |           - type: ubuntu-x86
 22 |             os: ubuntu-latest
 23 |             target: i686-unknown-linux-musl
 24 |             name: x86-linux-ares
 25 |             path: target/i686-unknown-linux-musl/release/ares
 26 |             pkg_config_path: /usr/lib/i686-linux-gnu/pkgconfig
 27 |           - type: armv7
 28 |             os: ubuntu-latest
 29 |             target: armv7-unknown-linux-gnueabihf
 30 |             name: armv7-linux-ares
 31 |             path: target/armv7-unknown-linux-gnueabihf/release/ares
 32 |             pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
 33 |           - type: aarch64
 34 |             os: ubuntu-latest
 35 |             target: aarch64-unknown-linux-gnu
 36 |             name: aarch64-linux-ares
 37 |             path: target/aarch64-unknown-linux-gnu/release/ares
 38 |             pkg_config_path: /usr/lib/x86_64-linux-gnu/pkgconfig
 39 |     steps:
 40 |       - uses: actions/checkout@v4
 41 |       - name: Cache cargo & target directories
 42 |         uses: Swatinem/rust-cache@v2
 43 |       - name: Build binary
 44 |         uses: houseabsolute/actions-rust-cross@v0
 45 |         with:
 46 |           command: build
 47 |           target: ${{ matrix.target }}
 48 |           args: "--locked --release"
 49 |           strip: true
 50 |           toolchain: stable
 51 |       - name: Build tar.gz for homebrew installs
 52 |         if: matrix.type == 'ubuntu-x64'
 53 |         run: |
 54 |           tar czf ${{ matrix.name }}.tar.gz -C target/x86_64-unknown-linux-musl/release ares
 55 |       - uses: actions/upload-artifact@v4
 56 |         with:
 57 |           name: ${{ matrix.name }}
 58 |           path: ${{ matrix.path }}
 59 |       - uses: actions/upload-artifact@v4
 60 |         if: matrix.type == 'ubuntu-x64'
 61 |         with:
 62 |           name: ${{ matrix.name }}.tar.gz
 63 |           path: ${{ matrix.name }}.tar.gz
 64 |   
 65 |   build-deb:
 66 |     needs: [build-nix]
 67 |     runs-on: ubuntu-latest
 68 |     env:
 69 |       IN_PIPELINE: true
 70 |     steps:
 71 |       - uses: actions/checkout@v4
 72 |       - name: Install cargo-deb
 73 |         run: cargo install -f cargo-deb
 74 |       - uses: awalsh128/cache-apt-pkgs-action@v1
 75 |         with:
 76 |           packages: musl-tools # provides musl-gcc
 77 |           version: 1.0
 78 |       - name: Install musl toolchain
 79 |         run: rustup target add x86_64-unknown-linux-musl
 80 |       - name: Deb Build
 81 |         run: cargo deb --target=x86_64-unknown-linux-musl
 82 |       - name: Upload Deb Artifact
 83 |         uses: actions/upload-artifact@v4
 84 |         with:
 85 |           name: ares.deb
 86 |           path: ./target/x86_64-unknown-linux-musl/debian/*
 87 | 
 88 |   build-macos:
 89 |     env:
 90 |       IN_PIPELINE: true
 91 |     runs-on: macos-latest
 92 |     if: github.ref == 'refs/heads/master'
 93 |     steps:
 94 |       - uses: actions/checkout@v4
 95 |       - name: Cache cargo & target directories
 96 |         uses: Swatinem/rust-cache@v2
 97 |       - name: Build binary
 98 |         uses: houseabsolute/actions-rust-cross@v0
 99 |         with:
100 |           command: build
101 |           target: x86_64-apple-darwin
102 |           args: "--locked --release"
103 |           strip: true
104 |           toolchain: stable
105 |       - name: Build tar.gz for homebrew installs
106 |         run: |
107 |           tar czf x86_64-macos-ares.tar.gz -C target/x86_64-apple-darwin/release ares
108 |       - uses: actions/upload-artifact@v4
109 |         with:
110 |           name: x86_64-macos-ares
111 |           path: target/x86_64-apple-darwin/release/ares
112 |       - uses: actions/upload-artifact@v4
113 |         with:
114 |           name: x86_64-macos-ares.tar.gz
115 |           path: x86_64-macos-ares.tar.gz
116 |   
117 |   build-macos-aarch64:
118 |     env:
119 |       IN_PIPELINE: true
120 |     runs-on: macos-latest
121 |     if: github.ref == 'refs/heads/master'
122 |     steps:
123 |       - uses: actions/checkout@v4
124 |       - name: Cache cargo & target directories
125 |         uses: Swatinem/rust-cache@v2
126 |       - name: Build binary
127 |         uses: houseabsolute/actions-rust-cross@v0
128 |         with:
129 |           command: build
130 |           target: aarch64-apple-darwin
131 |           args: "--locked --release"
132 |           strip: true
133 |           toolchain: stable
134 |       - name: Build tar.gz for homebrew installs
135 |         run: |
136 |           tar czf aarch64-macos-ares.tar.gz -C target/aarch64-apple-darwin/release ares
137 |       - uses: actions/upload-artifact@v4
138 |         with:
139 |           name: aarch64-macos-ares
140 |           path: target/aarch64-apple-darwin/release/ares
141 |       - uses: actions/upload-artifact@v4
142 |         with:
143 |           name: aarch64-macos-ares.tar.gz
144 |           path: aarch64-macos-ares.tar.gz
145 | 
146 |   build-windows:
147 |     env:
148 |       IN_PIPELINE: true
149 |     runs-on: ${{ matrix.os }}
150 |     if: github.ref == 'refs/heads/master'
151 |     strategy:
152 |       matrix:
153 |         type: [windows-x64, windows-x86]
154 |         include:
155 |           - type: windows-x64
156 |             os: windows-latest
157 |             target: x86_64-pc-windows-msvc
158 |             name: x86_64-windows-ares.exe
159 |             path: target\x86_64-pc-windows-msvc\release\ares.exe
160 |           - type: windows-x86
161 |             os: windows-latest
162 |             target: i686-pc-windows-msvc
163 |             name: x86-windows-ares.exe
164 |             path: target\i686-pc-windows-msvc\release\ares.exe
165 |     steps:
166 |       - uses: actions/checkout@v4
167 |       - name: Cache cargo & target directories
168 |         uses: Swatinem/rust-cache@v2
169 |       - name: Build binary
170 |         uses: houseabsolute/actions-rust-cross@v0
171 |         with:
172 |           command: build
173 |           target:  ${{ matrix.target }}
174 |           args: "--locked --release"
175 |           strip: true
176 |           toolchain: stable
177 |       - uses: actions/upload-artifact@v4
178 |         with:
179 |           name: ${{ matrix.name }}
180 |           path: ${{ matrix.path }}
181 | 


--------------------------------------------------------------------------------
/images/main_demo.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="840" height="537.62"><rect width="840" height="537.62" rx="5" ry="5" class="a"/><svg y="0%" x="0%"><circle cx="20" cy="20" r="6" fill="#ff5f58"/><circle cx="40" cy="20" r="6" fill="#ffbd2e"/><circle cx="60" cy="20" r="6" fill="#18c132"/></svg><svg height="477.62" viewBox="0 0 80 47.762" width="800" x="15" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" y="50"><style>@keyframes n{0%{transform:translateX(0)}2%{transform:translateX(-160px)}17.3%{transform:translateX(-240px)}31.2%{transform:translateX(-320px)}40.7%{transform:translateX(-400px)}40.9%{transform:translateX(-480px)}41.1%{transform:translateX(-720px)}to{transform:translateX(-800px)}}.a{fill:#282d35}.f,.g{fill:#a8cc8c;font-weight:700;white-space:pre}.g{fill:#66c2cd}.h{fill:#b9c0cb}.i,.j,.k{fill:#282d35;white-space:pre}.j,.k{fill:#b9c0cb}.k{fill:#dbab79;font-weight:700}</style><g font-family="Monaco,Consolas,Menlo,'Bitstream Vera Sans Mono','Powerline Symbols',monospace" font-size="1.67"><defs><symbol id="1"><text y="1.67" class="f">➜</text><text x="3.006" y="1.67" class="g">~</text></symbol><symbol id="2"><text y="1.67" class="f">➜</text><text x="3.006" y="1.67" class="g">~</text><path class="h" d="M5.01 0h75v2.171h-75z"/><text x="5.01" y="1.67" class="i">ciphey -t &apos;LJIVE222KFJGUWSRJZ2FUUKSNNNFCTTLLJIVE5C2KFJGWWSRKJVVUUKOORNFCUTLLJ</text></symbol><symbol id="3"><path class="h" d="M0 0h80v2.171H0z"/><text y="1.67" class="i">IVE222KFHHIWSRKJVVUUKSNNNEOUTULJIU4222KFJGWWSRJZ2FUUKONNNFCTTKLJIU45C2KFJGWWSHJZ</text></symbol><symbol id="4"><path class="h" d="M0 0h80v2.171H0z"/><text y="1.67" class="i">VVUR2SORNFCUTLLJIVE222I5JHIWSRKJVVUR2ONJNEOTTULJIVE222KFJGWWSRJZ2FUUKSNNNFCTTLLJ</text></symbol><symbol id="5"><path class="h" d="M0 0h34v2.171H0z"/><text y="1.67" class="i">IU45C2KFHGWWSRJZVFUUKSHU======&apos; -d</text></symbol><symbol id="6"><text y="1.67" class="j">🥳</text><text x="3.006" y="1.67" class="j">ciphey</text><text x="8.016" y="1.67" class="j">has</text><text x="12.024" y="1.67" class="j">decoded</text><text x="20.04" y="1.67" class="j">205</text><text x="24.048" y="1.67" class="j">times</text><text x="30.06" y="1.67" class="j">times.</text></symbol><symbol id="7"><text y="1.67" class="j">If</text><text x="3.006" y="1.67" class="j">you</text><text x="7.014" y="1.67" class="j">would</text><text x="13.026" y="1.67" class="j">have</text><text x="18.036" y="1.67" class="j">used</text><text x="23.046" y="1.67" class="j">Ciphey,</text><text x="31.062" y="1.67" class="j">it</text><text x="34.068" y="1.67" class="j">would</text><text x="40.08" y="1.67" class="j">have</text><text x="45.09" y="1.67" class="j">taken</text><text x="51.102" y="1.67" class="j">you</text><text x="55.11" y="1.67" class="j">41</text><text x="58.116" y="1.67" class="j">seconds</text></symbol><symbol id="8"><text y="1.67" class="j">The</text><text x="4.008" y="1.67" class="j">plaintext</text><text x="14.028" y="1.67" class="j">is:</text></symbol><symbol id="9"><text y="1.67" class="k">hello,</text><text x="7.014" y="1.67" class="k">world!</text></symbol><symbol id="10"><text y="1.67" class="j">and</text><text x="4.008" y="1.67" class="j">the</text><text x="8.016" y="1.67" class="j">decoders</text><text x="17.034" y="1.67" class="j">used</text><text x="22.044" y="1.67" class="j">are</text><text x="26.052" y="1.67" class="k">Base32</text><text x="33.066" y="1.67" class="k">→</text><text x="35.07" y="1.67" class="k">Caesar</text><text x="42.084" y="1.67" class="k">Cipher</text><text x="49.098" y="1.67" class="k">→</text><text x="51.102" y="1.67" class="k">Base64</text><text x="58.116" y="1.67" class="k">→</text><text x="60.12" y="1.67" class="k">Binary</text></symbol><symbol id="a"><path fill="transparent" d="M0 0h80v23H0z"/></symbol><symbol id="b"><path fill="#6f7683" d="M0 0h1.102v2.171H0z"/></symbol></defs><path class="a" d="M0 0h80v47.762H0z"/><g style="animation-duration:5.440228s;animation-iteration-count:infinite;animation-name:n;animation-timing-function:steps(1,end)"><svg width="880"><svg><use xlink:href="#a"/><use xlink:href="#b" x="-.004"/></svg><svg x="80"><use xlink:href="#a"/><use xlink:href="#b" x="4.996"/><use xlink:href="#1"/></svg><svg x="160"><use xlink:href="#a"/><use xlink:href="#b" x="4.996"/><use xlink:href="#1"/></svg><svg x="240"><use xlink:href="#a"/><use xlink:href="#b" x="33.996" y="6.488"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/></svg><svg x="320"><use xlink:href="#a"/><use xlink:href="#b" x="-.004" y="8.659"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/></svg><svg x="400"><use xlink:href="#a"/><use xlink:href="#b" x="-.004" y="17.343"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/></svg><svg x="480"><use xlink:href="#a"/><use xlink:href="#b" x="-.004" y="23.856"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/><use xlink:href="#8" y="17.368"/><use xlink:href="#9" y="19.539"/><use xlink:href="#10" y="21.71"/></svg><svg x="560"><use xlink:href="#a"/><use xlink:href="#b" x="4.996" y="23.856"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/><use xlink:href="#8" y="17.368"/><use xlink:href="#9" y="19.539"/><use xlink:href="#10" y="21.71"/><use xlink:href="#1" y="23.881"/></svg><svg x="640"><use xlink:href="#a"/><use xlink:href="#b" x="4.996" y="23.856"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/><use xlink:href="#8" y="17.368"/><use xlink:href="#9" y="19.539"/><use xlink:href="#10" y="21.71"/><use xlink:href="#1" y="23.881"/></svg><svg x="720"><use xlink:href="#a"/><use xlink:href="#b" x="4.996" y="23.856"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/><use xlink:href="#8" y="17.368"/><use xlink:href="#9" y="19.539"/><use xlink:href="#10" y="21.71"/><use xlink:href="#1" y="23.881"/></svg><svg x="800"><use xlink:href="#a"/><use xlink:href="#b" x="-.004" y="26.027"/><use xlink:href="#2"/><use xlink:href="#3" y="2.171"/><use xlink:href="#4" y="4.342"/><use xlink:href="#5" y="6.513"/><use xlink:href="#6" y="10.855"/><use xlink:href="#7" y="13.026"/><use xlink:href="#8" y="17.368"/><use xlink:href="#9" y="19.539"/><use xlink:href="#10" y="21.71"/><use xlink:href="#1" y="23.881"/></svg></svg></g></g></svg></svg>


--------------------------------------------------------------------------------