├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── examples └── readme.md ├── rhosts.graffle ├── rust-toolchain ├── src ├── cmd │ ├── build.rs │ ├── cache.rs │ ├── core.rs │ ├── init.rs │ └── mod.rs ├── config.rs ├── lib.rs ├── main.rs ├── types.rs └── utils.rs └── tests └── readme.md /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | on: 3 | push: 4 | branches: 5 | - "**" 6 | pull_request: 7 | branches: 8 | - "**" 9 | 10 | jobs: 11 | check_pkg: 12 | name: Check Package 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v3 18 | name: Clone repository 19 | 20 | - name: Setup Rust 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | profile: minimal 24 | toolchain: stable 25 | override: true 26 | 27 | - name: Check Package 28 | uses: actions-rs/cargo@v1 29 | with: 30 | command: check 31 | 32 | test_pkg: 33 | needs: [check_pkg] 34 | name: Test Package 35 | 36 | runs-on: ubuntu-latest 37 | 38 | steps: 39 | - uses: actions/checkout@v3 40 | name: Clone repository 41 | 42 | - name: Setup Rust 43 | uses: actions-rs/toolchain@v1 44 | with: 45 | profile: minimal 46 | toolchain: stable 47 | override: true 48 | 49 | - name: Test Package 50 | uses: actions-rs/cargo@v1 51 | with: 52 | command: test 53 | 54 | lint_pkg: 55 | needs: [test_pkg] 56 | name: Lint Package 57 | 58 | runs-on: ubuntu-latest 59 | 60 | steps: 61 | - uses: actions/checkout@v3 62 | name: Clone repository 63 | 64 | - name: Setup Rust 65 | uses: actions-rs/toolchain@v1 66 | with: 67 | profile: minimal 68 | toolchain: stable 69 | override: true 70 | components: rustfmt, clippy 71 | 72 | - name: Check Package Format 73 | uses: actions-rs/cargo@v1 74 | with: 75 | command: fmt 76 | args: --all -- --check 77 | 78 | - name: Lint Package 79 | uses: actions-rs/cargo@v1 80 | with: 81 | command: clippy 82 | # Note: Increase or decrease if necessary. 
83 | args: -- -D error 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | 13 | #Added by cargo 14 | 15 | /target 16 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rhosts" 3 | version = "0.0.2" 4 | authors = ["Steven Black "] 5 | edition = "2021" 6 | license = "MIT" 7 | readme = "README.md" 8 | repository = "https://github.com/StevenBlack/rhosts" 9 | homepage = "https://github.com/StevenBlack/rhosts" 10 | description = "Tools to mess with hosts files." 
11 | 12 | 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | addr = "0.15.3" 17 | anyhow = "1.0.44" 18 | async-std = { version = "1.12.0", features = ["unstable", "attributes"] } 19 | chrono = "0.4" 20 | futures = "0.3" 21 | psl = "2.0.70" 22 | regex = "1.5.5" 23 | reqwest = { version = "0.11", features = ["blocking", "json"] } 24 | directories = "4.0.1" 25 | num-format = "0.4.0" 26 | arboard = "2.1.1" 27 | async-task-group = "0.2.1" 28 | serde = { version = "1.0.138", features = ["serde_derive"] } 29 | serde_derive = "1.0.138" 30 | serde_json = "1.0.82" 31 | array_tool = "1.0.3" 32 | thousands = "0.2.0" 33 | clap = { version = "4.0.15", features = ["derive", "cargo"] } 34 | clap_complete = "4.0.2" 35 | indexmap = "2.6.0" 36 | 37 | [dev-dependencies] 38 | semver = "1.0" 39 | async-std = { version = "1.12.0", features = ["unstable", "attributes"] } 40 | anyhow = "1.0.44" 41 | 42 | [[bin]] 43 | # name = "rhosts" 44 | name = "rh" # The name of the target. 45 | path = "src/main.rs" # The source file of the target. 46 | test = true # Is tested by default. 47 | doctest = true # Documentation examples are tested by default. 48 | bench = true # Is benchmarked by default. 
49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Steven Black 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rhosts (rh) 2 | 3 | Host file tools written in [Rust](https://www.rust-lang.org/) conceived while 4 | stuck at home during a pandemic. 5 | 6 | > [!NOTE] 7 | > This is all very preliminary. This is not presently fit for general consumption. 8 | 9 | > [!NOTE] 10 | > **External non-rust dependency**: `openssl-dev` 11 | 12 | ## Calling `rh` 13 | 14 | This is the output from `$ rh -h`. 15 | 16 | ```rust 17 | $ rh -h 18 | 19 | Tools to mess with hosts files. 
20 | 21 | Usage: rh [OPTIONS] [COMMAND] 22 | 23 | Commands: 24 | build Build hosts files 25 | cache Application cache initialize, prime, clear, or report 26 | init Initialize cache and templates 27 | info Display additional information about the application 28 | help Print this message or the help of the given subcommand(s) 29 | 30 | Options: 31 | -m, --main The main hosts file, the basis for comparison [default: base] 32 | -c, --compare The hosts file to compare to mainhosts 33 | --ip The ip address to use when listing hosts [default: 0.0.0.0] 34 | -d, --default_hosts Add default hosts to when listing hosts The default hosts will be placed at the top of hosts lists 35 | -s, --sort Sort the domains. The sort order is domain, tdl, subdomain1, subdomain2, etc 36 | -o, --output The output file. By default, output is to std out 37 | -p, --plain Plain listing - domains only, without addresses, when listing domains 38 | -q, --quiet Quiet, terse output mode. Outputs the number of domains only 39 | --stats Print statistics about the domains [possible values: true, false] 40 | -i, --intersection Print the intersection of mainhosts and comparehosts 41 | -r, --rootdomains 42 | -t, --tld Print a tally of top level domains found in the list 43 | -l, --limit Limit for listing TLD and root domains, 0 = unlimited [default: 30] 44 | --skipheaders Omit the file comment headers in output 45 | --showduplicates List duplicates when reporting on a hosts list 46 | --invalid List invalid domains when reporting on a hosts list 47 | --clip Use the contents of the system clipboard as compare hosts 48 | -u, --unique List the unique domain names 49 | -v, --verbose Verbose output, useful for development 50 | --skipcache Do not use cache 51 | -h, --help Print help information 52 | -V, --version Print version information 53 | ``` 54 | 55 | ## Vision for this project 56 | 57 | This is to be a **full-featured swiss-knife** for assessing and working with 58 | amalgamated hosts files. 
59 | 60 | ![MissionVsVision](https://user-images.githubusercontent.com/80144/158078813-87141f60-a03f-4367-a8c1-3d8da68de45e.gif) 61 | 62 | ## Mission for development 63 | 64 | Ultimately this will 65 | 66 | 1. replace the python-based [hosts](https://github.com/StevenBlack/hosts) build tools 67 | 2. replace [ghosts](https://github.com/StevenBlack/ghosts), the set of ancillary 68 | tools, written in Go, to assess various hosts lists. 69 | 70 | ## Goals of this project 71 | 72 | Here is the list of tangible goals for the project. 73 | 74 | * [Extensible architecture](https://github.com/StevenBlack/rhosts/wiki/Extensible-Architecture-Discussion) so development can progress cleanly on many fronts. 75 | * Collect and maintain historical statistics about amalgamated lists produced, 76 | and of the component lists that make up the amalgamated hosts. 77 | * Ability to assess the impact of each list in the composition of amalgamated hosts. 78 | * Ability to assess the impact of proposed additions to the amalgamated hosts. 79 | 80 | ## Related repositories 81 | 82 | * [StevenBlack/hosts](https://github.com/StevenBlack/hosts) is my amalgamated hosts file, with custom variants, from various curated sources. 83 | * [StevenBlack/ghosts](https://github.com/StevenBlack/ghosts) is a CLI tool written in Go. 84 | -------------------------------------------------------------------------------- /examples/readme.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Individual examples are run like `cargo run --example my_example`. 
4 | -------------------------------------------------------------------------------- /rhosts.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenBlack/rhosts/0f1aa8ad976f116850cd208f11053cf48e333928/rhosts.graffle -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | stable 2 | -------------------------------------------------------------------------------- /src/cmd/build.rs: -------------------------------------------------------------------------------- 1 | use crate::{types::Amalgam, Arguments}; 2 | use anyhow::Error; 3 | 4 | // Build command implementation: loads `args.mainhosts` into an `Amalgam` and prints its domains. 5 | // This is all very experimental right now. 6 | pub async fn execute(args: Arguments) -> Result<(), Error> { 7 | if args.verbose { 8 | println!("Handled by 'build'."); 9 | } 10 | let amalgam = Amalgam::new(vec!(args.mainhosts)).await; 11 | 12 | 13 | 14 | if args.domains_sort { 15 | let sorteddomains = amalgam.sorteddomains(); 16 | for domain in sorteddomains { 17 | if args.plain_output { 18 | println!("{}", domain); 19 | } else { 20 | println!("{} {}", args.iplocalhost, domain); 21 | } 22 | 23 | } 24 | return Ok(()); 25 | 26 | } 27 | for domain in amalgam.domains { 28 | println!("{}", domain); 29 | } 30 | 31 | Ok(()) 32 | } 33 | -------------------------------------------------------------------------------- /src/cmd/cache.rs: -------------------------------------------------------------------------------- 1 | //! Cache related commands and services 2 | //! 
3 | 4 | // #![allow(dead_code)] 5 | use anyhow::{bail, anyhow}; 6 | use async_std::println; 7 | use crate::{Commands, Arguments, config::get_shortcuts, types::Hostssource, utils::hash}; 8 | use clap::Subcommand; 9 | use anyhow::Context; 10 | use directories::ProjectDirs; 11 | use futures::executor::block_on; 12 | use std::{ 13 | fs::{self, File}, 14 | io::prelude::*, 15 | path::{Path,PathBuf} 16 | }; 17 | 18 | #[derive(Hash)] 19 | /// Enum containing the possible cacheable types 20 | pub enum Cacheable { 21 | Vec(Vec), 22 | String(String), 23 | } 24 | 25 | #[derive(Clone, Debug, Subcommand)] 26 | /// Enum containing the possible actions for the `cache` subcommand. 27 | pub enum CacheCommands { 28 | /// clean the cache 29 | Clear, 30 | /// Prime the cache. 31 | Prime, 32 | /// Report on the cache 33 | Report, 34 | /// Information about the cache 35 | Info, 36 | } 37 | 38 | /// Display information about the application cache. 39 | pub async fn info(_args:Arguments) -> anyhow::Result<()> { 40 | let cache_dir = get_cache_dir().await; 41 | println!("Cache information:").await; 42 | println!("Local cache folder: {}", cache_dir.display()).await; 43 | Ok(()) 44 | } 45 | 46 | /// Initialize the application cache. 47 | pub async fn init(args:Arguments) -> anyhow::Result<()> { 48 | let cache_dir = get_cache_dir().await; 49 | if !Path::new(&cache_dir).is_dir() { 50 | if args.verbose { 51 | println!("Initializing empty cache.").await; 52 | } 53 | fs::create_dir_all(cache_dir)?; 54 | } 55 | Ok(()) 56 | } 57 | 58 | /// Get cached item from the application cache. 59 | pub async fn get(s: String) -> Option { 60 | let pb = get_cache_dir().await.join(get_cache_key(Cacheable::String(s))); 61 | if pb.is_file() { 62 | Some(pb) 63 | } else { 64 | None 65 | } 66 | } 67 | 68 | /// Set cached item in the application cache. 
69 | pub async fn set(file: String, body: String) -> anyhow::Result<()> { 70 | let mut output = File::create(get_cache_dir().await.join(get_cache_key(Cacheable::String(file)))).expect("Unable to cache HTTP request result."); 71 | if write!(output, "{}", body).is_ok() { 72 | Ok(()) 73 | } else { 74 | Err(anyhow!("Unable to cache HTTP request result.")) 75 | } 76 | } 77 | 78 | /// Deletes all cache data. 79 | pub async fn delete(args: Arguments) -> anyhow::Result<()> { 80 | if args.verbose { 81 | println!("Deleting cache.").await; 82 | } 83 | fs::remove_dir_all(get_cache_dir().await)?; 84 | Ok(()) 85 | } 86 | 87 | /// Get the cache directory. 88 | pub async fn execute(args: Arguments) -> anyhow::Result<()> { 89 | if args.verbose { 90 | println!("Handled by 'cache'.").await; 91 | _ = info(args.clone()); 92 | } 93 | 94 | match &args.command { 95 | Some(Commands::Cache { cacheaction: Some(CacheCommands::Clear) }) => { 96 | clear(args.clone()).await?; 97 | }, 98 | Some(Commands::Cache { cacheaction: Some(CacheCommands::Prime) }) => { 99 | prime(args.clone()).await?; 100 | }, 101 | Some(Commands::Cache { cacheaction: Some(CacheCommands::Report) }) => { 102 | report(args.clone()).await?; 103 | }, 104 | Some(Commands::Cache { cacheaction: Some(CacheCommands::Info) }) => { 105 | info(args.clone()).await?; 106 | }, 107 | _ => { 108 | bail!("No such cache subcommand."); 109 | } 110 | }; 111 | Ok(()) 112 | } 113 | 114 | /// Delete and reinitialize cache 115 | async fn clear(args: Arguments) -> anyhow::Result<()> { 116 | if args.verbose { 117 | println!("Clearing cache.").await; 118 | } 119 | delete(args.clone()).await.context(format!("unable to delete cache"))?; 120 | init(args.clone()).await.context(format!("Unable to initialize cache"))?; 121 | Ok(()) 122 | } 123 | 124 | /// Prime all caches 125 | pub(crate) async fn prime(args: Arguments) -> anyhow::Result<()> { 126 | if args.verbose { 127 | println!("Priming cache.").await; 128 | } 129 | 
clear(args.clone()).await.context(format!("unable to delete cache"))?; 130 | let mut shortcuts: Vec = get_shortcuts().into_values().collect(); 131 | shortcuts.dedup(); 132 | for shortcut in shortcuts { 133 | if args.verbose { 134 | println!("Priming {}", shortcut.to_owned()).await; 135 | } 136 | block_on(Hostssource::new(shortcut.to_owned(), shortcut.to_owned())); 137 | } 138 | Ok(()) 139 | } 140 | 141 | /// Report information about the current state of cache 142 | async fn report(args: Arguments) -> anyhow::Result<()> { 143 | if args.verbose { 144 | println!("Reporting cache.").await; 145 | println!("Arguments received: {:?}", args).await; 146 | } 147 | println!("Cache report is to be implemented.").await; 148 | Ok(()) 149 | } 150 | 151 | /// Returns the cache folder following the user's OS conventions. 152 | pub async fn get_cache_dir() -> PathBuf { 153 | let proj_dirs = ProjectDirs::from("", "", "rh").unwrap(); 154 | let cache_dir = proj_dirs.cache_dir(); 155 | if !cache_dir.exists() { 156 | // create the folder if it does not exists 157 | let create_dir_result:Result<(), std::io::Error> = fs::create_dir_all(cache_dir); 158 | if create_dir_result.is_err() { 159 | async_std::println!("Unable to create cache folder").await; 160 | panic!(); 161 | } 162 | } 163 | 164 | // proj_dirs.cache_dir().to_owned() 165 | cache_dir.to_owned() 166 | } 167 | 168 | /// Returns the hashed cache key. 
169 | pub fn get_cache_key(s: Cacheable) -> String { 170 | match s { 171 | Cacheable::Vec(v) => { 172 | let mut mv = v.clone(); 173 | mv.sort(); 174 | hash(mv.join("")) 175 | } 176 | Cacheable::String(s) => hash(s), 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/cmd/core.rs: -------------------------------------------------------------------------------- 1 | use crate::types::{Comparable, Hostssource}; 2 | use crate::Arguments; 3 | /// Core behavior for the application 4 | /// 5 | use anyhow::Error; 6 | use futures::executor::block_on; 7 | use arboard::Clipboard; 8 | 9 | pub fn execute(args: Arguments) -> Result<(), Error> { 10 | // If we're here, no subcommand was specified 11 | if args.verbose { 12 | println!("Handled by 'core'."); 13 | } 14 | 15 | // step 1: load the mainhosts 16 | let mut mainhosts = Hostssource { 17 | args: args.clone(), 18 | ..Default::default() 19 | }; 20 | // ignore the result of this load for now 21 | _ = block_on(mainhosts.load(&args.mainhosts)); 22 | 23 | if args.isolate.is_some() { 24 | // handle the hosts list isolation here. 
25 | unimplemented!(); 26 | 27 | } else if args.sysclipboard { 28 | let mut clipboard = Clipboard::new().unwrap(); 29 | let clipboard_text = clipboard.get_text().unwrap(); 30 | if args.verbose { 31 | println!("Clipboard contents:\n{}", clipboard_text); 32 | } 33 | let mut comparisonhosts = Hostssource { 34 | args: args.clone(), 35 | ..Default::default() 36 | }; 37 | // ignore the result of this load for now 38 | _ = block_on(comparisonhosts.load(&clipboard_text)); 39 | 40 | // now, compare the two 41 | mainhosts.compare(Box::new(comparisonhosts)); 42 | 43 | } else if args.comparehosts.is_some() { 44 | let mut comparisonhosts = Hostssource { 45 | args: args.clone(), 46 | ..Default::default() 47 | }; 48 | // ignore the result of this load for now 49 | _ = block_on(comparisonhosts.load(&args.comparehosts.unwrap())); 50 | 51 | // now, compare the two 52 | mainhosts.compare(Box::new(comparisonhosts)); 53 | } else { 54 | println!("{}", mainhosts); 55 | } 56 | 57 | // return Err(anyhow!("Some error")); 58 | 59 | // Err(anyhow!("Some error")) 60 | Ok(()) 61 | } 62 | 63 | /// Dump relavent config information 64 | pub fn info(args: Arguments) { 65 | println!("Core information:"); 66 | println!("Arguments received: {:?}", args); 67 | } 68 | 69 | #[test] 70 | fn this_test_always_passes() {} -------------------------------------------------------------------------------- /src/cmd/init.rs: -------------------------------------------------------------------------------- 1 | use crate::Arguments; 2 | use anyhow::Error; 3 | 4 | // Init command implementation 5 | pub async fn execute(args: Arguments) -> Result<(), Error> { 6 | if args.verbose { 7 | println!("Handled by 'init'."); 8 | } 9 | // for now, prime the cache 10 | crate::cmd::cache::prime(args.clone()).await?; 11 | Ok(()) 12 | } 13 | -------------------------------------------------------------------------------- /src/cmd/mod.rs: -------------------------------------------------------------------------------- 1 | // 
Subcommand modules for the `rhosts` binary. 2 | pub mod build; 3 | pub mod cache; 4 | pub mod core; 5 | pub mod init; 6 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{ 3 | collections::BTreeMap, 4 | path::PathBuf, 5 | fs, fmt, 6 | }; 7 | use anyhow::anyhow; 8 | 9 | use crate::{Arguments, types::Tags, utils::{Combinations, flatten}}; 10 | // use crate::alloc::{Allocator, Global}; 11 | extern crate directories; 12 | // use directories::{BaseDirs, ProjectDirs, UserDirs}; 13 | use directories::ProjectDirs; 14 | 15 | /// print configuration information 16 | pub fn info(_args:Arguments) -> anyhow::Result<()> { 17 | println!("Configuration:"); 18 | println!("Local config file: {}", get_config_file()?.to_string_lossy()); 19 | Ok(()) 20 | } 21 | 22 | pub fn init(_args:Arguments) -> anyhow::Result<()> { 23 | Ok(()) 24 | } 25 | 26 | pub fn get_config_file() -> anyhow::Result { 27 | if let Some(proj_dirs) = ProjectDirs::from("", "", "rh") { 28 | let config_dir = proj_dirs.config_dir(); 29 | // Lin: /home/alice/.config/rh/rh.json 30 | // Win: C:\Users\Alice\AppData\rh\rh.json 31 | // Mac: /Users/Alice/Library/Application Support/rh/rh.json 32 | if !config_dir.exists() { 33 | // create the folder if it does not exists 34 | fs::create_dir_all(config_dir)?; 35 | } 36 | let config_file = config_dir.join("rh.json"); 37 | if !config_file.exists() { 38 | // create the file if it does not exist 39 | fs::File::create(&config_file)?; 40 | } 41 | return Ok(config_file); 42 | } 43 | return Err(anyhow!("Error reckoning config file.")); 44 | } 45 | 46 | 47 | #[allow(dead_code)] 48 | pub fn read_config_file() -> String { 49 | let config_file = get_config_file(); 50 | if config_file.is_ok() { 51 | let config_file_contents_result = 52 | fs::read_to_string(config_file.expect("Problem with config file.")); 53 | let configdata = 
match config_file_contents_result { 54 | Ok(file) => { 55 | let j = serde_json::from_str(&file); 56 | if j.is_ok() { 57 | j.unwrap() 58 | } else { 59 | "{}".to_string() 60 | } 61 | }, 62 | Err(_) => "File read error".to_string(), 63 | }; 64 | configdata 65 | } else { 66 | "".to_string() 67 | } 68 | } 69 | 70 | pub fn get_shortcuts() -> BTreeMap { 71 | let mut ret = BTreeMap::new(); 72 | ret.insert( 73 | "b".to_string(), 74 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts".to_string(), 75 | ); 76 | ret.insert( 77 | "base".to_string(), 78 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts".to_string(), 79 | ); 80 | ret.insert( 81 | "f".to_string(), 82 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews/hosts" 83 | .to_string(), 84 | ); 85 | ret.insert( 86 | "f-only".to_string(), 87 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-only/hosts" 88 | .to_string(), 89 | ); 90 | ret.insert( 91 | "fg".to_string(), 92 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling/hosts" 93 | .to_string(), 94 | ); 95 | ret.insert( 96 | "fgp".to_string(), 97 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling-porn/hosts" 98 | .to_string(), 99 | ); 100 | ret.insert( 101 | "fgps".to_string(), 102 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling-porn-social/hosts" 103 | .to_string(), 104 | ); 105 | ret.insert( 106 | "fgs".to_string(), 107 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-gambling-social/hosts" 108 | .to_string(), 109 | ); 110 | ret.insert( 111 | "fp".to_string(), 112 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-porn/hosts" 113 | .to_string(), 114 | ); 115 | ret.insert( 116 | "fps".to_string(), 117 | 
"https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-porn-social/hosts" 118 | .to_string(), 119 | ); 120 | ret.insert( 121 | "fs".to_string(), 122 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews-social/hosts" 123 | .to_string(), 124 | ); 125 | ret.insert( 126 | "g".to_string(), 127 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/gambling/hosts" 128 | .to_string(), 129 | ); 130 | ret.insert( 131 | "g-only".to_string(), 132 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/gambling-only/hosts" 133 | .to_string(), 134 | ); 135 | ret.insert( 136 | "gp".to_string(), 137 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/gambling-porn/hosts" 138 | .to_string(), 139 | ); 140 | ret.insert( 141 | "gps".to_string(), 142 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/gambling-porn-social/hosts" 143 | .to_string(), 144 | ); 145 | ret.insert( 146 | "gs".to_string(), 147 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/gambling-social/hosts" 148 | .to_string(), 149 | ); 150 | ret.insert( 151 | "p".to_string(), 152 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/porn/hosts" 153 | .to_string(), 154 | ); 155 | ret.insert( 156 | "p-only".to_string(), 157 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/porn-only/hosts" 158 | .to_string(), 159 | ); 160 | ret.insert( 161 | "ps".to_string(), 162 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/porn-social/hosts" 163 | .to_string(), 164 | ); 165 | ret.insert( 166 | "s".to_string(), 167 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/social/hosts" 168 | .to_string(), 169 | ); 170 | ret.insert( 171 | "s-only".to_string(), 172 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/social-only/hosts" 173 | .to_string(), 174 | ); 175 | ret.insert( 
176 | "adaway".to_string(), 177 | // adaway is paused 178 | // "https://raw.githubusercontent.com/AdAway/adaway.github.io/master/hosts.txt".to_string(), 179 | "https://raw.githubusercontent.com/StevenBlack/hosts/refs/heads/master/data/adaway.org/hosts".to_string(), 180 | ); 181 | ret.insert( 182 | "add2o7net".to_string(), 183 | "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.2o7Net/hosts" 184 | .to_string(), 185 | ); 186 | ret.insert( 187 | "adddead".to_string(), 188 | "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Dead/hosts".to_string(), 189 | ); 190 | ret.insert( 191 | "addrisk".to_string(), 192 | "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Risk/hosts".to_string(), 193 | ); 194 | ret.insert( 195 | "addspam".to_string(), 196 | "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Spam/hosts".to_string(), 197 | ); 198 | ret.insert( 199 | "baddboyz".to_string(), 200 | "https://raw.githubusercontent.com/mitchellkrogza/Badd-Boyz-Hosts/master/hosts".to_string(), 201 | ); 202 | ret.insert( 203 | "clefspear".to_string(), 204 | // clefspear is paused 205 | // "https://raw.githubusercontent.com/Clefspeare13/pornhosts/master/0.0.0.0/hosts".to_string(), 206 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/extensions/porn/clefspeare13/hosts".to_string(), 207 | ); 208 | ret.insert( 209 | "fakenews".to_string(), 210 | "https://raw.githubusercontent.com/marktron/fakenews/master/fakenews".to_string(), 211 | ); 212 | ret.insert( 213 | "hostsvn".to_string(), 214 | "https://raw.githubusercontent.com/bigdargon/hostsVN/master/option/hosts-VN".to_string(), 215 | ); 216 | ret.insert( 217 | "kadhosts".to_string(), 218 | "https://raw.githubusercontent.com/FiltersHeroes/KADhosts/master/KADhosts.txt" 219 | .to_string(), 220 | ); 221 | ret.insert( 222 | "mvps".to_string(), 223 | // mvps is paused 224 | // "https://winhelp2002.mvps.org/hosts.txt".to_string(), 225 | 
"https://raw.githubusercontent.com/StevenBlack/hosts/refs/heads/master/data/mvps.org/hosts".to_string(), 226 | ); 227 | ret.insert( 228 | "sinfonietta-gambling".to_string(), 229 | "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/gambling-hosts".to_string(), 230 | ); 231 | ret.insert( 232 | "sinfonietta-porn".to_string(), 233 | "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/pornography-hosts" 234 | .to_string(), 235 | ); 236 | ret.insert( 237 | "sinfonietta-snuff".to_string(), 238 | "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/snuff-hosts".to_string(), 239 | ); 240 | ret.insert( 241 | "sinfonietta-social".to_string(), 242 | "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/social-hosts".to_string(), 243 | ); 244 | ret.insert( 245 | "someonewhocares".to_string(), 246 | "https://someonewhocares.org/hosts/zero/hosts".to_string(), 247 | ); 248 | ret.insert( 249 | "stevenblack".to_string(), 250 | "https://raw.githubusercontent.com/StevenBlack/hosts/master/data/StevenBlack/hosts" 251 | .to_string(), 252 | ); 253 | ret.insert( 254 | "tiuxo-porn".to_string(), 255 | "https://raw.githubusercontent.com/tiuxo/hosts/master/porn".to_string(), 256 | ); 257 | ret.insert( 258 | "tiuxo".to_string(), 259 | "https://raw.githubusercontent.com/tiuxo/hosts/master/ads".to_string(), 260 | ); 261 | ret.insert( 262 | "uncheckyads".to_string(), 263 | "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/UncheckyAds/hosts" 264 | .to_string(), 265 | ); 266 | ret.insert( 267 | "urlhaus".to_string(), 268 | "https://urlhaus.abuse.ch/downloads/hostfile/".to_string(), 269 | ); 270 | ret.insert( 271 | "yoyo".to_string(), 272 | "https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext&useip=0.0.0.0" 273 | .to_string(), 274 | ); 275 | ret 276 | } 277 | 278 | use serde::{Deserialize, Serialize}; 279 | #[derive(Clone, Debug, Deserialize, Serialize)] 280 | pub struct Component { 281 | pub name: String, 282 | 
pub destination: String, 283 | pub tags: Tags, 284 | } 285 | 286 | impl fmt::Display for Component { 287 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 288 | write!(f, "\"name\": {}, \"destination\": {}, \"tags\": {:?}", self.name, self.destination, self.tags) 289 | } 290 | } 291 | 292 | pub type Components = Vec; 293 | 294 | // HERE PLAYING WITH AN ALTERNATE WAY OF DEFINING Components 295 | 296 | // #[derive(Clone, Debug, Deserialize, Serialize)] 297 | // pub struct Komponents(Vec); 298 | 299 | // impl fmt::Display for Komponents { 300 | // fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 301 | // write!(f, "Values:\n")?; 302 | // for v in &self.0 { 303 | // write!(f, "\t{}", v)?; 304 | // } 305 | // Ok(()) 306 | // } 307 | // } 308 | 309 | // impl<'a, Komponent, A: Allocator> IntoIterator for &'a Vec { 310 | // type Item = &'a Component; 311 | // type IntoIter = slice::Iter<'a, Component>; 312 | 313 | // fn into_iter(self) -> Self::IntoIter { 314 | // self.0.iter() 315 | // } 316 | // } 317 | 318 | // impl<'a, Komponent, A: Allocator> IntoIterator for &'a mut Vec { 319 | // type Item = &'a mut Component; 320 | // type IntoIter = slice::IterMut<'a, Component>; 321 | 322 | // fn into_iter(self) -> Self::IntoIter { 323 | // self.0.iter_mut() 324 | // } 325 | // } 326 | 327 | // impl Komponents { 328 | // #[inline] 329 | // pub fn len(&self) -> usize { 330 | // self.0.len() 331 | // } 332 | 333 | // pub fn retain(&mut self, mut f: F) 334 | // where 335 | // F: FnMut(&Component) -> bool, 336 | // { 337 | // self.0.retain_mut(|elem| f(elem)); 338 | // } 339 | // } 340 | 341 | 342 | 343 | #[allow(dead_code)] 344 | pub fn get_products_json() -> String { 345 | let products = r#"[ 346 | { 347 | "name": "base", 348 | "destination": "./", 349 | "tags": ["base"] 350 | }, 351 | { 352 | "name": "f-only", 353 | "destination": "./alternates/fakenews/only", 354 | "tags": ["fakenews"] 355 | }, 356 | { 357 | "name": "f", 358 | "destination": "./alternates/fakenews", 
359 | "tags": ["base", "fakenews"] 360 | }, 361 | { 362 | "name": "fg", 363 | "destination": "./alternates/fakenews-gambling", 364 | "tags": ["base", "fakenews", "gambling"] 365 | }, 366 | { 367 | "name": "fgp", 368 | "destination": "./alternates/fakenews-gambling-porn", 369 | "tags": ["base", "fakenews", "gambling", "porn"] 370 | }, 371 | { 372 | "name": "fgps", 373 | "destination": "./alternates/fakenews-gambling-porn-social", 374 | "tags": ["base", "fakenews", "gambling", "porn", "social"] 375 | }, 376 | { 377 | "name": "fgs", 378 | "destination": "./alternates/fakenews-gambling-social", 379 | "tags": ["base", "fakenews", "gambling", "social"] 380 | }, 381 | { 382 | "name": "fp", 383 | "destination": "./alternates/fakenews-porn", 384 | "tags": ["base", "fakenews", "porn"] 385 | }, 386 | { 387 | "name": "fps", 388 | "destination": "./alternates/fakenews-porn-social", 389 | "tags": ["base", "fakenews", "porn", "social"] 390 | }, 391 | { 392 | "name": "fs", 393 | "destination": "./alternates/fakenews-social", 394 | "tags": ["base", "fakenews", "social"] 395 | }, 396 | { 397 | "name": "g-only", 398 | "destination": "./alternates/gambling/only", 399 | "tags": ["gambling"] 400 | }, 401 | { 402 | "name": "g", 403 | "destination": "./alternates/gambling", 404 | "tags": ["base", "gambling"] 405 | }, 406 | { 407 | "name": "gp", 408 | "destination": "./alternates/gambling-porn", 409 | "tags": ["base", "gambling", "porn"] 410 | }, 411 | { 412 | "name": "gps", 413 | "destination": "./alternates/gambling-porn-social", 414 | "tags": ["base", "gambling", "porn", "social"] 415 | }, 416 | { 417 | "name": "gs", 418 | "destination": "./alternates/gambling-social", 419 | "tags": ["base", "gambling", "social"] 420 | }, 421 | { 422 | "name": "p-only", 423 | "destination": "./alternates/porn/only", 424 | "tags": ["porn"] 425 | }, 426 | { 427 | "name": "p", 428 | "destination": "./alternates/porn", 429 | "tags": ["base", "porn"] 430 | }, 431 | { 432 | "name": "ps", 433 | "destination": 
"./alternates/porn-social", 434 | "tags": ["base", "porn", "social"] 435 | }, 436 | { 437 | "name": "s-only", 438 | "destination": "./alternates/social/only", 439 | "tags": ["social"] 440 | }, 441 | { 442 | "name": "s", 443 | "destination": "./alternates/social", 444 | "tags": ["base", "social"] 445 | } 446 | ]"#.trim().to_string(); 447 | products 448 | } 449 | 450 | #[derive(Debug, Deserialize, Serialize)] 451 | pub struct Config { 452 | sources: SourcesSpecs, 453 | } 454 | 455 | #[derive(Clone, Debug, Deserialize, Serialize)] 456 | pub struct SourceSpec { 457 | pub name: String, 458 | pub url: String, 459 | pub destination: String, 460 | pub tags: Tags, 461 | } 462 | 463 | type SourcesSpecs = Vec; 464 | 465 | impl fmt::Display for SourceSpec { 466 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 467 | // Show only the name, destination and tags; the url is left out. 468 | write!( 469 | f, 470 | "name: {}, destination: {}, tags: {:?}", 471 | self.name, self.destination, self.tags 472 | ) 473 | } 474 | } 475 | 476 | pub fn gettaggroups() -> Vec> { 477 | let tags = get_unique_tags(); 478 | let mut taggroups = vec!(); 479 | for n in 1..tags.len() +1 { 480 | let groupsvec: Vec<_> = Combinations::new(tags.clone(), n).collect(); 481 | taggroups.push(groupsvec); 482 | // println!("{:?}", groupsvec); 483 | } 484 | flatten(taggroups) 485 | } 486 | 487 | #[allow(dead_code)] 488 | pub fn get_unique_tags() -> Tags { 489 | // yields all the unique tags we have 490 | use array_tool::vec::Uniq; 491 | let json = get_sources_json(); 492 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for getting tags."); 493 | let mut tags: Tags= vec!(); 494 | for x in config { 495 | for t in x.tags { 496 | tags.push(t); 497 | } 498 | } 499 | let mut uniquetags = tags.unique(); 500 | uniquetags.sort(); 501 | uniquetags 502 | } 503 | 504 | #[allow(dead_code)] 505 | pub fn get_sources_by_tag(tag: String) -> Vec { 506 | let json = get_sources_json(); 507 | let config: 
SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for getting tags."); 508 | let mut sources = vec!(); 509 | for x in config { 510 | if x.tags.contains(&tag) { 511 | sources.push(x); 512 | } 513 | } 514 | sources 515 | } 516 | 517 | #[allow(dead_code)] 518 | pub fn get_source_names_by_tag(tag: String) -> Vec { 519 | let json = get_sources_json(); 520 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for getting tags."); 521 | let mut sources = vec!(); 522 | for x in config { 523 | if x.tags.contains(&tag) { 524 | sources.push(x.name); 525 | } 526 | } 527 | sources 528 | } 529 | 530 | 531 | #[allow(dead_code)] 532 | pub fn get_sources_json() -> String { 533 | let sources = r#"[ 534 | { 535 | "name": "adaway", 536 | "url": "https://raw.githubusercontent.com/AdAway/adaway.github.io/master/hosts.txt", 537 | "destination": "./data/adaway.org", 538 | "tags": ["base"] 539 | }, 540 | { 541 | "name": "add2o7net", 542 | "url": "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.2o7Net/hosts", 543 | "destination": "./data/add.2o7net", 544 | "tags": ["base"] 545 | }, 546 | { 547 | "name": "adddead", 548 | "url": "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Dead/hosts", 549 | "destination": "./data/add.dead", 550 | "tags": ["base"] 551 | }, 552 | { 553 | "name": "addrisk", 554 | "url": "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Risk/hosts", 555 | "destination": "./data/add.risk", 556 | "tags": ["base"] 557 | }, 558 | { 559 | "name": "addspam", 560 | "url": "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Spam/hosts", 561 | "destination": "./data/add.spam", 562 | "tags": ["base"] 563 | }, 564 | { 565 | "name": "baddboyz", 566 | "url": "https://raw.githubusercontent.com/mitchellkrogza/Badd-Boyz-Hosts/master/hosts", 567 | "destination": "./data/Badd-Boyz-Hosts", 568 | "tags": ["base"] 569 | }, 570 | { 571 | "name": "bigdargon-gambling", 572 | 
"url": "https://raw.githubusercontent.com/bigdargon/hostsVN/master/extensions/gambling/hosts", 573 | "destination": "./extensions/gambling/bigdargon/", 574 | "tags": ["gambling"] 575 | }, 576 | { 577 | "name": "bigdargon-porn", 578 | "url": "https://raw.githubusercontent.com/bigdargon/hostsVN/master/extensions/adult/hosts", 579 | "destination": "./extensions/porn/bigdargon/", 580 | "tags": ["porn"] 581 | }, 582 | { 583 | "name": "clefspear", 584 | "url": "https://raw.githubusercontent.com/StevenBlack/hosts/master/extensions/porn/clefspeare13/hosts", 585 | "destination": "./extensions/porn/clefspeare13/", 586 | "tags": ["porn"] 587 | }, 588 | { 589 | "name": "marktron-fakenews", 590 | "url": "https://raw.githubusercontent.com/marktron/fakenews/master/fakenews", 591 | "destination": "./extensions/fakenews", 592 | "tags": ["fakenews"] 593 | }, 594 | { 595 | "name": "hostsvn", 596 | "url": "https://raw.githubusercontent.com/bigdargon/hostsVN/master/option/hosts-VN", 597 | "destination": "./data/hostsVN", 598 | "tags": ["base"] 599 | }, 600 | { 601 | "name": "kadhosts", 602 | "url": "https://raw.githubusercontent.com/FiltersHeroes/KADhosts/master/KADhosts.txt", 603 | "destination": "./data/KADhosts", 604 | "tags": ["base"] 605 | }, 606 | { 607 | "name": "mvps", 608 | "url": "https://winhelp2002.mvps.org/hosts.txt", 609 | "destination": "./data/mvps.org", 610 | "tags": ["base"] 611 | }, 612 | { 613 | "name": "sinfonietta-gambling", 614 | "url": "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/gambling-hosts", 615 | "destination": "./extensions/gambling", 616 | "tags": ["gambling"] 617 | }, 618 | { 619 | "name": "sinfonietta-porn", 620 | "url": "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/pornography-hosts", 621 | "destination": "./extensions/porn/sinfonietta", 622 | "tags": ["porn"] 623 | }, 624 | { 625 | "name": "sinfonietta-snuff", 626 | "url": "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/snuff-hosts", 627 | 
"destination": "./extensions/porn/sinfonietta-snuff", 628 | "tags": ["porn"] 629 | }, 630 | { 631 | "name": "sinfonietta-social", 632 | "url": "https://raw.githubusercontent.com/Sinfonietta/hostfiles/master/social-hosts", 633 | "destination": "./extensions/social/sinfonietta", 634 | "tags": ["social"] 635 | }, 636 | { 637 | "name": "someonewhocares", 638 | "url": "https://someonewhocares.org/hosts/zero/hosts", 639 | "destination": "./data/someonewhocares.org", 640 | "tags": ["base"] 641 | }, 642 | { 643 | "name": "stevenblack", 644 | "url": "https://raw.githubusercontent.com/StevenBlack/hosts/master/data/StevenBlack/hosts", 645 | "destination": "./data/StevenBlack", 646 | "tags": ["base"] 647 | }, 648 | { 649 | "name": "tiuxo-porn", 650 | "url": "https://raw.githubusercontent.com/tiuxo/hosts/master/porn", 651 | "destination": "./extensions/porn/tiuxo", 652 | "tags": ["porn"] 653 | }, 654 | { 655 | "name": "tiuxo", 656 | "url": "https://raw.githubusercontent.com/tiuxo/hosts/master/ads", 657 | "destination": "./data/tiuxo", 658 | "tags": ["base"] 659 | }, 660 | { 661 | "name": "uncheckyads", 662 | "url": "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/UncheckyAds/hosts", 663 | "destination": "./data/UncheckyAds", 664 | "tags": ["base"] 665 | }, 666 | { 667 | "name": "urlhaus", 668 | "url": "https://urlhaus.abuse.ch/downloads/hostfile/", 669 | "destination": "./data/URLhaus", 670 | "tags": ["base"] 671 | }, 672 | { 673 | "name": "yoyo", 674 | "url": "https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext&useip=0.0.0.0", 675 | "destination": "./data/yoyo.org", 676 | "tags": ["base"] 677 | } 678 | ]"#.trim().to_string(); 679 | sources 680 | } 681 | 682 | #[cfg(test)] 683 | mod tests { 684 | use super::*; 685 | 686 | #[test] 687 | fn test_function_get_config_file_returns_an_actionable_file_path() { 688 | let cf = get_config_file(); 689 | assert!(cf.is_ok_and(|fp| fp.is_file() && fp.exists())); 690 | } 691 | 692 | #[test] 693 | 
fn test_read_config_file() { 694 | let cf = read_config_file(); 695 | dbg!(cf); 696 | } 697 | 698 | #[test] 699 | fn test_shortcuts() { 700 | let hm = get_shortcuts(); 701 | assert_eq!(hm.get(&"yoyo".to_string()), Some(&"https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts&mimetype=plaintext&useip=0.0.0.0".to_string())); 702 | assert_eq!(hm.get(&"zzz".to_string()), None); 703 | } 704 | 705 | #[test] 706 | fn test_mut_shortcuts() { 707 | let mut hm = get_shortcuts(); 708 | hm.insert("yoyo".to_string(), "foo.bar".to_string()); 709 | assert_eq!(hm.get(&"yoyo".to_string()), Some(&"foo.bar".to_string())); 710 | } 711 | 712 | #[test] 713 | fn test_get_products_json() { 714 | let json = get_products_json(); 715 | let products: Components = serde_json::from_str(json.as_str()).expect("Invalid JSON in recipe."); 716 | println!("{:?}", products); 717 | assert!(products.len() > 5); 718 | } 719 | 720 | #[test] 721 | fn test_taging_products_json() { 722 | // this test just lists all the products a tag belongs to. 
723 | let json = get_products_json(); 724 | let config: Components = serde_json::from_str(json.as_str()).expect("Invalid JSON recepe tag specification."); 725 | 726 | let tags = get_unique_tags(); 727 | for tag in tags { 728 | println!("\n# {}", &tag); 729 | let mut c = config.clone(); 730 | c.retain(|x| x.tags.contains(&tag.to_string())); 731 | for x in c { 732 | println!("{x}"); 733 | } 734 | } 735 | assert_eq!(Some(2), Some(1 + 1)); 736 | } 737 | 738 | #[test] 739 | fn test_get_sources_by_tag() { 740 | let tests = ["base", "fakenews", "gambling", "porn", "social"]; 741 | for test in tests { 742 | println!(); 743 | println!("== {} ==", test.to_string()); 744 | let sources = get_sources_by_tag(test.to_string()); 745 | for s in sources.clone() { 746 | println!("{:?}", s.name); 747 | } 748 | assert!(sources.len() > 0); 749 | } 750 | } 751 | 752 | #[test] 753 | fn test_get_sources_by_tag_fakenews() { 754 | let sources = get_sources_by_tag("fakenews".to_string()); 755 | for s in sources.clone() { 756 | println!("{:?}", s.name); 757 | } 758 | assert!(sources.len() == 1); 759 | } 760 | 761 | #[test] 762 | fn test_get_config_json() { 763 | let json = get_sources_json(); 764 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON configuration."); 765 | for o in config.clone() { 766 | println!("{:?} ⬅️ {:?}", o.tags, o.url); 767 | } 768 | assert!(config.len() > 5); 769 | } 770 | 771 | #[test] 772 | fn test_taging_config_json() { 773 | // this test lists all the sources of a tag. 
774 | let json = get_sources_json(); 775 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for taging."); 776 | 777 | let tags = get_unique_tags(); 778 | for tag in tags { 779 | println!("\n# {}", &tag); 780 | let mut c = config.clone(); 781 | c.retain(|x| x.tags.contains(&tag.to_string())); 782 | for x in c { 783 | println!("{x}"); 784 | } 785 | } 786 | assert_eq!(Some(2), Some(1 + 1)); 787 | } 788 | 789 | #[test] 790 | fn test_gettaggroups() { 791 | println!("{:?}", gettaggroups()); 792 | assert!(1 == 1) 793 | } 794 | 795 | #[test] 796 | fn test_grouping_config_json_data() { 797 | // this test tells us if data destination folders exist. 798 | use std::path::PathBuf; 799 | 800 | macro_rules! ternary { 801 | ($c:expr, $v:expr, $v1:expr) => { 802 | if $c {$v} else {$v1} 803 | }; 804 | } 805 | 806 | let json = get_sources_json(); 807 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for grouping."); 808 | for x in config { 809 | let path: PathBuf = ["/Users/Steve/Dropbox/dev/hosts", x.destination.as_str()].iter().collect(); 810 | // let b: bool = Path::new(x.destination.as_str()).is_dir(); 811 | let b: bool = path.is_dir(); 812 | // println!("{} - {}", x.destination, b); 813 | println!("{} {}", x.destination, ternary!(b,"✅", "❌")); 814 | } 815 | assert_eq!(Some(2), Some(1 + 1)); 816 | } 817 | 818 | #[test] 819 | fn test_get_unique_tags() { 820 | // this test ensures we get a vec of unique tags. 821 | let tags = get_unique_tags(); 822 | assert!(tags.contains(&"base".to_string())); 823 | assert!(tags.contains(&"porn".to_string())); 824 | println!("{:?}", tags); 825 | } 826 | 827 | #[test] 828 | fn test_config_name_collisions() { 829 | // this test lists any name collisions between sources and recipes (it prints them; it does not fail on them). 
830 | use std::collections::HashSet; 831 | 832 | let json = get_sources_json(); 833 | let config: SourcesSpecs = serde_json::from_str(json.as_str()).expect("Invalid JSON for sources."); 834 | let json = get_products_json(); 835 | let recipies: Components = serde_json::from_str(json.as_str()).expect("Invalid JSON for recipies."); 836 | let mut check = HashSet::new(); 837 | 838 | for source in config { 839 | if !check.insert(source.name.clone()) { 840 | println!("{} ❌ is a duplicate source", source.name); 841 | } 842 | } 843 | for recipe in recipies { 844 | if !check.insert(recipe.name.clone()) { 845 | println!("{} ❌ is a duplicate recipe", recipe.name); 846 | } 847 | } 848 | assert_eq!(Some(2), Some(1 + 1)); 849 | } 850 | } 851 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /// The error types used throughout this crate. 2 | pub mod errors {} 3 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | //! This Rust module provides functionality for managing and processing hosts files 2 | //! with various options for comparison, sorting, and filtering. 3 | //! 4 | //! We use `clap` for command-line argument parsing and `anyhow` 5 | //! for error handling. 6 | //! 7 | //! # Modules 8 | //! 9 | //! - **cmd**: Handles specific commands and their implementations. 10 | //! - **config**: Contains configuration management utilities and helpers. 11 | //! - **types**: Defines data structures and types. 12 | //! - **utils**: Includes utility functions for common tasks. 13 | //! 14 | //! # Main Structs and Enums 15 | //! 16 | //! ## `Arguments` 17 | //! 18 | //! The primary structure representing the parsed command-line arguments. This 19 | //! 
structure includes options for specifying hosts files, output preferences, and 20 | //! commands for execution. 21 | //! 22 | //! ### Fields 23 | //! 24 | //! - **mainhosts** (`String`): Specifies the main hosts file for comparison. 25 | //! Defaults to `"base"`. 26 | //! - **comparehosts** (`Option`): Specifies the hosts file to compare against the main hosts. 27 | //! - **isolate** (`Option`): Specifies the hosts list to isolate and compare to the main hosts. 28 | //! - **iplocalhost** (`String`): Defines the IP address to use when listing hosts. Defaults to `"0.0.0.0"`. 29 | //! - **adddefaults** (`bool`): Adds default hosts to the top of the host lists. 30 | //! - **domains_sort** (`bool`): Enables domain sorting based on hierarchy. 31 | //! - **output** (`Option`): Specifies an output file; otherwise, stdout is used. 32 | //! - **plain_output** (`bool`): Generates plain domain listings (domains only, without IP addresses). 33 | //! - **quiet** (`bool`): Enables quiet mode, displaying only the number of domains. 34 | //! - **stats** (`Option`): Displays statistics about the domains. 35 | //! - **intersection_list** (`bool`): Outputs the intersection of `mainhosts` and `comparehosts`. 36 | //! - **rootdomains** (`bool`): Outputs a count of root domains. 37 | //! - **tld** (`bool`): Outputs a tally of top-level domains (TLDs). 38 | //! - **limit** (`usize`): Limits the number of TLD/root domain listings. Defaults to 30; `0` for unlimited. 39 | //! - **skipheaders** (`bool`): Omits file comment headers in the output. 40 | //! - **showduplicates** (`bool`): Lists duplicate domains when reporting a hosts list. 41 | //! - **showinvalids** (`bool`): Lists invalid domains when reporting a hosts list. 42 | //! - **sysclipboard** (`bool`): Uses system clipboard contents as the compare hosts. 43 | //! - **uniquelist** (`bool`): Outputs unique domain names. 44 | //! - **verbose** (`bool`): Enables verbose output for development or debugging. 45 | //! 
- **command** (`Option`): Specifies a subcommand to execute. 46 | //! - **skipcache** (`bool`): Prevents using cached data. 47 | //! 48 | //! ### Methods 49 | //! 50 | //! - **`Arguments::new`**: Generates a new `Arguments` instance with default or configured values. 51 | //! - **Returns**: `Arguments` 52 | //! 53 | //! ## `Commands` 54 | //! 55 | //! Enum representing subcommands for the application. 56 | //! 57 | //! ### Variants 58 | //! 59 | //! - **Build**: Builds hosts files with an optional formula. 60 | //! - **formula** (`Option`): Specifies the formula to use. 61 | //! - **Cache**: Manages application cache with subcommands for actions. 62 | //! - **cacheaction** (`Option`): A subcommand for cache-specific actions. 63 | //! - **Init**: Initializes cache and templates. 64 | //! - **Info**: Displays additional application information. 65 | //! 66 | //! # Functions 67 | //! 68 | //! ## `show_info` 69 | //! 70 | //! Prints detailed application information such as version, description, author, 71 | //! license, homepage, and repository, as well as specific information from various 72 | //! application modules. 73 | //! 74 | //! ### Parameters 75 | //! 76 | //! - `args` (`Arguments`): The parsed command-line arguments. 77 | //! 78 | //! ### Returns 79 | //! 80 | //! - `Result<(), Error>`: Outputs `Ok(())` on success or an error if an issue occurs. 81 | //! 82 | //! ## `main` 83 | //! 84 | //! The entry point of the application that processes command-line inputs, initializes 85 | //! configurations, and executes the specified command or default behavior. 86 | //! 87 | //! ### Asynchronous Execution 88 | //! 89 | //! - Leverages `async_std::main` for asynchronous execution of certain tasks. 90 | //! 91 | //! ### Returns 92 | //! 93 | //! - `Result<(), Error>`: Outputs `Ok(())` on success or an error if initialization or execution fails. 94 | //! 95 | //! ### Behavior Based on Commands 96 | //! 97 | //! 
- Executes the given `Commands` variant or defaults to core command execution if no subcommand is provided. 98 | //! 99 | //! ## `test_default_command_line_arguments` 100 | //! 101 | //! A unit test to validate the default behavior and configuration of the `Arguments` struct. 102 | //! 103 | //! ### Assertions 104 | //! 105 | //! - Verifies default parsing and initialization of critical fields in `Arguments`: 106 | //! - **`mainhosts`**: Fetched from the base key in shortcuts. 107 | //! - **`comparehosts`**, **`iplocalhost`**, **`tld`**, and **`stats`**. 108 | //! 109 | //! ### Test Outcome 110 | //! 111 | //! - Ensures expected defaults for arguments and initializes properly when no command-line arguments are provided. 112 | 113 | extern crate clap; 114 | use anyhow::Error; 115 | use clap::{Parser, Subcommand}; 116 | use config::get_shortcuts; 117 | 118 | mod cmd; 119 | mod config; 120 | mod types; 121 | mod utils; 122 | 123 | /// Command-line arguments structure. 124 | /// 125 | /// This struct is designed to parse and represent the input arguments. 126 | /// 127 | /// # Fields 128 | /// 129 | /// - `mainhosts`: 130 | /// The main hosts file acting as the basis for comparison. Defaults to "base". 131 | /// - Short Flag: `-m` 132 | /// - Long Flag: `--main` 133 | /// 134 | /// - `comparehosts`: 135 | /// The hosts file to compare against the `mainhosts`. 136 | /// - Short Flag: `-c` 137 | /// - Long Flag: `--compare` 138 | /// 139 | /// - `isolate`: 140 | /// A hosts file to isolate and compare against the `mainhosts`. This builds a temporary 141 | /// adjusted `mainhosts` without the isolated file, then compares the temporary adjusted 142 | /// `mainhosts` with the isolated file. 143 | /// - Long Flag: `--isolate` 144 | /// 145 | /// - `iplocalhost`: 146 | /// The IP address to associate with listed hosts. Defaults to `0.0.0.0`. 
147 | /// - Long Flag: `--ip` 148 | /// 149 | /// - `adddefaults`: 150 | /// Flag to include default hosts at the top of the hosts lists, if enabled. 151 | /// - Short Flag: `-d` 152 | /// - Long Flag: `--default_hosts` 153 | /// 154 | /// - `domains_sort`: 155 | /// Flag to sort domains by order: domain, TLD, subdomains. 156 | /// - Short Flag: `-s` 157 | /// - Long Flag: `--sort` 158 | /// 159 | /// - `output`: 160 | /// Specifies an output file. By default, output is sent to `stdout`. 161 | /// - Short Flag: `-o` 162 | /// - Long Flag: `--output` 163 | /// 164 | /// - `plain_output`: 165 | /// Enables plain listing mode where only domains (without IP addresses) are listed. 166 | /// - Short Flag: `-p` 167 | /// - Long Flag: `--plain` 168 | /// 169 | /// - `quiet`: 170 | /// Enables quiet output mode, only showing the count of domains. 171 | /// - Short Flag: `-q` 172 | /// - Long Flag: `--quiet` 173 | /// 174 | /// - `stats`: 175 | /// Optional flag to display statistics about the domains. 176 | /// - Long Flag: `--stats` 177 | /// 178 | /// - `intersection_list`: 179 | /// Prints the intersection of `mainhosts` and `comparehosts`. 180 | /// - Short Flag: `-i` 181 | /// - Long Flag: `--intersection` 182 | /// 183 | /// - `rootdomains`: 184 | /// Lists root domains and their respective counts. 185 | /// - Short Flag: `-r` 186 | /// - Long Flag: `--rootdomains` 187 | /// 188 | /// - `tld`: 189 | /// Displays a tally of top-level domains (TLDs) in the list. 190 | /// - Short Flag: `-t` 191 | /// - Long Flag: `--tld` 192 | /// 193 | /// - `limit`: 194 | /// Sets a limit for listing TLDs and root domains, where `0` indicates no limit. Defaults to `30`. 195 | /// - Long Flag: `--limit` 196 | /// 197 | /// - `skipheaders`: 198 | /// Omits file comment headers in the output. 199 | /// - Long Flag: `--skipheaders` 200 | /// 201 | /// - `showduplicates`: 202 | /// Lists duplicate domains found in the hosts list. 
203 | /// - Long Flag: `--showduplicates` 204 | /// 205 | /// - `showinvalids`: 206 | /// Lists invalid domains detected in the hosts list. 207 | /// - Long Flag: `--invalid` 208 | /// 209 | /// - `sysclipboard`: 210 | /// Uses the contents of the system clipboard as the `comparehosts` input. 211 | /// - Long Flag: `--clip` 212 | /// 213 | /// - `uniquelist`: 214 | /// Lists unique domain names in the hosts list. 215 | /// - Short Flag: `-u` 216 | /// - Long Flag: `--unique` 217 | /// 218 | /// - `verbose`: 219 | /// Enables verbose output, useful for debugging or detailed inspection. 220 | /// - Short Flag: `-v` 221 | /// - Long Flag: `--verbose` 222 | /// 223 | /// - `command`: 224 | /// Specifies an optional subcommand to execute. Refer to the `Commands` enum for available 225 | /// subcommand options. 226 | /// 227 | /// - `skipcache`: 228 | /// Disables caching to ensure fresh processing. 229 | /// - Long Flag: `--skipcache` 230 | #[derive(Debug, Default, Parser)] 231 | #[clap(author, version, about, long_about = None)] 232 | #[deny(missing_docs)] 233 | 234 | #[derive(Clone)] 235 | pub struct Arguments { 236 | #[clap( 237 | short, 238 | long = "main", 239 | default_value = "base", 240 | help = "The main hosts file, the basis for comparison" 241 | )] 242 | mainhosts: String, 243 | 244 | 245 | #[clap( 246 | short, 247 | long = "compare", 248 | help = "The hosts file to compare to mainhosts" 249 | )] 250 | comparehosts: Option, 251 | 252 | #[clap( 253 | long = "isolate", 254 | help = "The hosts list to isolate and compare to mainhosts" 255 | )] 256 | isolate: Option, 257 | 258 | #[clap( 259 | long = "ip", 260 | default_value = "0.0.0.0", 261 | help = "The ip address to use when listing hosts" 262 | )] 263 | iplocalhost: String, 264 | 265 | #[clap( 266 | short = 'd', 267 | long = "default_hosts", 268 | help = "Add default hosts for when listing hosts. 
The default hosts will be placed at the top of hosts lists" 269 | 270 | )] 271 | adddefaults: bool, 272 | 273 | #[clap( 274 | short = 's', 275 | long = "sort", 276 | help = "Sort the domains. The sort order is domain, tdl, subdomain1, subdomain2, etc" 277 | )] 278 | domains_sort: bool, 279 | 280 | #[clap( 281 | short, 282 | long, 283 | help = "The output file. By default, output is to std out" 284 | )] 285 | output: Option, 286 | 287 | #[clap( 288 | short = 'p', 289 | long = "plain", 290 | help = "Plain listing - domains only, without addresses, when listing domains" 291 | )] 292 | plain_output: bool, 293 | 294 | /// 295 | #[clap( 296 | short, 297 | long, 298 | help = "Quiet, terse output mode. Outputs the number of domains only" 299 | )] 300 | quiet: bool, 301 | 302 | #[clap(long, help = "Print statistics about the domains")] 303 | stats: Option, 304 | 305 | #[clap( 306 | short, 307 | long = "intersection", 308 | help = "Print the intersection of mainhosts and comparehosts" 309 | )] 310 | intersection_list: bool, 311 | 312 | #[clap( 313 | short, 314 | long, 315 | help = "List of root domains and their tally" 316 | )] 317 | rootdomains: bool, 318 | 319 | #[clap( 320 | short, 321 | long, 322 | help = "Print a tally of top level domains found in the list" 323 | )] 324 | tld: bool, 325 | 326 | #[clap( 327 | short, 328 | long, 329 | default_value = "30", 330 | help = "Limit for listing TLD and root domains, 0 = unlimited" 331 | )] 332 | limit: usize, 333 | 334 | #[clap(long, help = "Omit the file comment headers in output")] 335 | skipheaders: bool, 336 | 337 | #[clap(long, help = "List duplicates when reporting on a hosts list")] 338 | showduplicates: bool, 339 | 340 | #[clap(long = "invalid", help = "List invalid domains when reporting on a hosts list")] 341 | showinvalids: bool, 342 | 343 | #[clap(long = "clip", help = "Use the contents of the system clipboard as compare hosts")] 344 | sysclipboard: bool, 345 | 346 | #[clap(short, long = "unique", help = "List the 
unique domain names")] 347 | uniquelist: bool, 348 | 349 | #[clap(short, long = "verbose", help = "Verbose output, useful for development")] 350 | verbose: bool, 351 | 352 | #[clap(subcommand)] 353 | command: Option, 354 | 355 | #[clap(long = "skipcache", help = "Do not use cache")] 356 | skipcache: bool, 357 | } 358 | 359 | impl Arguments { 360 | pub fn new() -> Arguments { 361 | // Special code goes here ... 362 | let shortcuts = get_shortcuts(); 363 | let d = Arguments { 364 | mainhosts: shortcuts 365 | .get("base") 366 | .expect("The base key is not defined.") 367 | .to_owned(), 368 | iplocalhost: "0.0.0.0".to_string(), 369 | stats: Some(true), 370 | skipcache: false, 371 | ..Default::default() 372 | }; 373 | d 374 | } 375 | } 376 | 377 | #[derive(Clone, Debug, Subcommand)] 378 | pub enum Commands { 379 | /// Build hosts files 380 | Build { 381 | #[clap(short, long)] 382 | /// The formula to build 383 | formula: Option, 384 | }, 385 | /// Application cache initialize, prime, clear, or report. 386 | Cache { 387 | /// Cache subcommand 388 | #[clap(subcommand)] 389 | cacheaction: Option, 390 | }, 391 | /// Initialize cache and templates 392 | Init, 393 | /// Display additional information about the application 394 | Info, 395 | } 396 | 397 | /** 398 | * Displays detailed information about the current application, configuration, and related commands. 399 | * 400 | * This function provides an organized "info dump" that includes metadata about the application (such as version, 401 | * description, author, license, homepage, and repository), as well as additional information from various components. 402 | * 403 | * # Arguments 404 | * 405 | * * `args` - A structure of type `Arguments` that holds any necessary command-line arguments or context. 406 | * 407 | * # Returns 408 | * 409 | * * `Ok(())` if the operation completes successfully. 410 | * * `Err(Error)` if there is an issue during execution. 
411 | * 412 | * # Metadata Displayed 413 | * 414 | * * Application version (from `CARGO_PKG_VERSION`). 415 | * * Description of the application (from `CARGO_PKG_DESCRIPTION`). 416 | * * Author(s) (from `CARGO_PKG_AUTHORS`). 417 | * * License information (from `CARGO_PKG_LICENSE`). 418 | * * Homepage URL (from `CARGO_PKG_HOMEPAGE`). 419 | * * Repository URL (from `CARGO_PKG_REPOSITORY`). 420 | * 421 | * # Side Effects 422 | * 423 | * * Outputs formatted information to the standard output (`stdout`). 424 | * * Delegates additional info retrieval and display to the following modules: 425 | * - `config` (via `config::info`). 426 | * - `cmd::cache` (via `cmd::cache::info`). 427 | * - `cmd::core` (via `cmd::core::info`). 428 | * 429 | * # Formatting 430 | * 431 | * The output includes a centered title ("info dump") bounded by dashes, followed by sections of metadata 432 | * and component-specific information, all separated by blank lines for better readability. 433 | * 434 | * # Example 435 | * 436 | * ``` 437 | * use your_crate::Arguments; 438 | * 439 | * let args = Arguments::new(); // Example initialization of `Arguments`. 
440 | * if let Err(e) = show_info(args) { 441 | * eprintln!("Error displaying information: {}", e); 442 | * } 443 | * ``` 444 | */ 445 | fn show_info(args:Arguments) -> Result<(), Error> { 446 | println!(); 447 | println!("{}",format!("{:-^1$}", " info dump ", 40)); 448 | println!("rh version: {}", env!("CARGO_PKG_VERSION")); 449 | println!("Description: {}", env!("CARGO_PKG_DESCRIPTION")); 450 | println!("Author: {}", env!("CARGO_PKG_AUTHORS")); 451 | println!("License: {}", env!("CARGO_PKG_LICENSE")); 452 | println!(); 453 | println!("Homepage: {}", env!("CARGO_PKG_HOMEPAGE")); 454 | println!("Repository: {}", env!("CARGO_PKG_REPOSITORY")); 455 | println!(); 456 | _ = config::info(args.clone()); 457 | println!(); 458 | _ = cmd::cache::info(args.clone()); 459 | println!(); 460 | _ = cmd::core::info(args.clone()); 461 | println!(); 462 | println!("{}",format!("{:-^1$}", "", 40)); 463 | println!(); 464 | 465 | Ok(()) 466 | } 467 | 468 | #[async_std::main] 469 | async fn main() -> Result<(), Error> { 470 | let args = Arguments::parse(); 471 | config::init(args.clone())?; 472 | cmd::cache::init(args.clone()).await?; 473 | 474 | // Check which subcommand the user specified, if any... 
475 | let res = match &args.command { 476 | None => cmd::core::execute(args), 477 | Some(Commands::Init) => cmd::init::execute(args).await, 478 | Some(Commands::Build { formula: _ }) => cmd::build::execute(args).await, 479 | Some(Commands::Cache { cacheaction: _ }) => cmd::cache::execute(args).await, 480 | Some(Commands::Info) => {show_info(args)}, 481 | }; 482 | 483 | if let Err(e) = res { 484 | println!("Error {:?}", e); 485 | std::process::exit(101); 486 | } 487 | Ok(()) 488 | } 489 | 490 | 491 | #[test] 492 | fn test_default_command_line_arguments() { 493 | let arguments = Arguments::new(); 494 | assert_eq!( 495 | arguments.mainhosts, 496 | get_shortcuts() 497 | .get("base") 498 | .expect("The base key does not exist") 499 | .to_owned(), 500 | "Expected mainhosts to be fetched by the base key" 501 | ); 502 | assert_eq!( 503 | arguments.comparehosts 504 | , None, 505 | "Expected the comparehosts argument to be None" 506 | ); 507 | assert_eq!( 508 | arguments.iplocalhost 509 | , "0.0.0.0".to_string(), 510 | "Expected the iplocalhost argument to be 0.0.0.0" 511 | ); 512 | assert_eq!( 513 | arguments.tld 514 | , false, 515 | "Expected the tld argument to be false" 516 | ); 517 | assert_eq!( 518 | arguments.stats, Some(true), 519 | "Expected the stats argument to be Some(true)" 520 | ); 521 | } -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use anyhow; 2 | use indexmap::IndexSet; 3 | use std::{ 4 | collections::HashMap, 5 | fmt, 6 | fmt::Display, 7 | fs::File, 8 | io::{prelude::*, BufReader}, path::Path, 9 | }; 10 | // See also [Rust: Domain Name Validation](https://bas-man.dev/post/rust/domain-name-validation/) 11 | use crate::{ 12 | cmd::cache, config::get_shortcuts 13 | }; 14 | use crate::utils::{is_domain, norm_string, trim_inline_comments}; 15 | use crate::Arguments; 16 | use futures::executor::block_on; 17 | use 
num_format::{Locale, ToFormattedString}; 18 | use std::cmp::Ordering; 19 | 20 | pub type Domain = String; 21 | pub type Domains = IndexSet; 22 | pub type Tag = String; 23 | pub type Tags = Vec; 24 | // pub type IPaddress = String; 25 | 26 | 27 | #[derive(Debug, Default, Clone)] 28 | pub struct Host { 29 | // ip_address: IPaddress, 30 | // domain: Domain, 31 | } 32 | 33 | // Source: https://users.rust-lang.org/t/structs-with-similar-fields/99065/4 34 | // Source: https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=e39ad82c6bfa82742428a10ee629c631 35 | macro_rules! with_hosts_collection_shared_fields_and_impl { 36 | ($(#[$attr:meta])* struct $name:ident { $($field_name:ident: $field_type:tt,)*} ) => { 37 | $(#[$attr])* 38 | pub struct $name { 39 | pub name: String, 40 | pub location: String, 41 | pub raw_list: Vec, 42 | pub front_matter: Vec, 43 | pub domains: Domains, 44 | pub duplicates: Domains, 45 | pub invalids: Domains, 46 | pub args: Arguments, 47 | $(pub $field_name: $field_type,)* 48 | } 49 | 50 | impl fmt::Display for $name { 51 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 52 | if self.args.quiet { 53 | writeln!(f, "{}", self.domains.len()) 54 | } else { 55 | writeln!( 56 | f, 57 | // "Domain report for: {}\n{}\nDomains: {}\nDuplicate domains: {}\nInvalid domains: {}", 58 | "Domain report for: {}\n{}\nDomains: {}", 59 | self.location, 60 | format!("🔎: {}",self.location.replace("https://raw.githubusercontent.com/","")), 61 | self.domains.len().to_formatted_string(&Locale::en), 62 | // self.duplicates.len().to_formatted_string(&Locale::en), 63 | // self.invalids.len().to_formatted_string(&Locale::en) 64 | )?; 65 | if self.args.showduplicates && self.duplicates.len() > 0 { 66 | writeln!(f, "Duplicates list:")?; 67 | for dup in &self.duplicates { 68 | writeln!(f, "{}", dup)?; 69 | } 70 | } 71 | if self.args.showinvalids && self.invalids.len() > 0 { 72 | writeln!(f, "Invalids list:")?; 73 | for invalid in &self.invalids { 74 | 
writeln!(f, "{}", invalid)?; 75 | } 76 | } 77 | 78 | if self.args.tld && self.args.rootdomains { 79 | // lay them up side by side 80 | writeln!(f, "Top {} TLD and root domains:", self.args.limit)?; 81 | let tld = self.tld(); 82 | let rootdomains = self.rootdomains(); 83 | let left_pad = tld.iter().map(|(tld, count)| format!("{:>10}: {:>7}", tld, count.to_formatted_string(&Locale::en)).len()).max().unwrap_or(0); 84 | for ((tld, tld_count), (root, root_count)) in tld.iter().zip(rootdomains.iter()) { 85 | writeln!(f, "{:width$} {}: {}", format!("{:>10}: {:>7} ", format!(".{}", tld), tld_count.to_formatted_string(&Locale::en)), root, root_count.to_formatted_string(&Locale::en), width = left_pad)?; 86 | } 87 | } else { 88 | if self.args.tld { 89 | writeln!(f, "TLD:")?; 90 | let tlds = self.tld(); 91 | for tld in tlds { 92 | writeln!(f, " {:>10}: {:>7}", format!(".{}", tld.0), tld.1.to_formatted_string(&Locale::en))?; 93 | } 94 | } 95 | if self.args.rootdomains { 96 | writeln!(f, "Root domains:")?; 97 | let rootdomains = self.rootdomains(); 98 | for rd in rootdomains { 99 | writeln!(f, " {}: {}", rd.0, rd.1.to_formatted_string(&Locale::en))?; 100 | } 101 | } 102 | } 103 | 104 | Ok(()) 105 | } 106 | } 107 | } 108 | 109 | impl $name { 110 | pub fn tld(&self) -> Vec<(Domain, u32)> { 111 | // Step 1: Extract TLDs and count occurrences 112 | let mut count: HashMap = HashMap::new(); 113 | for domain in &self.domains { 114 | // Split the domain by '.' 
and get the last part 115 | if let Some(tld) = domain.rsplit('.').next() { 116 | *count.entry(tld.to_lowercase()).or_insert(0) += 1; 117 | } 118 | } 119 | 120 | // Step 2: Sort the counts in descending order 121 | let mut count_vec: Vec<_> = count.into_iter().collect(); 122 | count_vec.sort_by(|a, b| if a.1 == b.1 { 123 | a.0.cmp(&b.0) 124 | } else { 125 | b.1.cmp(&a.1) 126 | }); 127 | if self.args.limit > 0 { 128 | if count_vec.len() > self.args.limit { 129 | count_vec.truncate(self.args.limit) 130 | } 131 | } 132 | count_vec 133 | } 134 | 135 | pub fn rootdomains(&self) -> Vec<(Domain, u32)> { 136 | // Step 1: Extract TLDs and count occurrences 137 | let mut count: HashMap = HashMap::new(); 138 | for domain in &self.domains { 139 | // Split the domain by '.' and get the last two parts 140 | let parts: Vec<&str> = domain.split('.').collect(); 141 | if parts.len() >= 2 { 142 | // Join the last two segments to form the root domain 143 | let rootdomain = format!("{}.{}", parts[parts.len() - 2], parts[parts.len() - 1]); 144 | *count.entry(rootdomain.to_lowercase()).or_insert(0) += 1; 145 | } 146 | } 147 | 148 | // Step 2: Sort the counts in descending order 149 | let mut count_vec: Vec<_> = count.into_iter().collect(); 150 | count_vec.sort_by(|a, b| if a.1 == b.1 { 151 | a.0.cmp(&b.0) 152 | } else { 153 | b.1.cmp(&a.1) 154 | }); 155 | if self.args.limit > 0 { 156 | if count_vec.len() > self.args.limit { 157 | count_vec.truncate(self.args.limit) 158 | } 159 | } 160 | count_vec 161 | } 162 | 163 | pub fn sorteddomains(&self) -> Vec { 164 | // Function to parse a domain into components: (subdomain, root domain, TLD) 165 | fn parse_domain(domain: &str) -> Vec { 166 | let parts: Vec<&str> = domain.split('.').collect(); 167 | let tld = parts.last().unwrap().to_string(); // Get TLD 168 | let root = parts.get(parts.len() - 2).unwrap_or(&"").to_string(); // Get root domain 169 | let subdomain = parts[..parts.len() - 2].join("."); // Join remaining parts as subdomain 170 | 171 
| let mut r = vec![subdomain, root, tld]; 172 | // If there are no subdomains, push an empty string 173 | if r[0].is_empty() { 174 | r[0] = "".to_string(); 175 | } 176 | r 177 | } 178 | 179 | let mut v: Vec = self.domains.clone().into_iter().collect(); 180 | v.sort_by(|a, b| { 181 | let a_parts = parse_domain(a); 182 | let b_parts = parse_domain(b); 183 | 184 | // Compare by root domain and TLD first 185 | match a_parts[1].cmp(&b_parts[1]) { 186 | Ordering::Equal => { 187 | // Then compare by first-level subdomain 188 | match a_parts[0].cmp(&b_parts[0]) { 189 | Ordering::Equal => { 190 | // Finally, compare remaining subdomains 191 | a_parts[2..].cmp(&b_parts[2..]) 192 | } 193 | other => other, 194 | } 195 | } 196 | other => other, 197 | } 198 | }); 199 | v 200 | } 201 | } 202 | 203 | impl Comparable for $name { 204 | fn get_domains(&self) -> &IndexSet { 205 | &self.domains 206 | } 207 | fn get_args(&self) -> &Arguments { 208 | &self.args 209 | } 210 | } 211 | } 212 | } 213 | 214 | with_hosts_collection_shared_fields_and_impl!( 215 | #[derive(Debug, Default, Clone)] 216 | struct Hostssource {} 217 | ); 218 | 219 | 220 | pub trait Comparable: Display + Send + Sync { 221 | fn get_domains(&self) -> &IndexSet; 222 | fn get_args(&self) -> &Arguments; 223 | 224 | fn compare(&self, thing: Box) { 225 | println!("{}", self); 226 | println!("{}", thing); 227 | if self.get_args().intersection_list { 228 | _ = self.intersection(thing); 229 | } 230 | } 231 | 232 | /// Tally the intersection of two domain lists 233 | fn intersection(&self, comp: Box) -> () { 234 | let first = self.get_domains().len(); 235 | let second = comp.get_domains().len(); 236 | let mut combined = self.get_domains().clone(); 237 | for domain in comp.get_domains().clone() { 238 | combined.insert(domain); 239 | } 240 | println!("Intersection: {} domains", (first + second - combined.len()).to_formatted_string(&Locale::en)); 241 | () 242 | } 243 | } 244 | 245 | pub type Hostssources = Vec; 246 | 247 | impl 
Hostssource { 248 | pub async fn new(location: impl Into, name: impl Into) -> Hostssource { 249 | let mut hs = Hostssource { 250 | name: name.into(), 251 | ..Default::default() 252 | }; 253 | // Ignore the result for now. 254 | _ = hs.load(&location.into()).await; 255 | hs 256 | } 257 | 258 | pub async fn load(&mut self, src: &str) -> anyhow::Result<()> { 259 | let mut actualsrc = src; 260 | // check if src is a shortcut 261 | let shortcuts = get_shortcuts(); 262 | let shortcut = shortcuts.get(src); 263 | if shortcut.is_some() { 264 | self.location = shortcut.unwrap().to_string(); 265 | actualsrc = self.location.as_str(); 266 | } else { 267 | self.location = actualsrc.to_string(); 268 | } 269 | 270 | let normalizedsrc = actualsrc.to_lowercase(); 271 | 272 | if actualsrc.contains('\n') { 273 | // if it's a list of domains 274 | self.raw_list = actualsrc 275 | .trim() 276 | .split('\n') 277 | .map(|l| l.trim().to_string()) 278 | .collect::>(); 279 | self.location = "text input".to_string(); 280 | } else if normalizedsrc.starts_with("http") { 281 | // if it's a URL 282 | // check the cache 283 | let cache_file = cache::get(normalizedsrc.clone()).await; 284 | if !self.args.skipcache && cache_file.is_some() { 285 | // read the cache 286 | if self.args.verbose { 287 | println!("==> Loading from cache: {}", src); 288 | } 289 | let file = File::open(cache_file.unwrap()).expect(&format!("File does not exist: {}", actualsrc)); 290 | let buf = BufReader::new(file); 291 | self.raw_list = buf 292 | .lines() 293 | .map(|l| l.expect("Could not parse line")) 294 | .collect(); 295 | } else { 296 | // if no cache 297 | if self.args.verbose { 298 | println!("==> Loading over HTTP(S): {}", src); 299 | } 300 | let resp = reqwest::blocking::get(actualsrc).expect("request failed"); 301 | let body = resp.text().expect("body invalid"); 302 | self.raw_list = body.clone().lines().map(|l| l.to_string()).collect(); 303 | // submit to cache 304 | _ = cache::set(normalizedsrc.clone(), body); 305 
| } 306 | } else if Path::new(actualsrc).exists(){ 307 | // if it's a file 308 | let file = File::open(actualsrc).expect(&format!("Problem opening file: {}", actualsrc)); 309 | let buf = BufReader::new(file); 310 | self.raw_list = buf 311 | .lines() 312 | .map(|l| l.expect("Could not parse line")) 313 | .collect(); 314 | } else { 315 | // To Do: bomb out more gracefully 316 | panic!("Shortcut, URL, or File \"{}\" does not exist.", actualsrc); 317 | } 318 | self.normalize(); 319 | 320 | return Ok(()); 321 | } 322 | 323 | fn normalize(&mut self) { 324 | self.trimlines(); 325 | self.removeblanklines(); 326 | self.frontmatter(); 327 | self.removecommentlines(); 328 | self.extract_domains(); 329 | if self.args.domains_sort { 330 | let sorted = self.sorteddomains(); 331 | self.domains.drain(..); 332 | self.domains = sorted.into_iter().collect(); 333 | } 334 | } 335 | 336 | fn trimlines(&mut self) { 337 | let mut lines: Vec = self.raw_list.clone(); 338 | 339 | lines.iter_mut().for_each(|line| { 340 | *line = norm_string(line.as_str()); 341 | *line = trim_inline_comments(line.to_owned()); 342 | if line.chars().count() > 0 && !self.domains.insert(line.to_owned()) { 343 | self.duplicates.insert(line.to_owned()); 344 | }; 345 | }); 346 | } 347 | 348 | fn extract_domains(&mut self) { 349 | let mut domains_result: Domains = IndexSet::new(); 350 | // Domain aliases which are often found in hosts files which we do not want 351 | // to flag as formally invalid. 
352 | let headertokens = vec![ 353 | "::1", 354 | "broadcasthost", 355 | "fe80::1%lo0", 356 | "ff00::0", 357 | "ff02::1", 358 | "ff02::2", 359 | "ff02::3", 360 | "ip6-allhosts", 361 | "ip6-allnodes", 362 | "ip6-allrouters", 363 | "ip6-localhost", 364 | "ip6-localnet", 365 | "ip6-loopback", 366 | "ip6-mcastprefix", 367 | "local", 368 | "localhost", 369 | "localhost.localdomain" 370 | ]; 371 | 372 | for line in &self.domains { 373 | for element in line.split_whitespace() { 374 | if element != "0.0.0.0" 375 | && element != "127.0.0.1" 376 | && element != "255.255.255.255" 377 | && !headertokens.contains(&element) { 378 | if is_domain(element) { 379 | let unique = domains_result.insert(element.to_string()); 380 | if !unique { 381 | self.duplicates.insert(element.to_string()); 382 | } 383 | } else { 384 | self.invalids.insert(element.to_string()); 385 | } 386 | } 387 | } 388 | } 389 | self.domains = domains_result; 390 | } 391 | 392 | fn removeblanklines(&mut self) { 393 | self.domains.retain(|line| !line.is_empty()); 394 | } 395 | 396 | fn frontmatter(&mut self) { 397 | for line in &self.raw_list { 398 | if line.starts_with('#') { 399 | self.front_matter.push(line.to_string()); 400 | } else { 401 | break; 402 | } 403 | } 404 | } 405 | 406 | fn removecommentlines(&mut self) { 407 | self.domains.retain(|line| !line.starts_with('#')); 408 | } 409 | } 410 | 411 | with_hosts_collection_shared_fields_and_impl!( 412 | #[derive(Default)] 413 | struct Amalgam { 414 | sources: Hostssources, 415 | } 416 | ); 417 | 418 | impl Amalgam { 419 | #[allow(dead_code)] 420 | pub async fn new(locations: Vec + Clone>) -> Amalgam { 421 | let mut amalgam: Amalgam = Amalgam { 422 | sources: Hostssources::new(), 423 | front_matter: vec![], 424 | domains: Domains::new(), 425 | ..Default::default() 426 | }; 427 | for l in locations { 428 | let mut s = block_on( 429 | Hostssource::new( 430 | l.clone().into(), 431 | l.into(), 432 | ) 433 | ); 434 | amalgam.front_matter.append(&mut s.front_matter); 
435 | for domain in s.domains.clone() { 436 | amalgam.domains.insert(domain); 437 | } 438 | amalgam.raw_list.append(&mut s.raw_list.clone()); 439 | amalgam.sources.push(s); 440 | } 441 | amalgam 442 | } 443 | } 444 | 445 | #[async_std::test] 446 | async fn test_create_amalgam_with_lists_has_domains() { 447 | use thousands::Separable; 448 | let a = 449 | Amalgam::new( 450 | vec![ 451 | "stevenblack", 452 | "mvps", 453 | "yoyo", 454 | "someonewhocares", 455 | ] 456 | ).await 457 | ; 458 | let mut tally: usize = 0; 459 | for s in a.sources { 460 | tally += s.domains.len(); 461 | println!("Source {}: {} domains", s.name, s.domains.len().separate_with_commas()); 462 | } 463 | println!("Total: {} domains in all, {} domains net", tally.separate_with_commas(), a.domains.len().separate_with_commas()); 464 | assert!( 465 | tally >= a.domains.len(), 466 | "Expected total domains to be greater than or equal to net domains" 467 | ); 468 | } 469 | 470 | #[async_std::test] 471 | async fn test_create_amalgam_with_duplicate_lists_does_not_double_count_domains() { 472 | let a = 473 | Amalgam::new( 474 | vec![ 475 | "stevenblack", 476 | ]).await; 477 | let b = 478 | Amalgam::new( 479 | vec![ 480 | "stevenblack", 481 | "stevenblack", 482 | ]).await; 483 | assert!( 484 | a.domains.len() == b.domains.len(), 485 | "Expected resultant number of domainsto be equal" 486 | ); 487 | } 488 | 489 | #[async_std::test] 490 | async fn test_create_amalgam_with_shortcuts_has_domains() { 491 | use thousands::Separable; 492 | let a = 493 | Amalgam::new( 494 | vec![ 495 | "base", 496 | "p", 497 | "p-only", 498 | "g", 499 | "g-only", 500 | "fgps", 501 | ] 502 | ).await 503 | ; 504 | let mut tally: usize = 0; 505 | for s in a.sources { 506 | tally += s.domains.len(); 507 | println!("Source {}: {} domains", s.name, s.domains.len().separate_with_commas()); 508 | } 509 | println!("Total: {} domains in all, {} domains net", tally.separate_with_commas(), a.domains.len().separate_with_commas()); 510 | assert!( 
#[cfg(test)]
mod tests {
    use super::*;
    use async_task_group::group;
    use futures::executor::block_on;

    /// The author's local checkout; file-based tests are skipped where it is absent.
    const LOCAL_HOSTS: &str = "/Users/Steve/Dropbox/dev/hosts/hosts";

    /// Loads `src` into a default source, ignoring the load result for now.
    fn loaded(src: &str) -> Hostssource {
        let mut s = Hostssource::default();
        _ = block_on(s.load(src));
        s
    }

    /// Shared assertions for sources expected to hold a large real-world list.
    fn assert_populated(s: &Hostssource, context: &str, min_front_matter: usize) {
        assert!(
            s.front_matter.len() > min_front_matter,
            "{}, expected front matter length to be greater than {}",
            context,
            min_front_matter
        );
        assert!(
            s.raw_list.len() > 1_000,
            "{}, expected raw list length to be greater than 1,000",
            context
        );
        assert!(
            s.domains.len() > 1_000,
            "{}, expected number of domains to be greater than 1,000",
            context
        );
    }

    #[async_std::test]
    async fn no_task() {
        let handle = group(|group| async move { Ok::<_, ()>(group) });
        assert!(handle.await.is_ok(), "Expected task group to be ok");
    }

    #[async_std::test]
    async fn one_empty_task() {
        let handle = group(|group| async move {
            group.spawn(async move { Ok(()) });
            Ok::<_, ()>(group)
        });
        assert!(handle.await.is_ok(), "Expected task group to be ok");
    }

    #[test]
    fn test_hostssource_load_from_file_has_domains() {
        if !Path::new(LOCAL_HOSTS).exists() {
            // `load` panics on a missing path, so skip on other machines.
            return;
        }
        let s = loaded(LOCAL_HOSTS);
        assert_eq!(
            s.location, LOCAL_HOSTS,
            "Loading from local file, expected location to be {}",
            LOCAL_HOSTS
        );
        assert_populated(&s, "Loading from local file", 0);
    }

    #[test]
    fn test_hostssource_new_from_file_has_domains() {
        if !Path::new(LOCAL_HOSTS).exists() {
            return;
        }
        let s = block_on(Hostssource::new(LOCAL_HOSTS, "arbitrary name"));
        assert_eq!(
            s.location, LOCAL_HOSTS,
            "Loading from local file, expected location to be {}",
            LOCAL_HOSTS
        );
        assert_populated(&s, "Loading from local file", 0);
    }

    #[test]
    fn test_hostssource_load_from_github_has_domains() {
        let url = "https://raw.githubusercontent.com/StevenBlack/hosts/f5d5efab/data/URLHaus/hosts";
        let s = loaded(url);
        assert_eq!(
            s.location, url,
            "Loading from GitHub, expected location and URL to be the same"
        );
        assert_populated(&s, "Loading from GitHub", 4);
    }

    #[test]
    fn test_hostssource_load_big_file_from_github_has_domains() {
        let url = "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts";
        let s = loaded(url);
        assert_eq!(
            s.location, url,
            "Loading a big file from GitHub, expected location and URL to be the same"
        );
        assert_populated(&s, "Loading a big file from GitHub", 4);
    }

    #[test]
    fn test_hostssource_load_from_shortcut_has_domains() {
        let s = loaded("base");
        assert_eq!(
            s.location, "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts",
            "When using a shortcut, expected location and URL to be the same"
        );
        assert_populated(&s, "When using a shortcut", 0);
    }

    #[test]
    fn test_hostssource_load_from_multi_line_text_has_domains() {
        let s = loaded(
            r##"
            # test
            # test 2
            0.0.0.0 example.com
            # this is a comment
            0.0.0.0 www.example.com
            "##,
        );
        assert_eq!(s.front_matter.len(), 2, "front matter length");
        assert_eq!(s.raw_list.len(), 5, "raw list length");
        assert_eq!(s.domains.len(), 2, "number of domains");
        assert_eq!(s.duplicates.len(), 0, "number of duplicates");
        assert_eq!(s.invalids.len(), 0, "number of invalids");
    }

    #[test]
    fn test_hostssource_load_from_single_line_text_has_domains() {
        let s = loaded(
            r##"
            0.0.0.0 example.com
            "##,
        );
        assert_eq!(s.front_matter.len(), 0, "front matter length");
        assert_eq!(s.raw_list.len(), 1, "raw list length");
        assert_eq!(s.domains.len(), 1, "number of domains");
        assert_eq!(s.duplicates.len(), 0, "number of duplicates");
        assert_eq!(s.invalids.len(), 0, "number of invalids");
    }

    #[test]
    fn test_hostssource_load_from_multi_line_text_with_duplicates_has_no_duplicate_domains() {
        let s = loaded(
            r##"
            # test
            # test 2
            0.0.0.0 example.com
            0.0.0.0 www.example.com
            0.0.0.0 example.com
            "##,
        );
        assert_eq!(s.front_matter.len(), 2, "front matter length");
        assert_eq!(s.raw_list.len(), 5, "raw list length");
        assert_eq!(s.domains.len(), 2, "number of domains");
        assert_eq!(s.duplicates.len(), 1, "number of duplicates");
    }

    #[test]
    fn test_hostssource_load_from_multi_line_text_with_multiple_domains_per_line_produces_normalized_list_of_domains() {
        let s = loaded(
            r##"
            # comment line
            # comment line 2
            0.0.0.0 example.com
            0.0.0.0 www.example.com
            127.0.0.1 example.org www.example.org
            127.0.0.1 something.org
            # some other comment
            127.0.0.1 something.else.org
            "##,
        );
        let expected_domains: IndexSet<String> = IndexSet::from([
            "example.com".to_string(),
            "www.example.com".to_string(),
            "example.org".to_string(),
            "www.example.org".to_string(),
            "something.org".to_string(),
            "something.else.org".to_string(),
        ]);
        assert_eq!(s.domains.len(), 6, "number of domains");
        assert_eq!(s.domains, expected_domains, "Expected domains to be identical");
    }

    #[test]
    fn test_hostssource_load_from_multi_line_text_with_three_domains_per_line_produces_normalized_list_of_domains() {
        let s = loaded(
            r##"
            # comment line
            # comment line 2
            0.0.0.0 example.com www.example.com example.org
            # some other comment
            "##,
        );
        let expected_domains: IndexSet<String> = IndexSet::from([
            "example.com".to_string(),
            "www.example.com".to_string(),
            "example.org".to_string(),
        ]);
        assert_eq!(s.domains.len(), 3, "number of domains");
        assert_eq!(s.domains, expected_domains, "Expected domains to be identical");
    }

    #[test]
    fn test_domains_type_inserting_duplicates_does_not_produce_duplicate_domains() {
        let names = ["foo.com", "foo.com", "bar.com"];
        let mut d: Domains = names.iter().map(|n| n.to_string()).collect();
        assert_eq!(d.len(), 2);
        // Merging a second, identical set must not add anything.
        let d2: Domains = names.iter().map(|n| n.to_string()).collect();
        d.extend(d2);
        assert_eq!(d.len(), 2, "Expected domain set to be 2, but got {}", d.len());
    }
}
line 2 849 | 0.0.0.0 example.com www.example.com example.org 850 | # some other comment 851 | "##, 852 | )); 853 | assert!(s.domains.len() == 3); 854 | 855 | let expected_domains: IndexSet = IndexSet::from([ 856 | "example.com".to_string(), 857 | "www.example.com".to_string(), 858 | "example.org".to_string(), 859 | ]); 860 | assert!( 861 | s.domains == expected_domains, 862 | "Expected domains to be identical, but got: {:?} expected: {:?}", 863 | s.domains, 864 | expected_domains 865 | ); 866 | } 867 | 868 | #[test] 869 | fn test_domains_type_inserting_duplicates_does_not_produce_duplicate_domains() { 870 | let mut d = Domains::new(); 871 | d.insert("foo.com".to_string()); 872 | d.insert("foo.com".to_string()); 873 | d.insert("bar.com".to_string()); 874 | assert!(d.len() == 2); 875 | let mut d2 = Domains::new(); 876 | d2.insert("foo.com".to_string()); 877 | d2.insert("foo.com".to_string()); 878 | d2.insert("bar.com".to_string()); 879 | for domain in d2 { 880 | d.insert(domain); 881 | } 882 | assert!( 883 | d.len() == 2, 884 | "Expected domain set to be 2, but got {}", 885 | d.len() 886 | ); 887 | } 888 | } 889 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | /// Utilities and functions 3 | /// 4 | use addr::parser::DnsName; 5 | use psl::List; 6 | use std::hash::{Hash, Hasher}; 7 | use std::collections::hash_map::DefaultHasher; 8 | 9 | 10 | pub fn is_domain(s: &str) -> bool { 11 | // parse_dns_name(s).is_ok() 12 | if !s.contains('.') { 13 | return false; 14 | } 15 | List.parse_dns_name(s).is_ok() 16 | } 17 | 18 | #[test] 19 | fn test_is_domaain_function_handles_good_and_bad_domains() { 20 | assert_eq!(is_domain("localhost"), false); 21 | assert_eq!(is_domain("com"), false); 22 | assert_eq!(is_domain("github"), false); 23 | assert_eq!(is_domain("github.com"), true); 24 | // assert_eq!(is_domain("github-.com"), 
false); 25 | // assert_eq!(is_domain("-github.com"), false); 26 | assert_eq!(is_domain("www.github.com"), true); 27 | assert_eq!(is_domain("123.com"), true); 28 | } 29 | 30 | #[test] 31 | fn test_is_domaain_function_handles_labels_that_are_too_long() { 32 | 33 | // Each element of a domain name separated by [.] is called a “label.” 34 | // The maximum length of each label is 63 characters, and a full domain 35 | // name can have a maximum of 253 characters. Alphanumeric characters and 36 | // hyphens can be used in labels, but a domain name must not commence 37 | // or end with a hyphen. 38 | 39 | 40 | // this label is max length (63) 41 | assert_eq!( 42 | is_domain(("a".repeat(63) + ".com").as_str()), 43 | true 44 | ); 45 | // this label is too long (>= 64) 46 | assert_eq!( 47 | is_domain(("a".repeat(64) + ".com").as_str()), 48 | false 49 | ); 50 | } 51 | 52 | #[test] 53 | fn test_is_domaain_function_handles_domains_that_are_too_long() { 54 | 55 | // Each element of a domain name separated by [.] is called a “label.” 56 | // The maximum length of each label is 63 characters, and a full domain 57 | // name can have a maximum of 253 characters. Alphanumeric characters and 58 | // hyphens can be used in labels, but a domain name must not commence 59 | // or end with a hyphen. 
/// Prints the compiler's name for the type of the referenced value.
#[allow(dead_code)]
pub fn print_type_of<T>(_: &T) {
    println!("===> {}", std::any::type_name::<T>())
}

/// Cuts `s` at the first `#` and trims what is left; a string without `#`
/// is returned unchanged (and untrimmed).
pub fn trim_inline_comments(s: String) -> String {
    match s.find('#') {
        Some(idx) => s[..idx].trim().to_string(),
        None => s,
    }
}

/// Trims `passed` and collapses every run of ASCII whitespace to one space.
pub fn norm_string(passed: &str) -> String {
    passed.split_ascii_whitespace().collect::<Vec<_>>().join(" ")
}

#[test]
fn test_string_with_excessive_spaces_is_trimmed_and_normalized() {
    assert_eq!(
        norm_string("   Hello      World   "),
        "Hello World".to_string()
    );
}

#[test]
fn test_line_feeds_are_properly_handled() {
    assert!("xx\nxx".contains("\n"));
    assert_eq!(
        "xx\nxx"
            .to_string()
            .split("\n")
            .map(|l| l.to_string())
            .collect::<Vec<String>>()
            .len(),
        2
    );
}

/// Hashes `obj` with the standard library's `DefaultHasher` and returns the
/// 64-bit result as lower-case hexadecimal.
pub fn hash<T>(obj: T) -> String
where
    T: Hash,
{
    let mut hasher = DefaultHasher::new();
    obj.hash(&mut hasher);
    format!("{:x}", hasher.finish())
}

#[test]
fn test_hashing_strings_returns_expected_values() {
    assert_eq!(hash("hosts".to_string()), "b6e6d131fe41b528".to_string());
    assert_eq!(hash("domains".to_string()), "5ae5d5636edd71d4".to_string());
}

/// Concatenates the inner vectors in order.
pub fn flatten<T>(nested: Vec<Vec<T>>) -> Vec<T> {
    nested.into_iter().flatten().collect()
}

/// Iterator over the distinct `len`-element combinations of a sorted multiset.
pub struct Combinations<T>
where
    T: Ord + Clone,
{
    original: Vec<T>,
    position: Vec<usize>,
    len: usize,
    started: bool,
}

impl<T> Combinations<T>
where
    T: Ord + Clone,
{
    /// Initializes the setup for the permutation.
    /// `original`: `Vec` of the values to permutate over (example: vec![1, 2, 2, 3])
    /// `len`: The length of each returned combination (number of draws, attempts)
    /// ```ignore
    /// let computed: Vec<_> = Combinations::new(vec![1, 2, 2, 3, 4], 3).collect();
    /// let expected = vec![
    ///     vec![1, 2, 2],
    ///     vec![1, 2, 3],
    ///     vec![1, 2, 4],
    ///     vec![1, 3, 4],
    ///     vec![2, 2, 3],
    ///     vec![2, 2, 4],
    ///     vec![2, 3, 4],
    /// ];
    /// assert!(computed == expected)
    /// ```
    /// Note: This sorts the original vector as the algorithm requires this.
    ///
    /// # Panics
    /// Panics when `len` is zero or greater than `original.len()`.
    pub fn new(mut original: Vec<T>, len: usize) -> Self {
        if original.len() >= len && len >= 1 {
            original.sort_unstable();
            Self {
                original,
                position: (0..len).collect(),
                len,
                started: false,
            }
        } else {
            panic!("the length has to be smaller then the datasets len");
        }
    }

    /// Overwrites `col` with the elements at the current positions.
    #[inline]
    fn insert(&self, col: &mut Vec<T>) {
        col.clear();
        col.extend(self.position.iter().map(|&n| self.original[n].clone()));
    }

    /// Clears the contents of the comb vector and inserts the next combination into the vec.
    /// This is useful if you do not need the data from the previous iteration.
    /// Returns `false`, leaving `comb` untouched, once all combinations are exhausted.
    pub fn next_combination(&mut self, comb: &mut Vec<T>) -> bool {
        if !self.started {
            // first pass through
            self.started = true;
            self.insert(comb);
            return true;
        }

        let org_len = self.original.len();
        // check if we can't bump the back number
        if self.original[self.position[self.len - 1]] == self.original[org_len - 1] {
            // locate the number closest behind that needs to be bumped
            for i in 2..=self.len {
                if self.original[self.position[self.len - i]] < self.original[org_len - i] {
                    // find the first strictly larger value after that slot
                    let lastpos = self.position[self.len - i];
                    let val = &self.original[lastpos];
                    for j in lastpos + 1..org_len {
                        if *val < self.original[j] {
                            // reset this slot and every slot after it to a run starting at j
                            for k in 0..i {
                                self.position[self.len - i + k] = j + k;
                            }
                            self.insert(comb);
                            return true;
                        }
                    }
                }
            }
            false
        } else {
            // advance the last slot past any repeats of its current value
            let mut i = self.position[self.len - 1];
            let current = &self.original[i];
            let mut next = current;
            while current == next {
                i += 1;
                next = &self.original[i];
            }
            self.position[self.len - 1] = i;
            self.insert(comb);
            true
        }
    }
}

impl<T> Iterator for Combinations<T>
where
    T: Ord + Clone,
{
    type Item = Vec<T>;

    fn next(&mut self) -> Option<Self::Item> {
        let mut vals = Vec::with_capacity(self.len);
        if self.next_combination(&mut vals) {
            Some(vals)
        } else {
            None
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn combinations_next_works_as_expected() {
        assert_eq!(
            Combinations::new(vec![2, 2, 2], 2).next().unwrap(),
            vec![2, 2],
            "next should return the next combination"
        )
    }

    #[test]
    fn combinations_generate_works_as_expected() {
        assert_eq!(
            Combinations::new(vec![1, 2, 3], 2)
                .take(10)
                .collect::<Vec<_>>(),
            vec![vec![1, 2], vec![1, 3], vec![2, 3]],
            "take should return the correct combinations"
        )
    }

    #[test]
    fn test_complicated() {
        let actual: Vec<_> = Combinations::new(vec![1, 2, 2, 3, 4], 3).collect();
        let expected = vec![
            vec![1, 2, 2],
            vec![1, 2, 3],
            vec![1, 2, 4],
            vec![1, 3, 4],
            vec![2, 2, 3],
            vec![2, 2, 4],
            vec![2, 3, 4],
        ];
        assert_eq!(
            actual,
            expected,
            "Complex combinations should work as expected"
        )
    }

    #[test]
    fn test_combinations_with_strings() {
        let actual: Vec<_> = Combinations::new(vec!["1", "2", "2", "3", "4"], 3).collect();
        let expected = vec![
            vec!["1", "2", "2"],
            vec!["1", "2", "3"],
            vec!["1", "2", "4"],
            vec!["1", "3", "4"],
            vec!["2", "2", "3"],
            vec!["2", "2", "4"],
            vec!["2", "3", "4"],
        ];
        assert_eq!(
            actual,
            expected,
            "combinations of strings should work as expected"
        )
    }
}

// ---- /tests/readme.md ----
// # Integration Tests
//
// Integration tests are invoked via `cargo test`.