├── .github ├── ISSUE_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── bug.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.ko.md ├── README.md ├── assets ├── balpan-init-demo.gif └── vhs │ └── demo-balpan-init.tape ├── languages.toml ├── src ├── analyzer.rs ├── commands │ ├── boyer_moore.rs │ ├── grep.rs │ ├── mod.rs │ └── pattern_search.rs ├── config.rs ├── grammar.rs ├── language.rs ├── lib.rs ├── main.rs ├── scanner.rs ├── tokens.rs ├── tree_sitter_extended.rs └── utils.rs └── tests ├── analyzer_test.rs ├── analyzer_test └── analyze_test.rs ├── integration_test.rs ├── integration_test ├── analyze_command_test.rs └── analyze_command_test │ ├── c_test.rs │ ├── c_test │ ├── neovim_case_test.rs │ ├── nginx_case_test.rs │ └── redis_case_test.rs │ ├── cpp_test.rs │ ├── cpp_test │ └── blazingmq_case_test.rs │ ├── javascript_test.rs │ ├── javascript_test │ ├── react_native_case_test.rs │ └── svelt_cast_test.rs │ ├── python_test.rs │ ├── python_test │ ├── django_case_test.rs │ ├── python_dependency_injector_case_test.rs │ └── rustpython_case_test.rs │ ├── ruby_test.rs │ ├── ruby_test │ └── mastodon_case_test.rs │ ├── rust_test.rs │ ├── rust_test │ ├── anyhow_case_test.rs │ ├── rustpython_case_test.rs │ └── serde_case_test.rs │ ├── typescript_test.rs │ └── typescript_test │ ├── angular_case_test.rs │ ├── async_case_test.rs │ ├── svelt_case_test.rs │ └── typescript_case_test.rs ├── pattern_search_test.rs └── tree_sitter_extended_test.rs /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | Please follow this guide when creating a new issue. This speeds up the process of replicating the issue and finding a solution. 
3 | 4 | ### Environment details 5 | 6 | * Operation system: 7 | * Language: 8 | 9 | ### Expected behavior 10 | 11 | ### Actual behavior 12 | 13 | ### Steps to reproduce the behavior 14 | 15 | ### Additional details 16 | 17 | * screenshots or screencapture 18 | 19 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: Report a problem 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | * **OS version**: 11 | * **Language**: 12 | 13 | ### 🐛 Describe the bug 14 | 15 | 16 | 17 | ### Expected behaviour 18 | 19 | 20 | 21 | ### Steps to reproduce 22 | 23 | 24 | 25 | Steps to reproduce the behaviour: 26 | 1. Go to ... 27 | 2. ... 28 | 29 | ### Other details or context 30 | 31 | 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest a new feature. 4 | title: Add ... 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | ### Describe the new feature 12 | 13 | 14 | 15 | ### How does this help you? 16 | 17 | 18 | 19 | ### If we couldn't add this feature, is there a compromise you can think of? 20 | 21 | 22 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## :star2: What does this PR do? 
2 | 3 | 4 | 5 | ## :bug: Recommendations for testing 6 | 7 | 8 | 9 | ## :memo: Links to relevant issues or information 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v3 21 | 22 | - name: Set up Rust 23 | uses: actions-rs/toolchain@v1 24 | with: 25 | toolchain: stable 26 | 27 | - name: Build 28 | run: cargo build --verbose 29 | 30 | - name: Run tests 31 | run: cargo test --verbose 32 | 33 | - name: Run Clippy 34 | run: cargo clippy -- -D warnings 35 | 36 | - name: Run fmt 37 | run: cargo fmt --all -- --check 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.svg 3 | .DS_Store -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "balpan" 3 | version = "0.2.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | aho-corasick = "1.0.4" 10 | anyhow = "1.0.71" 11 | cc = "1.0.79" 12 | clap = { version = "4.3.21", features = ["derive"] } 13 | etcetera = "0.8.0" 14 | git2 = "0.17.2" 15 | glob = "0.3.1" 16 | ignore = "0.4.20" 17 | indoc = "2.0.3" 18 | libloading = "0.8.0" 19 | log = "0.4.18" 20 | once_cell = "1.18.0" 21 | regex = "1.9.5" 22 | serde = { version = "1.0", features = ["derive"] } 23 | serde_json = "1.0.104" 24 | strsim = "0.10.0" 25 | tempfile = "3.8.0" 26 | tokio = 
{ version = "1.32.0", features = ["full"] } 27 | toml = "0.7.4" 28 | tree-sitter = "0.20.10" 29 | 30 | [lib] 31 | doctest = false 32 | -------------------------------------------------------------------------------- /README.ko.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |

Balpan CLI

6 |
오픈소스 생태계에 기여하고자 하는 사람들의 온보딩을 돕는 "발판"
7 |
오픈소스 프로젝트의 가독성을 높이고, 누구나 기여할 수 있도록 하자
8 | 9 | [![Rust](https://img.shields.io/badge/Rust-000000?style=for-the-badge&logo=rust&logoColor=white)](https://www.rust-lang.org/) 10 | ![Work In Progress](https://img.shields.io/badge/Work%20In%20Progress-orange?style=for-the-badge) 11 | 12 |
13 | 14 | ## Table of Contents 15 | 16 | - [Introduction](#introduction) 17 | - [Installation](#installation) 18 | - [Requirements](#requirements) 19 | - [Install using homebrew](#brew) 20 | - [Install using cargo](#cargo) 21 | - [Quickstart](#quickstart) 22 | - [Features](#features) 23 | - [Supported Language](#supported-languages) 24 | - [`balpan init`](#balpan-init) 25 | 26 | ## Introduction 27 | 28 | **balpan**은 오픈소스 생태계에 기여하고자 하는 사람들의 온보딩을 돕는 **발판** 이라는 의미로 시작했습니다. ([🔗](https://m.khan.co.kr/national/national-general/article/202109152114035#c2b)) 29 | 30 | **balpan**은 [treesitter](https://tree-sitter.github.io)를 이용해서 소스코드를 트리 구조로 분석하여 시각화하고, 능동적으로 소스코드를 읽는 사람들에게는 좀 더 진입장벽을 낮추는 것을 목표로 합니다. 31 | 32 | 책을 읽을 때도 줄을 치면서 읽듯이, 소스코드를 읽을때도 역시 책갈피 관리하듯이 읽을 수 있도록 하는 것부터 시작합니다. 33 | 34 | ### Disclaimer 35 | 36 | * 트리시터를 설치하는 문제를 부분적으로 해결하기 위해 [helix](https://github.com/helix-editor/helix) 소스코드의 일부를 사용했습니다. 37 | * 관련 코드: [config.rs](https://github.com/malkoG/balpan/blob/main/src/config.rs), [grammar.rs](https://github.com/malkoG/balpan/blob/main/src/grammar.rs), [lib.rs](https://github.com/malkoG/balpan/blob/main/src/lib.rs) 38 | 39 | ## Installation 40 | 41 | ### Requirements 42 | 43 | - OS: Linux/macOS 44 | - Cargo (cargo를 이용해서 설치하는 경우) 45 | 46 | ### Install using homebrew 47 | 48 | ```bash 49 | $ brew install malkoG/x/balpan 50 | ``` 51 | * ⚠️ 당장은 homebrew brew를 이용해서 설치하는 경우 `0.1.1` 릴리즈만 설치될 수 있습니다. 52 | * 릴리즈를 출시할때마다 homebrew에 배포하는 과정을 자동화하는 방법은 알아보고 있는 중입니다. 53 | 54 | ### Install using cargo 55 | 56 | ```bash 57 | $ cargo install --path . 58 | ``` 59 | 60 | ### Quickstart 61 | 62 | **balpan**의 모든 명령어들은 소스코드를 트리구조의 형태로 분석하기 위해 treesitter 기반으로 생성된 파서를 이용합니다. 63 | **balpan**의 각 명령어를 사용하기 전에 분석하고자 하는 리포지토리의 홈 디텍토리에서 아래의 명령어를 실행해주세요. 64 | 65 | ```bash 66 | $ balpan init 67 | ``` 68 | 69 | ## Features 70 | 71 | ### Supported Languages 72 | 73 | 당장은 지원하는 언어가 많이 없지만, 트리시터가 지원하는 언어라면 모두 지원할 수 있도록 하는 것을 지향합니다. 
74 | 75 | - Rust 76 | - Python 77 | - Ruby 78 | 79 | ### `balpan init` 80 | 81 | ![balpan init demo animation](./assets/balpan-init-demo.gif) 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | * [한국어](./README.ko.md) 3 | 4 | # IN CONSTRUCTION 🚧 5 | 6 | -------------------------------------------------------------------------------- /assets/balpan-init-demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/balpan-rs/balpan/f1bd990b041f005116585913d6b92250e388cf15/assets/balpan-init-demo.gif -------------------------------------------------------------------------------- /assets/vhs/demo-balpan-init.tape: -------------------------------------------------------------------------------- 1 | Output assets/balpan-init-demo.gif 2 | 3 | Set FontSize 15 4 | Set Width 1200 5 | Set Height 600 6 | 7 | Type "# This is demo animation for `balpan init`" 8 | Sleep 300ms 9 | Enter 10 | 11 | Type "" 12 | Sleep 300ms 13 | Enter 14 | 15 | 16 | Type "balpan init # Automatically generates TODO comments" 17 | Sleep 300ms 18 | Enter 19 | 20 | Type "rg ' \[TODO\] ' src" 21 | Sleep 300ms 22 | Enter 23 | 24 | Sleep 8s 25 | -------------------------------------------------------------------------------- /src/analyzer.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::collections::VecDeque; 3 | 4 | use tree_sitter::{Node, Parser, Point, Range, Tree}; 5 | 6 | use crate::grammar::get_language; 7 | use crate::language::Language; 8 | use crate::tokens::CommentToken; 9 | use crate::tree_sitter_extended::{MembershipCheck, RangeFactory, ResolveSymbol}; 10 | 11 | pub struct Analyzer { 12 | pub source_code: String, 13 | pub language: Language, 14 | } 15 | 16 | impl<'tree> Analyzer { 17 | fn get_indent_comment_pool(&self) -> 
Vec { 18 | let comment_token = CommentToken::from_language(&self.language); 19 | let comment = comment_token.to_str(); 20 | let ident = match self.language { 21 | Language::Ruby => " ", 22 | _ => " ", 23 | }; 24 | let max_ident_level = 100; 25 | 26 | (0..max_ident_level) 27 | .map(|level| { 28 | let indent = ident.repeat(level); 29 | format!("{}{}", indent, comment) 30 | }) 31 | .collect() 32 | } 33 | 34 | fn get_syntax_tree(&self) -> Tree { 35 | let parser = RefCell::new(Parser::new()); 36 | let language = get_language(self.language.as_str()).unwrap(); 37 | 38 | let mut ts_parser = parser.borrow_mut(); 39 | ts_parser 40 | .set_language(language) 41 | .expect("treesitter parser for given language does not exists"); 42 | 43 | let tree = ts_parser.parse(&self.source_code, None); 44 | 45 | tree.expect("Failed to parsing given source code") 46 | } 47 | 48 | pub fn analyze(&self) -> VecDeque { 49 | let tree = self.get_syntax_tree(); 50 | let nodes = self.get_scannable_nodes(&tree); 51 | 52 | let ignorable_node_types = self.language.ignorable_node_types(); 53 | 54 | let nested_traversable_symbols = self.language.nested_traversable_symbols(); 55 | 56 | let mut writer_queue = VecDeque::new(); 57 | let mut pending_queue = VecDeque::new(); 58 | let mut nodes_queue = VecDeque::from(nodes); 59 | let mut indentation_context: VecDeque<(Node, String)> = VecDeque::new(); 60 | let indent_comment_pool = self.get_indent_comment_pool(); 61 | let mut latest_comment_line = ""; 62 | let mut latest_comment_line_index = -1_isize; 63 | 64 | let mut lines = vec![]; 65 | for line in self.source_code.lines() { 66 | lines.push(line.to_string()); 67 | } 68 | 69 | for (i, line) in lines.iter().enumerate() { 70 | let row = i; 71 | let line_idx = i as isize; 72 | let column = line.len(); 73 | 74 | let cursor_position = Point { row, column }; 75 | 76 | if nodes_queue.is_empty() { 77 | writer_queue.push_back(line.to_owned()); 78 | continue; 79 | } 80 | 81 | let (current_node, (row, from, to)) = match 
nodes_queue.front() { 82 | Some(item) => item, 83 | None => panic!("Failed to retrieve treesitter node from queue"), 84 | }; 85 | 86 | let mut symbol_name_with_context = String::new(); 87 | 88 | let mut pop_node = false; 89 | 90 | match Range::from_node(*current_node) { 91 | node_range if cursor_position.is_member_of(node_range) => { 92 | let node_type = current_node.kind(); 93 | 94 | // rust specific code 95 | if node_type == "mod_item" 96 | && node_range.start_point.row == node_range.end_point.row 97 | { 98 | while !pending_queue.is_empty() { 99 | let decorator_line: &str = pending_queue.pop_front().unwrap(); 100 | writer_queue.push_back(decorator_line.to_owned()); 101 | } 102 | writer_queue.push_back(line.to_owned()); 103 | nodes_queue.pop_front(); 104 | continue; 105 | } 106 | 107 | if ignorable_node_types.contains(&node_type) { 108 | while !pending_queue.is_empty() { 109 | let decorator_line: &str = pending_queue.pop_front().unwrap(); 110 | writer_queue.push_back(decorator_line.to_owned()); 111 | } 112 | writer_queue.push_back(line.to_owned()); 113 | nodes_queue.pop_front(); 114 | continue; 115 | } 116 | 117 | if node_type == self.language.decorator_node_type() { 118 | pending_queue.push_back(line); 119 | } else { 120 | for (_node, node_symbol) in indentation_context.iter() { 121 | symbol_name_with_context 122 | .push_str(&format!("{} > ", node_symbol).to_string()); 123 | } 124 | 125 | let node_symbol_with_indent = &lines[*row]; 126 | let node_symbol = &node_symbol_with_indent[from.to_owned()..to.to_owned()]; 127 | 128 | if *from == 0 && *to == 0 { 129 | symbol_name_with_context.push_str("anonymous"); 130 | } else { 131 | symbol_name_with_context.push_str(node_symbol); 132 | } 133 | 134 | let indent_size = indentation_context.len(); 135 | let comment_line: String = format!( 136 | "{} {}", 137 | indent_comment_pool[indent_size].clone(), 138 | symbol_name_with_context 139 | ); 140 | 141 | if latest_comment_line != comment_line { 142 | 
writer_queue.push_back(comment_line); 143 | } 144 | if !pending_queue.is_empty() { 145 | while !pending_queue.is_empty() { 146 | if let Some(queued_line) = pending_queue.pop_front() { 147 | writer_queue.push_back(queued_line.to_owned()); 148 | } 149 | } 150 | } 151 | writer_queue.push_back(line.to_owned()); 152 | pop_node = true; 153 | } 154 | 155 | if !indentation_context.is_empty() { 156 | if let Some((current_context, _)) = indentation_context.back() { 157 | if cursor_position.row >= current_context.end_position().row { 158 | indentation_context.pop_back(); 159 | } 160 | } 161 | } 162 | 163 | if nested_traversable_symbols.contains(&node_type) { 164 | let (_, from, to) = current_node.identifier_range(); 165 | 166 | let symbol: String; 167 | if from == 0 && to == 0 { 168 | symbol = "anonymous".to_string(); 169 | } else { 170 | symbol = line[from.to_owned()..to.to_owned()].to_string(); 171 | } 172 | 173 | indentation_context.push_back((*current_node, symbol)); 174 | pop_node = true; 175 | } 176 | 177 | if cursor_position == current_node.end_position() { 178 | pop_node = true; 179 | } 180 | 181 | if pop_node { 182 | nodes_queue.pop_front(); 183 | } 184 | } 185 | _ => { 186 | if !indentation_context.is_empty() { 187 | if let Some((current_context, _)) = indentation_context.back() { 188 | if cursor_position.row >= current_context.end_position().row { 189 | indentation_context.pop_back(); 190 | } 191 | } 192 | } 193 | 194 | if line == latest_comment_line && latest_comment_line_index == line_idx - 1 { 195 | continue; 196 | } 197 | 198 | let indentation_level = indentation_context.len(); 199 | if line.starts_with(&indent_comment_pool[indentation_level]) { 200 | latest_comment_line = line; 201 | latest_comment_line_index = line_idx; 202 | } 203 | writer_queue.push_back(line.to_owned()); 204 | } 205 | } 206 | } 207 | 208 | writer_queue.to_owned() 209 | } 210 | 211 | /// This methods collects treesitter nodes with BFS 212 | /// 213 | /// All of tree sitter nodes are ordered 
by non decreasing order 214 | fn get_scannable_nodes(&self, tree: &'tree Tree) -> Vec<(Node<'tree>, (usize, usize, usize))> { 215 | let mut deq: VecDeque> = VecDeque::new(); 216 | let scannable_node_types = self.language.scannable_node_types(); 217 | let nested_traversable_symbols = self.language.nested_traversable_symbols(); 218 | let mut result = Vec::new(); 219 | deq.push_back(tree.root_node()); 220 | 221 | while !deq.is_empty() { 222 | if let Some(node) = deq.pop_front() { 223 | let node_type = node.kind(); 224 | 225 | if scannable_node_types.contains(&node_type) { 226 | let identifier_range = node.identifier_range(); 227 | result.push((node.to_owned(), identifier_range)); 228 | } 229 | 230 | if !nested_traversable_symbols.contains(&node_type) 231 | && node_type != self.language.top_level_node_type() 232 | { 233 | continue; 234 | } 235 | deq = self.enqueue_child_nodes(deq, &node); 236 | } 237 | } 238 | 239 | result.sort_by(|(u, _), (v, _)| u.start_position().row.cmp(&v.start_position().row)); 240 | 241 | result.to_owned() 242 | } 243 | 244 | fn enqueue_child_nodes( 245 | &self, 246 | mut deq: VecDeque>, 247 | node: &Node<'tree>, 248 | ) -> VecDeque> { 249 | let mut cursor = node.walk(); 250 | let scannable_node_types = self.language.scannable_node_types(); 251 | let node_type = node.kind(); 252 | 253 | if self.language == Language::Ruby { 254 | if node_type == self.language.top_level_node_type() { 255 | for child_node in node.children(&mut cursor) { 256 | if scannable_node_types.contains(&child_node.kind()) { 257 | deq.push_back(child_node); 258 | } 259 | } 260 | return deq; 261 | } 262 | } else { 263 | for child_node in node.children(&mut cursor) { 264 | if scannable_node_types.contains(&child_node.kind()) { 265 | deq.push_back(child_node); 266 | } 267 | } 268 | } 269 | 270 | cursor.reset(*node); 271 | 272 | if let Some(body) = node.child_by_field_name("body") { 273 | let mut body_cursor = body.walk(); 274 | for child_node in body.children(&mut body_cursor) { 
275 | if scannable_node_types.contains(&child_node.kind()) { 276 | deq.push_back(child_node); 277 | } 278 | } 279 | } 280 | 281 | deq 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /src/commands/boyer_moore.rs: -------------------------------------------------------------------------------- 1 | // ref: https://www.sspilsbury.com/2017-09-23-explaining-boyer-moore/ 2 | // ref: https://github.com/peterjoel/needle/blob/master/src/skip_search.rs 3 | 4 | pub struct BoyerMooreSearch<'a, T> { 5 | pattern: &'a [T], 6 | bad_character_table: [usize; 256], 7 | good_suffixes_table: Vec, 8 | } 9 | 10 | impl<'a, T> BoyerMooreSearch<'a, T> 11 | where 12 | T: Copy + PartialEq + Into, 13 | { 14 | /// Create new Boyer-Moore Search object with given pattern. 15 | /// 16 | /// ### Example 17 | /// 18 | /// Basic usage: 19 | /// 20 | /// If you want to search a pattern ("abc" in this case) in a text, 21 | /// you can simply put it in the function as an argument. 22 | /// 23 | /// ``` 24 | /// use balpan::commands::boyer_moore::BoyerMooreSearch; 25 | /// 26 | /// let searcher = BoyerMooreSearch::new(b"abc"); 27 | /// ``` 28 | pub fn new(pattern: &'a [T]) -> BoyerMooreSearch { 29 | Self { 30 | pattern, 31 | bad_character_table: build_bad_chars_table(pattern), 32 | good_suffixes_table: build_suffixes_table(pattern), 33 | } 34 | } 35 | } 36 | 37 | /// `SearchIn` trait is define the interface which can iterate over the pattern in the text. 38 | pub trait SearchIn<'a, H: ?Sized> { 39 | type Iter: Iterator; 40 | 41 | fn find_in(&'a self, text: &'a H) -> Self::Iter; 42 | fn find_overlapping_in(&'a self, text: &'a H) -> Self::Iter; 43 | /// Find the first occurrence of the pattern within the given text. 
44 | fn find_first_position(&'a self, text: &'a H) -> Option { 45 | self.find_in(text).next() 46 | } 47 | } 48 | 49 | impl<'a, T> SearchIn<'a, [T]> for BoyerMooreSearch<'a, T> 50 | where 51 | T: Copy + PartialEq + Into, 52 | { 53 | type Iter = BoyerMooreIter<'a, T>; 54 | /// Find all occurrences of the pattern within the given text, 55 | /// but only consider non-overlapping cases. 56 | /// 57 | /// `find_in` skips over the length of the pattern each time 58 | /// a match is found, so that overlapping occurrences are ignored. 59 | /// 60 | /// ### How it works: 61 | /// 62 | /// 1. Initialize the search at the beginning of the text. 63 | /// 2. Compare the pattern with the text at the current position. 64 | /// 3. If a match is found, yield the current position and skip forward by the parent's length (to ensure no overlaps). 65 | /// 4. If no match is found, apply the Boyer-Moore skipping rules (bad character and good suffix rules) 66 | /// to jump ahead and continue to search. 67 | /// 5. Repeat steps 2-4 until the end of the text is reached. 68 | /// 69 | /// ### Example 70 | /// 71 | /// Basic usage: 72 | /// 73 | /// ``` ignore 74 | /// use balpan::commands::boyer_moore::{BoyerMooreSearch, SearchIn}; 75 | /// 76 | /// let searcher = BoyerMooreSearch::new(b"aba"); 77 | /// let text = b"ababa"; 78 | /// 79 | /// let result: Vec = searcher.find_in(text).collect(); 80 | /// 81 | /// assert_eq!(vec![0], result); 82 | /// ``` 83 | fn find_in(&'a self, text: &'a [T]) -> Self::Iter { 84 | BoyerMooreIter { 85 | searcher: self, 86 | text, 87 | pos: 0, 88 | overlap_match: false, 89 | } 90 | } 91 | /// Find all the overlapping occurrences of the pattern within given text, including the overlapping matches. 92 | /// Unlike the `find_in` method, which ony considers non-overlapping cases. 93 | /// by considering each position in the text as a starting point for the pattern. 94 | /// 95 | /// ### How it works: 96 | /// 97 | /// 1. 
Initialize the search at the beginning of the text. 98 | /// 2. Compare the pattern with the text at the current position. 99 | /// 3. If a match is found, yield the current position and move only one position forward 100 | /// (instead of skipping by the parent's length). 101 | /// 4. If no match is found, apply the Boyer-Moore skipping rules (bad character and good suffix rules) 102 | /// to jump ahead and continue to search. 103 | /// 5. Repeat steps 2-4 until the end of the text is reached. 104 | /// 105 | /// ### Example 106 | /// 107 | /// ``` ignore 108 | /// use balpan::commands::boyer_moore::{BoyerMooreSearch, SearchIn}; 109 | /// 110 | /// let searcher = BoyerMooreSearch::new(b"aaba"); 111 | /// let text = b"aabaabaaba"; 112 | /// 113 | /// let result: Vec = searcher.find_overlapping_in(text).collect(); 114 | /// 115 | /// assert_eq!(vec![0, 3, 6], result); 116 | /// ``` 117 | /// 118 | /// The `find_overlapping_in` method would find matches at some positions, 119 | /// which means that the pattern "aaba" occurs at positions 0, 3, and 6 in the text. 
120 | fn find_overlapping_in(&'a self, text: &'a [T]) -> Self::Iter { 121 | BoyerMooreIter { 122 | searcher: self, 123 | text, 124 | pos: 0, 125 | overlap_match: true, 126 | } 127 | } 128 | } 129 | 130 | pub struct BoyerMooreIter<'a, T> { 131 | searcher: &'a BoyerMooreSearch<'a, T>, 132 | text: &'a [T], 133 | pos: usize, 134 | overlap_match: bool, 135 | } 136 | 137 | impl<'a, T> Iterator for BoyerMooreIter<'a, T> 138 | where 139 | T: Copy + PartialEq + Into, 140 | { 141 | type Item = usize; 142 | 143 | fn next(&mut self) -> Option { 144 | find_from_position(&self.searcher, self.text, self.pos).map(|pos| { 145 | match self.overlap_match { 146 | true => self.pos = pos + 1, 147 | false => self.pos = pos + self.searcher.pattern.len(), 148 | } 149 | 150 | pos 151 | }) 152 | } 153 | } 154 | /// `find_pending_character_index` method is looking for the occurrence of a specific character (pattern) 155 | /// in a given slice of characters (`chars`). 156 | /// 157 | /// If the character is found, the function returns the index of the found character - start index (`start`), 158 | /// effectively returning the relative position of the found character within the slice starting from the start index (`start`). 159 | /// 160 | /// If the character is not found, simply return 0. 161 | /// 162 | /// ### How it works: 163 | /// 1. Iterate through the slice of characters starting from the index 'start + 1'. 164 | /// 2. Compare each character with the given pattern. 165 | /// 3. If a match is found, return the relative position (i.e., the current index minus the start index). 166 | /// 4. If no match is found, return 0. 
167 | /// 168 | /// ### Example 169 | /// 170 | /// - chars: \['A', 'B', 'C', 'B', 'D'\] 171 | /// - start: 1 172 | /// - pattern: 'B' 173 | /// 174 | /// Step 1: Start searching from index `start + 1` (i.e., 2): 175 | /// 176 | /// chars A B C B D 177 | /// index 0 1 2 3 4 178 | /// start ^ 179 | /// 180 | /// Step 2: Compare each character with the given pattern 'B': 181 | /// 182 | /// chars A B C B D 183 | /// index 0 1 2 3 4 184 | /// start ^ ^ ^ ^ 185 | /// pattern B B B 186 | /// 187 | /// Step 3: Pattern 'B' found at index 3, relative position is 3 - 1 = 2. 188 | /// 189 | /// chars A B C B D 190 | /// index 0 1 2 3 4 191 | /// start ^ ^ 192 | /// pattern B B 193 | /// Result 2 194 | /// 195 | /// ### Example 196 | /// 197 | /// ```ignore 198 | /// use balpan::commands::boyer_moore::find_pending_character_index; 199 | /// 200 | /// let chars = vec!['A', 'B', 'C', 'B', 'D']; 201 | /// let start = 1; 202 | /// let pattern = &'B'; 203 | /// 204 | /// let result = find_pending_character_index(&chars, start, pattern); 205 | /// 206 | /// assert_eq!(2, result); 207 | /// ``` 208 | pub fn find_pending_character_index(chars: &[char], start: usize, pattern: &char) -> usize { 209 | for (i, item) in chars.iter().enumerate().skip(start + 1) { 210 | if item == pattern { 211 | return i - start; 212 | } 213 | } 214 | 215 | 0 216 | } 217 | /// `build_bad_chars_table` method is building a table of bad characters, which is key part of the Boyer-Moore algorithm. 218 | /// 219 | /// ### Description 220 | /// 221 | /// This method pre-computes a table that allows the algorithm to skip sections of the text to be searched, 222 | /// resulting in a lower number of overall character comparisons. 223 | /// 224 | /// In other words, this method creates a table that helps the main search function 225 | /// know how far to jump when a mismatch is found. 226 | /// 227 | /// The table's size is usually 256 bytes, to cover all possible ASCII characters. 
228 | /// 229 | /// ### How it works: 230 | /// 231 | /// For example, let's assume a pattern "GATC": 232 | /// 233 | /// - pattern: "GATC" 234 | /// - length: 4 235 | /// 236 | /// Step 1: Initialize the table with the length of the pattern: 237 | /// 238 | /// table A B C D E F G ... T U V W X Y Z 239 | /// value 4 4 4 4 4 4 4 ... 4 4 4 4 4 4 4 240 | /// 241 | /// Step 2: Iterate through the pattern and update the table except the last character: 242 | /// 243 | /// 'G' is at index 0, distance to end - 1 = 4 - 0 - 1 = 3 244 | /// 'A' is at index 1, distance to end - 1 = 4 - 1 - 1 = 2 245 | /// 'T' is at index 2, distance to end - 1 = 4 - 2 - 1 = 1 246 | /// 'C' is the last character, skip 247 | /// 248 | /// Step 3: Update the table with the calculated distances: 249 | /// 250 | /// table A B C D E F G ... T U V W X Y Z 251 | /// value 2 4 4 4 4 4 3 ... 1 4 4 4 4 4 4 252 | /// 253 | /// ### Conclusion 254 | /// 255 | /// This table is used in the search process, allowing the BM search to skip over portions of 256 | /// the text that do not contain possible matches, thereby reducing the number of comparisons. 257 | pub fn build_bad_chars_table(needle: &[T]) -> [usize; 256] 258 | where 259 | T: Into + Copy, 260 | { 261 | let mut table = [needle.len(); 256]; 262 | for i in 0..needle.len() - 1 { 263 | let c: usize = needle[i].into(); 264 | table[c] = needle.len() - i - 1; 265 | } 266 | 267 | table 268 | } 269 | /// `get_suffix_table` method computes the suffix table. 270 | /// This table helps in defining how much to jump in case of a mismatch after some matches. 271 | /// 272 | /// ### Description 273 | /// 274 | /// This method computes a table where the entry at index `i` represents the length of 275 | /// the largest suffix of the pattern ending at position `i` that is also a prefix of the pattern. 
276 | /// 277 | /// ### How it works: 278 | /// 279 | /// Assume a pattern is "ABAB": 280 | /// 281 | /// Step 1: Initialize the suffixes table with 0 282 | /// 283 | /// table A B A B 284 | /// suffixes 0 0 0 0 285 | /// 286 | /// Step 2: Start with suffix length 1. and check for each suffix it's a prefix of the pattern. 287 | /// 288 | /// For suffix length `1`: `B` is not a prefix, continue 289 | /// 290 | /// For suffix length `2`: `AB` is a prefix, update the entry 291 | /// 292 | /// table A B A B 293 | /// suffixes 0 0 2 0 294 | /// 295 | /// For suffix length `3`: `BAB` is not a prefix, continue 296 | /// 297 | /// ### Conclusion 298 | /// 299 | /// This table used to create the good suffix shift table, which tells the how far to go 300 | /// in case of a mismatch. By understanding the structure of the pattern itself, the BM 301 | /// can skip ahead more efficiently, by reduce the number of comparisons. 302 | pub fn get_suffix_table(pattern: &[T]) -> Vec { 303 | let len = pattern.len(); 304 | let mut suffixes = vec![0; len]; 305 | for suffix_len in 1..pattern.len() { 306 | let mut found_suffix = false; 307 | for i in (0..len - suffix_len).rev() { 308 | // either 0 or a previous match for a 1-smaller suffix 309 | if suffixes[i + suffix_len - 1] == suffix_len - 1 310 | && pattern[i] == pattern[len - suffix_len] 311 | { 312 | suffixes[i + suffix_len - 1] = suffix_len; 313 | found_suffix = true; 314 | } 315 | } 316 | 317 | if !found_suffix { 318 | break; 319 | } 320 | } 321 | 322 | suffixes 323 | } 324 | /// Builds the "good suffix table," 325 | /// which is an essential part of the Boyer-Moore algorithm's optimization. 326 | /// 327 | /// It's used to determine how far to jump along the text when a mismatch occurs 328 | /// in the pattern after some matches. 
329 | /// 330 | /// ### Description 331 | /// 332 | /// This method takes the suffix table computed by `get_suffix_table` 333 | /// and builds a table that directly tells the algorithm how far to jump 334 | /// in case of a mismatch at a given position. 335 | /// 336 | /// ### How it works: 337 | /// 338 | /// 1. Initializes a table with the pattern's length minus one at all positions. 339 | /// 2. Updates the table using the suffixes from get_suffix_table, 340 | /// making sure that the jumps are optimized according to the pattern's internal structure. 341 | /// 3. Specifically sets the last element of the table to 1, 342 | /// as the jump should always be at least one character. 343 | /// 344 | /// Using the pattern "ABAB" and assuming the suffix table as `[0, 0, 2, 0]`: 345 | /// 346 | /// Step 1: Initialize the table with the length of the needle minus one (3) 347 | /// 348 | /// pattern: A B A B 349 | /// needle: 3 3 3 3 350 | /// 351 | /// Step 2: Iterate through the suffixes table and update the entries 352 | /// 353 | /// - suffix length 2 at index 2, skip 2 positions 354 | /// A B A B 355 | /// 3 3 2 3 356 | /// 357 | /// Step 3: Set the last entry to 1 358 | /// 359 | /// A B A B 360 | /// 3 3 2 1 361 | pub fn build_suffixes_table(pattern: &[T]) -> Vec { 362 | let suffixes = get_suffix_table(pattern); 363 | let len = pattern.len(); 364 | let mut table = vec![len - 1; len]; 365 | 366 | for (i, suffix_len) in suffixes.into_iter().enumerate() { 367 | let needle_index = len - suffix_len - 1; 368 | let skip = len - i - 1; 369 | if table[needle_index] > skip { 370 | table[needle_index] = skip; 371 | } 372 | } 373 | 374 | table[len - 1] = 1; 375 | table 376 | } 377 | 378 | pub trait SkipSearch { 379 | fn skip_offset(&self, bad_char: T, pattern_pos: usize, text: &[T], text_pos: usize) -> usize; 380 | fn len(&self) -> usize; 381 | fn at(&self, index: usize) -> T; 382 | fn is_empty(&self) -> bool { 383 | self.len() == 0 384 | } 385 | } 386 | 387 | pub fn 
find_from_position<'a, T, U>( 388 | pattern: &'a U, 389 | text: &'a [T], 390 | mut position: usize, 391 | ) -> Option 392 | where 393 | T: PartialEq + Copy + Into, 394 | U: SkipSearch, 395 | { 396 | if pattern.len() > text.len() { 397 | return None; 398 | } 399 | 400 | let max_position = text.len() - pattern.len(); 401 | while position <= max_position { 402 | let mut pattern_pos = pattern.len() - 1; 403 | 404 | while text[position + pattern_pos] == pattern.at(pattern_pos) { 405 | if pattern_pos == 0 { 406 | return Some(position); 407 | } 408 | 409 | pattern_pos -= 1; 410 | } 411 | 412 | let bad_char = text[position + pattern.len() - 1]; 413 | position += pattern.skip_offset(bad_char, pattern_pos, text, position); 414 | } 415 | 416 | None 417 | } 418 | 419 | impl<'a, T> SkipSearch for &'a BoyerMooreSearch<'a, T> 420 | where 421 | T: Copy + Into, 422 | { 423 | fn skip_offset(&self, bad_char: T, pattern_pos: usize, _text: &[T], _text_pos: usize) -> usize { 424 | let bad_char_shift = self.bad_character_table[bad_char.into()]; 425 | let good_suffix_shift = self.good_suffixes_table[pattern_pos]; 426 | 427 | std::cmp::max(bad_char_shift, good_suffix_shift) 428 | } 429 | 430 | fn len(&self) -> usize { 431 | self.pattern.len() 432 | } 433 | 434 | fn at(&self, pos: usize) -> T { 435 | self.pattern[pos] 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /src/commands/grep.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::{io, path::Path}; 3 | 4 | use regex::Regex; 5 | use tokio::fs::File; 6 | use tokio::io::{AsyncBufReadExt, BufReader}; 7 | 8 | use serde::{Deserialize, Serialize}; 9 | 10 | use crate::utils::suggest_subcommand; 11 | 12 | use super::pattern_search::PatternTree; 13 | 14 | #[derive(Debug, Serialize, Deserialize, Default)] 15 | pub struct GrepReport { 16 | pub directories: Vec, 17 | } 18 | 19 | #[derive(Debug, Serialize, Deserialize)] 20 
| pub struct Directory { 21 | name: String, 22 | files: Vec, 23 | } 24 | 25 | #[derive(Debug, Serialize, Deserialize)] 26 | pub struct GrepFile { 27 | pub name: String, 28 | pub items: Vec, 29 | } 30 | 31 | #[derive(Debug, Serialize, Deserialize)] 32 | pub struct GrepLine { 33 | line: usize, 34 | content: String, 35 | position: Vec, 36 | } 37 | 38 | impl GrepReport { 39 | pub fn new() -> Self { 40 | Default::default() 41 | } 42 | 43 | fn process_line( 44 | &mut self, 45 | line: String, 46 | index: usize, 47 | path: &Path, 48 | pattern_tree: &mut PatternTree, 49 | patterns: &Vec, 50 | ) { 51 | let (found, positions) = pattern_tree.selective_search(patterns, &line); 52 | 53 | if found { 54 | // search file in list of files 55 | let dir_name = path.parent().unwrap().display().to_string(); 56 | let file_name = path.display().to_string(); 57 | 58 | let dir_index = self.directories.iter().position(|d| d.name == dir_name); 59 | 60 | if dir_index.is_none() { 61 | self.directories.push(Directory { 62 | name: dir_name.clone(), 63 | files: Vec::new(), 64 | }); 65 | } 66 | 67 | let dir = self 68 | .directories 69 | .iter_mut() 70 | .find(|d| d.name == dir_name) 71 | .unwrap(); 72 | 73 | let file_index = dir.files.iter().position(|f| f.name == file_name); 74 | 75 | if file_index.is_none() { 76 | dir.files.push(GrepFile { 77 | name: file_name.clone(), 78 | items: Vec::new(), 79 | }); 80 | } 81 | 82 | let file = dir.files.iter_mut().find(|f| f.name == file_name).unwrap(); 83 | 84 | let line = GrepLine { 85 | line: index + 1, 86 | content: line, 87 | position: positions, 88 | }; 89 | file.items.push(line); 90 | } 91 | } 92 | 93 | pub async fn grep_file( 94 | &mut self, 95 | path: &Path, 96 | pattern_tree: &mut PatternTree, 97 | patterns: &Vec, 98 | ) -> io::Result<()> { 99 | let file = File::open(path).await?; 100 | let mut reader = BufReader::new(file); 101 | 102 | let mut line_bytes = Vec::new(); 103 | let mut i = 0; 104 | 105 | while reader.read_until(b'\n', &mut 
line_bytes).await? > 0 { 106 | let line = String::from_utf8_lossy(&line_bytes).to_string(); 107 | self.process_line(line, i, path, pattern_tree, patterns); 108 | line_bytes.clear(); 109 | i += 1; 110 | } 111 | 112 | Ok(()) 113 | } 114 | 115 | // TODO 116 | pub fn format_tree(&self, ident_size: usize) -> String { 117 | let mut result = String::new(); 118 | 119 | for directory in &self.directories { 120 | result.push_str(&format!("{}\n", directory.name)); 121 | 122 | for file in &directory.files { 123 | for item in &file.items { 124 | let file_relative_path = 125 | GrepReport::display_relative_path(&directory.name, &file.name); 126 | 127 | result.push_str(&format!( 128 | "{:ident$}{}:{}:{} - {}\n", 129 | "", 130 | file_relative_path, 131 | item.line, 132 | item.position[0], 133 | item.content.trim_start(), 134 | ident = ident_size, 135 | )); 136 | } 137 | } 138 | } 139 | 140 | result 141 | } 142 | 143 | fn format_plain( 144 | &self, 145 | hide_path: bool, 146 | list_of_files: bool, 147 | count: bool, 148 | patterns_to_search: Vec, 149 | colorize: bool, 150 | ) -> String { 151 | let mut result = String::new(); 152 | let mut counter: usize = 0; 153 | 154 | if !count { 155 | for dir in &self.directories { 156 | let path = Path::new(&dir.name); 157 | 158 | if !hide_path { 159 | dir_path_pretty(path, &mut result); 160 | } 161 | 162 | for file in &dir.files { 163 | if !hide_path { 164 | let file_name = Path::new(&file.name); 165 | let last_two = last_two(file_name); 166 | result.push_str(&format!("{}\n", last_two[0])); 167 | } 168 | 169 | if !list_of_files { 170 | for item in &file.items { 171 | if colorize { 172 | // `(?i)` is for case insensitive search 173 | let pattern = 174 | Regex::new(&format!(r"(?i){}", patterns_to_search.join(" "))) 175 | .unwrap(); 176 | let text = &item.content; 177 | 178 | let colored_text = pattern 179 | .replace_all(text, |caps: ®ex::Captures| { 180 | format!("\x1b[31m{}\x1b[0m", &caps[0]) 181 | }); 182 | 183 | result.push_str(&format!( 184 | 
"{} {}", 185 | item.line, 186 | colored_text.trim_start() 187 | )); 188 | } else { 189 | result.push_str(&format!( 190 | "{} {}", 191 | item.line, 192 | item.content.trim_start() 193 | )); 194 | } 195 | counter += 1; 196 | } 197 | result.push('\n'); 198 | } else { 199 | counter += file.items.len(); 200 | } 201 | } 202 | } 203 | result.push_str(&format!("\nTotal {} lines found\n", counter)); 204 | } else { 205 | counter = self 206 | .directories 207 | .iter() 208 | .map(|dir| dir.files.iter().map(|file| file.items.len()).sum::()) 209 | .sum(); 210 | result = format!("Total {} lines found\n", counter); 211 | } 212 | 213 | result 214 | } 215 | 216 | #[allow(clippy::too_many_arguments)] 217 | pub fn report_formatting( 218 | &mut self, 219 | format: Option, 220 | hide_path: bool, 221 | list_of_files: bool, 222 | count: bool, 223 | patterns_to_search: Vec, 224 | colorize: bool, 225 | ) -> String { 226 | let default = "plain".to_string(); 227 | let format = format.unwrap_or(default); 228 | 229 | match format.as_str() { 230 | "json" => serde_json::to_string_pretty(self).unwrap(), 231 | "plain" => self.format_plain( 232 | hide_path, 233 | list_of_files, 234 | count, 235 | patterns_to_search, 236 | colorize, 237 | ), 238 | // "tree" => self.format_tree(4), 239 | _ => { 240 | let suggest = suggest_subcommand(&format).unwrap(); 241 | format!("Unknown format: '{}'. 
Did you mean '{}'?", format, suggest) 242 | } 243 | } 244 | } 245 | 246 | fn display_relative_path(directory: &str, file_name: &str) -> String { 247 | let base_path = Path::new(directory); 248 | let path = Path::new(file_name); 249 | 250 | let relative_path = path.strip_prefix(base_path).unwrap(); 251 | let mut display_path = PathBuf::new(); 252 | 253 | for _ in 1..base_path.components().count() - 2 { 254 | display_path.push(".."); 255 | } 256 | 257 | display_path.push(relative_path); 258 | 259 | display_path.display().to_string() 260 | } 261 | } 262 | 263 | fn last_two(path: &Path) -> Vec<&str> { 264 | path.iter() 265 | .rev() 266 | .take(2) 267 | .map(|s| s.to_str().unwrap()) 268 | .collect() 269 | } 270 | 271 | fn dir_path_pretty(path: &Path, result: &mut String) { 272 | let last_two: Vec<&str> = last_two(path); 273 | 274 | if last_two.len() == 2 { 275 | result.push_str(&format!("{}/{}\n", last_two[1], last_two[0])); 276 | } 277 | 278 | result.push_str(&format!("{}\n", last_two[0])); 279 | } 280 | -------------------------------------------------------------------------------- /src/commands/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod boyer_moore; 2 | pub mod grep; 3 | pub mod pattern_search; 4 | -------------------------------------------------------------------------------- /src/commands/pattern_search.rs: -------------------------------------------------------------------------------- 1 | use crate::commands::boyer_moore::{BoyerMooreSearch, SearchIn}; 2 | use aho_corasick::AhoCorasick; 3 | use regex::Regex; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct PatternTree { 7 | pub ignore_case: bool, 8 | pub regex_flag: bool, 9 | } 10 | 11 | type PatternPosition = (bool, Vec); 12 | 13 | #[allow(clippy::new_without_default)] 14 | impl PatternTree { 15 | pub fn new() -> Self { 16 | PatternTree { 17 | ignore_case: false, 18 | regex_flag: false, 19 | } 20 | } 21 | 22 | /// Call all search methods based on the given 
patterns 23 | /// 24 | /// If the pattern is single, then call `boyer_moore_search` method. 25 | /// Because BM algorithm is known as the fastest algorithm for single pattern search. 26 | /// 27 | /// Whereas, if the pattern is multiple, then call `aho_corasick_search` method. 28 | /// AC is known as the fastest algorithm for multiple pattern search. 29 | pub fn selective_search(&self, patterns: &Vec, text: &str) -> PatternPosition { 30 | if self.regex_flag { 31 | return self.regex(text, &patterns[0]); 32 | } 33 | 34 | match patterns.len() { 35 | 0 => (false, vec![]), 36 | 1 => match self.ignore_case { 37 | true => self.boyer_moore_search(&text.to_lowercase(), &patterns[0].to_lowercase()), 38 | false => self.boyer_moore_search(text, &patterns[0]), 39 | }, 40 | _ => { 41 | if self.ignore_case { 42 | let mut lower_patterns: Vec = Vec::new(); 43 | patterns 44 | .iter() 45 | .for_each(|pattern| lower_patterns.push(pattern.to_lowercase())); 46 | self.aho_corasick_search(&text.to_lowercase(), &lower_patterns) 47 | } else { 48 | self.aho_corasick_search(text, patterns) 49 | } 50 | } 51 | } 52 | } 53 | 54 | pub fn aho_corasick_search(&self, text: &str, patterns: &Vec) -> PatternPosition { 55 | let ac = AhoCorasick::new(patterns).unwrap(); 56 | let mut result: Vec = Vec::new(); 57 | 58 | for matched in ac.find_iter(text) { 59 | result.push(matched.start()); 60 | } 61 | 62 | (!result.is_empty(), result) 63 | } 64 | 65 | pub fn boyer_moore_search(&self, text: &str, pattern: &String) -> PatternPosition { 66 | let searcher = BoyerMooreSearch::new(pattern.as_bytes()); 67 | let result: Vec = searcher.find_in(text.as_bytes()).collect(); 68 | 69 | (!result.is_empty(), result) 70 | } 71 | 72 | pub fn regex(&self, text: &str, pattern: &String) -> PatternPosition { 73 | let re = match self.ignore_case { 74 | true => Regex::new(&format!(r"(?i){}", pattern)).unwrap(), 75 | false => Regex::new(pattern).unwrap(), 76 | }; 77 | 78 | let mut result: Vec = Vec::new(); 79 | 80 | for matched in 
re.find_iter(text) { 81 | result.push(matched.start()); 82 | } 83 | 84 | (!result.is_empty(), result) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | // This is referred from the helix codebase: 2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/config.rs 3 | use std::str::from_utf8; 4 | 5 | /// Default built-in languages.toml. 6 | pub fn default_lang_config() -> toml::Value { 7 | let default_config = include_bytes!("../languages.toml"); 8 | toml::from_str(from_utf8(default_config).unwrap()) 9 | .expect("Could not parse built-in languages.toml to valid toml") 10 | } 11 | 12 | /// User configured languages.toml file, merged with the default config. 13 | pub fn user_lang_config() -> Result { 14 | let config = [ 15 | crate::config_dir(), 16 | crate::find_workspace().0.join(".balpan"), 17 | ] 18 | .into_iter() 19 | .map(|path| path.join("languages.toml")) 20 | .filter_map(|file| { 21 | std::fs::read_to_string(file) 22 | .map(|config| toml::from_str(&config)) 23 | .ok() 24 | }) 25 | .collect::, _>>()? 
26 | .into_iter() 27 | .fold(default_lang_config(), |a, b| { 28 | // combines for example 29 | // b: 30 | // [[language]] 31 | // name = "toml" 32 | // language-server = { command = "taplo", args = ["lsp", "stdio"] } 33 | // 34 | // a: 35 | // [[language]] 36 | // language-server = { command = "/usr/bin/taplo" } 37 | // 38 | // into: 39 | // [[language]] 40 | // name = "toml" 41 | // language-server = { command = "/usr/bin/taplo" } 42 | // 43 | // thus it overrides the third depth-level of b with values of a if they exist, but otherwise merges their values 44 | crate::merge_toml_values(a, b, 3) 45 | }); 46 | 47 | Ok(config) 48 | } 49 | -------------------------------------------------------------------------------- /src/grammar.rs: -------------------------------------------------------------------------------- 1 | // This is referred from the helix codebase: 2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/grammar.rs 3 | use anyhow::{anyhow, bail, Context, Result}; 4 | use serde::{Deserialize, Serialize}; 5 | use std::time::SystemTime; 6 | use std::{ 7 | collections::HashSet, 8 | path::{Path, PathBuf}, 9 | process::Command, 10 | sync::mpsc::channel, 11 | }; 12 | use std::{fs, thread}; 13 | use tempfile::TempPath; 14 | use tree_sitter::Language; 15 | 16 | #[cfg(unix)] 17 | const DYLIB_EXTENSION: &str = "so"; 18 | 19 | #[cfg(windows)] 20 | const DYLIB_EXTENSION: &str = "dll"; 21 | 22 | #[cfg(target_arch = "wasm32")] 23 | const DYLIB_EXTENSION: &str = "wasm"; 24 | 25 | #[derive(Debug, Serialize, Deserialize)] 26 | struct Configuration { 27 | #[serde(rename = "use-grammars")] 28 | pub grammar_selection: Option, 29 | pub grammar: Vec, 30 | } 31 | 32 | #[derive(Debug, Serialize, Deserialize)] 33 | #[serde(rename_all = "lowercase", untagged)] 34 | pub enum GrammarSelection { 35 | Only { only: HashSet }, 36 | Except { except: HashSet }, 37 | } 38 | 39 | #[derive(Debug, Serialize, Deserialize)] 40 | #[serde(deny_unknown_fields)] 41 | pub struct 
GrammarConfiguration { 42 | #[serde(rename = "name")] 43 | pub grammar_id: String, 44 | pub source: GrammarSource, 45 | } 46 | 47 | #[derive(Debug, Serialize, Deserialize)] 48 | #[serde(rename_all = "lowercase", untagged)] 49 | pub enum GrammarSource { 50 | Local { 51 | path: String, 52 | }, 53 | Git { 54 | #[serde(rename = "git")] 55 | remote: String, 56 | #[serde(rename = "rev")] 57 | revision: String, 58 | subpath: Option, 59 | }, 60 | } 61 | 62 | const BUILD_TARGET: &str = "x86_64-unknown-linux-gnu"; // env!("BUILD_TARGET"); 63 | const REMOTE_NAME: &str = "origin"; 64 | 65 | #[cfg(target_arch = "wasm32")] 66 | pub fn get_language(name: &str) -> Result { 67 | unimplemented!() 68 | } 69 | 70 | #[cfg(not(target_arch = "wasm32"))] 71 | pub fn get_language(name: &str) -> Result { 72 | use libloading::{Library, Symbol}; 73 | let mut rel_library_path = PathBuf::new().join("grammars").join(name); 74 | rel_library_path.set_extension(DYLIB_EXTENSION); 75 | let library_path = crate::runtime_file(&rel_library_path); 76 | 77 | let library = unsafe { Library::new(&library_path) } 78 | .with_context(|| format!("Error opening dynamic library {:?}", library_path))?; 79 | let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_")); 80 | let language = unsafe { 81 | let language_fn: Symbol Language> = library 82 | .get(language_fn_name.as_bytes()) 83 | .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; 84 | language_fn() 85 | }; 86 | std::mem::forget(library); 87 | Ok(language) 88 | } 89 | 90 | pub fn fetch_grammars() -> Result<()> { 91 | // We do not need to fetch local grammars. 92 | let mut grammars = get_grammar_configs()?; 93 | grammars.retain(|grammar| !matches!(grammar.source, GrammarSource::Local { .. 
})); 94 | 95 | println!("Fetching {} grammars", grammars.len()); 96 | let results = run_parallel(grammars, fetch_grammar); 97 | 98 | let mut errors = Vec::new(); 99 | let mut git_updated = Vec::new(); 100 | let mut git_up_to_date = 0; 101 | let mut non_git = Vec::new(); 102 | 103 | for (grammar_id, res) in results { 104 | match res { 105 | Ok(FetchStatus::GitUpToDate) => git_up_to_date += 1, 106 | Ok(FetchStatus::GitUpdated { revision }) => git_updated.push((grammar_id, revision)), 107 | Ok(FetchStatus::NonGit) => non_git.push(grammar_id), 108 | Err(e) => errors.push((grammar_id, e)), 109 | } 110 | } 111 | 112 | non_git.sort_unstable(); 113 | git_updated.sort_unstable_by(|a, b| a.0.cmp(&b.0)); 114 | 115 | if git_up_to_date != 0 { 116 | println!("{} up to date git grammars", git_up_to_date); 117 | } 118 | 119 | if !non_git.is_empty() { 120 | println!("{} non git grammars", non_git.len()); 121 | println!("\t{:?}", non_git); 122 | } 123 | 124 | if !git_updated.is_empty() { 125 | println!("{} updated grammars", git_updated.len()); 126 | // We checked the vec is not empty, unwrapping will not panic 127 | let longest_id = git_updated.iter().map(|x| x.0.len()).max().unwrap(); 128 | for (id, rev) in git_updated { 129 | println!( 130 | "\t{id:width$} now on {rev}", 131 | id = id, 132 | width = longest_id, 133 | rev = rev 134 | ); 135 | } 136 | } 137 | 138 | if !errors.is_empty() { 139 | let len = errors.len(); 140 | for (i, (grammar, error)) in errors.into_iter().enumerate() { 141 | println!("Failure {}/{len}: {grammar} {error}", i + 1); 142 | } 143 | bail!("{len} grammars failed to fetch"); 144 | } 145 | 146 | Ok(()) 147 | } 148 | 149 | pub fn build_grammars(target: Option) -> Result<()> { 150 | let grammars = get_grammar_configs()?; 151 | println!("Building {} grammars", grammars.len()); 152 | let results = run_parallel(grammars, move |grammar| { 153 | build_grammar(grammar, target.as_deref()) 154 | }); 155 | 156 | let mut errors = Vec::new(); 157 | let mut already_built 
= 0; 158 | let mut built = Vec::new(); 159 | 160 | for (grammar_id, res) in results { 161 | match res { 162 | Ok(BuildStatus::AlreadyBuilt) => already_built += 1, 163 | Ok(BuildStatus::Built) => built.push(grammar_id), 164 | Err(e) => errors.push((grammar_id, e)), 165 | } 166 | } 167 | 168 | built.sort_unstable(); 169 | 170 | if already_built != 0 { 171 | println!("{} grammars already built", already_built); 172 | } 173 | 174 | if !built.is_empty() { 175 | println!("{} grammars built now", built.len()); 176 | println!("\t{:?}", built); 177 | } 178 | 179 | if !errors.is_empty() { 180 | let len = errors.len(); 181 | for (i, (grammar_id, error)) in errors.into_iter().enumerate() { 182 | println!("Failure {}/{len}: {grammar_id} {error}", i + 1); 183 | } 184 | bail!("{len} grammars failed to build"); 185 | } 186 | 187 | Ok(()) 188 | } 189 | 190 | // Returns the set of grammar configurations the user requests. 191 | // Grammars are configured in the default and user `languages.toml` and are 192 | // merged. The `grammar_selection` key of the config is then used to filter 193 | // down all grammars into a subset of the user's choosing. 194 | fn get_grammar_configs() -> Result> { 195 | let config: Configuration = crate::config::user_lang_config() 196 | .context("Could not parse languages.toml")? 
197 | .try_into()?; 198 | 199 | let grammars = match config.grammar_selection { 200 | Some(GrammarSelection::Only { only: selections }) => config 201 | .grammar 202 | .into_iter() 203 | .filter(|grammar| selections.contains(&grammar.grammar_id)) 204 | .collect(), 205 | Some(GrammarSelection::Except { except: rejections }) => config 206 | .grammar 207 | .into_iter() 208 | .filter(|grammar| !rejections.contains(&grammar.grammar_id)) 209 | .collect(), 210 | None => config.grammar, 211 | }; 212 | 213 | Ok(grammars) 214 | } 215 | 216 | fn run_parallel(grammars: Vec, job: F) -> Vec<(String, Result)> 217 | where 218 | F: Fn(GrammarConfiguration) -> Result + Send + 'static + Clone, 219 | Res: Send + 'static, 220 | { 221 | let (tx, rx) = channel(); 222 | let mut handles = Vec::new(); 223 | 224 | for grammar in grammars { 225 | let tx = tx.to_owned(); 226 | let job = job.to_owned(); 227 | 228 | let handle = thread::spawn(move || { 229 | let result = (grammar.grammar_id.clone(), job(grammar)); 230 | let _ = tx.send(result); 231 | }); 232 | 233 | handles.push(handle); 234 | } 235 | 236 | for handle in handles { 237 | let _ = handle.join(); 238 | } 239 | 240 | drop(tx); // not necessary, but makes it explicit that we're done with the sender 241 | rx.iter().collect() 242 | } 243 | 244 | enum FetchStatus { 245 | GitUpToDate, 246 | GitUpdated { revision: String }, 247 | NonGit, 248 | } 249 | 250 | fn fetch_grammar(grammar: GrammarConfiguration) -> Result { 251 | if let GrammarSource::Git { 252 | remote, revision, .. 
253 | } = grammar.source 254 | { 255 | let grammar_dir = crate::runtime_dirs() 256 | .first() 257 | .expect("No runtime directories provided") // guaranteed by post-condition 258 | .join("grammars") 259 | .join("sources") 260 | .join(&grammar.grammar_id); 261 | 262 | fs::create_dir_all(&grammar_dir).context(format!( 263 | "Could not create grammar directory {:?}", 264 | grammar_dir 265 | ))?; 266 | 267 | // create the grammar dir contains a git directory 268 | if !grammar_dir.join(".git").exists() { 269 | git(&grammar_dir, ["init"])?; 270 | } 271 | 272 | // ensure the remote matches the configured remote 273 | if get_remote_url(&grammar_dir).map_or(true, |s| s != remote) { 274 | set_remote(&grammar_dir, &remote)?; 275 | } 276 | 277 | // ensure the revision matches the configured revision 278 | if get_revision(&grammar_dir).map_or(true, |s| s != revision) { 279 | // Fetch the exact revision from the remote. 280 | // Supported by server-side git since v2.5.0 (July 2015), 281 | // enabled by default on major git hosts. 282 | git( 283 | &grammar_dir, 284 | ["fetch", "--depth", "1", REMOTE_NAME, &revision], 285 | )?; 286 | git(&grammar_dir, ["checkout", &revision])?; 287 | 288 | Ok(FetchStatus::GitUpdated { revision }) 289 | } else { 290 | Ok(FetchStatus::GitUpToDate) 291 | } 292 | } else { 293 | Ok(FetchStatus::NonGit) 294 | } 295 | } 296 | 297 | // Sets the remote for a repository to the given URL, creating the remote if 298 | // it does not yet exist. 
299 | fn set_remote(repository_dir: &Path, remote_url: &str) -> Result { 300 | git( 301 | repository_dir, 302 | ["remote", "set-url", REMOTE_NAME, remote_url], 303 | ) 304 | .or_else(|_| git(repository_dir, ["remote", "add", REMOTE_NAME, remote_url])) 305 | } 306 | 307 | fn get_remote_url(repository_dir: &Path) -> Option { 308 | git(repository_dir, ["remote", "get-url", REMOTE_NAME]).ok() 309 | } 310 | 311 | fn get_revision(repository_dir: &Path) -> Option { 312 | git(repository_dir, ["rev-parse", "HEAD"]).ok() 313 | } 314 | 315 | // A wrapper around 'git' commands which returns stdout in success and a 316 | // helpful error message showing the command, stdout, and stderr in error. 317 | fn git(repository_dir: &Path, args: I) -> Result 318 | where 319 | I: IntoIterator, 320 | S: AsRef, 321 | { 322 | let output = Command::new("git") 323 | .args(args) 324 | .current_dir(repository_dir) 325 | .output()?; 326 | 327 | if output.status.success() { 328 | Ok(String::from_utf8_lossy(&output.stdout) 329 | .trim_end() 330 | .to_owned()) 331 | } else { 332 | // TODO: figure out how to display the git command using `args` 333 | Err(anyhow!( 334 | "Git command failed.\nStdout: {}\nStderr: {}", 335 | String::from_utf8_lossy(&output.stdout), 336 | String::from_utf8_lossy(&output.stderr), 337 | )) 338 | } 339 | } 340 | 341 | enum BuildStatus { 342 | AlreadyBuilt, 343 | Built, 344 | } 345 | 346 | fn build_grammar(grammar: GrammarConfiguration, target: Option<&str>) -> Result { 347 | let grammar_dir = if let GrammarSource::Local { path } = &grammar.source { 348 | PathBuf::from(&path) 349 | } else { 350 | crate::runtime_dirs() 351 | .first() 352 | .expect("No runtime directories provided") // guaranteed by post-condition 353 | .join("grammars") 354 | .join("sources") 355 | .join(&grammar.grammar_id) 356 | }; 357 | 358 | let grammar_dir_entries = grammar_dir.read_dir().with_context(|| { 359 | format!( 360 | "Failed to read directory {:?}. 
Did you use 'hx --grammar fetch'?", 361 | grammar_dir 362 | ) 363 | })?; 364 | 365 | if grammar_dir_entries.count() == 0 { 366 | return Err(anyhow!( 367 | "Directory {:?} is empty. Did you use 'hx --grammar fetch'?", 368 | grammar_dir 369 | )); 370 | }; 371 | 372 | let path = match &grammar.source { 373 | GrammarSource::Git { 374 | subpath: Some(subpath), 375 | .. 376 | } => grammar_dir.join(subpath), 377 | _ => grammar_dir, 378 | } 379 | .join("src"); 380 | 381 | build_tree_sitter_library(&path, grammar, target) 382 | } 383 | 384 | fn build_tree_sitter_library( 385 | src_path: &Path, 386 | grammar: GrammarConfiguration, 387 | target: Option<&str>, 388 | ) -> Result { 389 | let header_path = src_path; 390 | let parser_path = src_path.join("parser.c"); 391 | let mut scanner_path = src_path.join("scanner.c"); 392 | 393 | let scanner_path = if scanner_path.exists() { 394 | Some(scanner_path) 395 | } else { 396 | scanner_path.set_extension("cc"); 397 | if scanner_path.exists() { 398 | Some(scanner_path) 399 | } else { 400 | None 401 | } 402 | }; 403 | let parser_lib_path = crate::runtime_dirs() 404 | .first() 405 | .expect("No runtime directories provided") // guaranteed by post-condition 406 | .join("grammars"); 407 | let mut library_path = parser_lib_path.join(&grammar.grammar_id); 408 | library_path.set_extension(DYLIB_EXTENSION); 409 | 410 | // if we are running inside a buildscript emit cargo metadata 411 | // to detect if we are running from a buildscript check some env variables 412 | // that cargo only sets for build scripts 413 | if std::env::var("OUT_DIR").is_ok() && std::env::var("CARGO").is_ok() { 414 | if let Some(scanner_path) = scanner_path.as_ref().and_then(|path| path.to_str()) { 415 | println!("cargo:rerun-if-changed={scanner_path}"); 416 | } 417 | if let Some(parser_path) = parser_path.to_str() { 418 | println!("cargo:rerun-if-changed={parser_path}"); 419 | } 420 | } 421 | 422 | let recompile = needs_recompile(&library_path, &parser_path, 
&scanner_path) 423 | .context("Failed to compare source and binary timestamps")?; 424 | 425 | if !recompile { 426 | return Ok(BuildStatus::AlreadyBuilt); 427 | } 428 | 429 | let mut config = cc::Build::new(); 430 | config 431 | .cpp(true) 432 | .opt_level(3) 433 | .cargo_metadata(false) 434 | .host(BUILD_TARGET) 435 | .target(target.unwrap_or(BUILD_TARGET)); 436 | let compiler = config.get_compiler(); 437 | let mut command = Command::new(compiler.path()); 438 | command.current_dir(src_path); 439 | for (key, value) in compiler.env() { 440 | command.env(key, value); 441 | } 442 | 443 | command.args(compiler.args()); 444 | // used to delay dropping the temporary object file until after the compilation is complete 445 | let _path_guard; 446 | 447 | if compiler.is_like_msvc() { 448 | command 449 | .args(["/nologo", "/LD", "/I"]) 450 | .arg(header_path) 451 | .arg("/Od") 452 | .arg("/utf-8") 453 | .arg("/std:c11"); 454 | if let Some(scanner_path) = scanner_path.as_ref() { 455 | if scanner_path.extension() == Some("c".as_ref()) { 456 | command.arg(scanner_path); 457 | } else { 458 | let mut cpp_command = Command::new(compiler.path()); 459 | cpp_command.current_dir(src_path); 460 | for (key, value) in compiler.env() { 461 | cpp_command.env(key, value); 462 | } 463 | cpp_command.args(compiler.args()); 464 | let object_file = 465 | library_path.with_file_name(format!("{}_scanner.obj", &grammar.grammar_id)); 466 | cpp_command 467 | .args(["/nologo", "/LD", "/I"]) 468 | .arg(header_path) 469 | .arg("/Od") 470 | .arg("/utf-8") 471 | .arg("/std:c++14") 472 | .arg(format!("/Fo{}", object_file.display())) 473 | .arg("/c") 474 | .arg(scanner_path); 475 | let output = cpp_command 476 | .output() 477 | .context("Failed to execute C++ compiler")?; 478 | 479 | if !output.status.success() { 480 | return Err(anyhow!( 481 | "Parser compilation failed.\nStdout: {}\nStderr: {}", 482 | String::from_utf8_lossy(&output.stdout), 483 | String::from_utf8_lossy(&output.stderr) 484 | )); 485 | } 
486 | command.arg(&object_file); 487 | _path_guard = TempPath::from_path(object_file); 488 | } 489 | } 490 | 491 | command 492 | .arg(parser_path) 493 | .arg("/link") 494 | .arg(format!("/out:{}", library_path.to_str().unwrap())); 495 | } else { 496 | command 497 | .arg("-shared") 498 | .arg("-fPIC") 499 | .arg("-fno-exceptions") 500 | .arg("-I") 501 | .arg(header_path) 502 | .arg("-o") 503 | .arg(&library_path); 504 | 505 | if let Some(scanner_path) = scanner_path.as_ref() { 506 | if scanner_path.extension() == Some("c".as_ref()) { 507 | command.arg("-xc").arg("-std=c11").arg(scanner_path); 508 | } else { 509 | let mut cpp_command = Command::new(compiler.path()); 510 | cpp_command.current_dir(src_path); 511 | for (key, value) in compiler.env() { 512 | cpp_command.env(key, value); 513 | } 514 | cpp_command.args(compiler.args()); 515 | let object_file = 516 | library_path.with_file_name(format!("{}_scanner.o", &grammar.grammar_id)); 517 | cpp_command 518 | .arg("-fPIC") 519 | .arg("-fno-exceptions") 520 | .arg("-I") 521 | .arg(header_path) 522 | .arg("-o") 523 | .arg(&object_file) 524 | .arg("-std=c++14") 525 | .arg("-c") 526 | .arg(scanner_path); 527 | let output = cpp_command 528 | .output() 529 | .context("Failed to execute C++ compiler")?; 530 | if !output.status.success() { 531 | return Err(anyhow!( 532 | "Parser compilation failed.\nStdout: {}\nStderr: {}", 533 | String::from_utf8_lossy(&output.stdout), 534 | String::from_utf8_lossy(&output.stderr) 535 | )); 536 | } 537 | 538 | command.arg(&object_file); 539 | _path_guard = TempPath::from_path(object_file); 540 | } 541 | } 542 | command.arg("-xc").arg("-std=c11").arg(parser_path); 543 | if cfg!(all( 544 | unix, 545 | not(any(target_os = "macos", target_os = "illumos")) 546 | )) { 547 | command.arg("-Wl,-z,relro,-z,now"); 548 | } 549 | } 550 | 551 | let output = command 552 | .output() 553 | .context("Failed to execute C/C++ compiler")?; 554 | if !output.status.success() { 555 | return Err(anyhow!( 556 | 
"Parser compilation failed.\nStdout: {}\nStderr: {}", 557 | String::from_utf8_lossy(&output.stdout), 558 | String::from_utf8_lossy(&output.stderr) 559 | )); 560 | } 561 | 562 | Ok(BuildStatus::Built) 563 | } 564 | 565 | fn needs_recompile( 566 | lib_path: &Path, 567 | parser_c_path: &Path, 568 | scanner_path: &Option, 569 | ) -> Result { 570 | if !lib_path.exists() { 571 | return Ok(true); 572 | } 573 | let lib_mtime = mtime(lib_path)?; 574 | if mtime(parser_c_path)? > lib_mtime { 575 | return Ok(true); 576 | } 577 | if let Some(scanner_path) = scanner_path { 578 | if mtime(scanner_path)? > lib_mtime { 579 | return Ok(true); 580 | } 581 | } 582 | Ok(false) 583 | } 584 | 585 | fn mtime(path: &Path) -> Result { 586 | Ok(fs::metadata(path)?.modified()?) 587 | } 588 | 589 | /// Gives the contents of a file from a language's `runtime/queries/` 590 | /// directory 591 | pub fn load_runtime_file(language: &str, filename: &str) -> Result { 592 | let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename)); 593 | std::fs::read_to_string(path) 594 | } 595 | -------------------------------------------------------------------------------- /src/language.rs: -------------------------------------------------------------------------------- 1 | #[derive(PartialEq)] 2 | pub enum Language { 3 | Rust, 4 | Python, 5 | Ruby, 6 | Cpp, 7 | TypeScript, 8 | JavaScript, 9 | Other(String), 10 | } 11 | 12 | impl Language { 13 | pub fn as_str(&self) -> &str { 14 | match self { 15 | Self::Rust => "rust", 16 | Self::Python => "python", 17 | Self::Ruby => "ruby", 18 | Self::Cpp => "cpp", 19 | Self::TypeScript => "typescript", 20 | Self::JavaScript => "javascript", 21 | Self::Other(ref language) => language.as_str(), 22 | } 23 | } 24 | 25 | #[inline] 26 | pub fn from_extension(extension: &str) -> Self { 27 | match extension { 28 | "rs" => Self::Rust, 29 | "py" => Self::Python, 30 | "rb" => Self::Ruby, 31 | "cpp" => Self::Cpp, 32 | "h" => Self::Cpp, 33 | "hpp" => 
Self::Cpp, 34 | "ts" => Self::TypeScript, 35 | "js" => Self::JavaScript, 36 | other_extension => Self::Other(other_extension.to_string()), 37 | } 38 | } 39 | 40 | /// language specific tree-sitter node types 41 | pub fn top_level_node_type(&self) -> &str { 42 | match self { 43 | Language::Rust => "source_file", 44 | Language::Python => "module", 45 | Language::Ruby | Language::JavaScript | Language::TypeScript => "program", 46 | Language::Cpp => "translation_unit", 47 | _ => "", 48 | } 49 | } 50 | 51 | pub fn decorator_node_type(&self) -> &str { 52 | match self { 53 | Language::Rust => "attribute_item", 54 | Language::Python | Language::Ruby | Language::Cpp => "null", 55 | Language::TypeScript | Language::JavaScript => "decorator", 56 | _ => "", 57 | } 58 | } 59 | 60 | pub fn comment_node_type(&self) -> &str { 61 | match self { 62 | Language::Rust => "line_comment", 63 | Language::Python 64 | | Language::Ruby 65 | | Language::Cpp 66 | | Language::TypeScript 67 | | Language::JavaScript => "comment", 68 | _ => "", 69 | } 70 | } 71 | 72 | pub fn scannable_node_types(&self) -> Vec<&str> { 73 | let mut scannable = self.ignorable_node_types(); 74 | let mut commentable = self.commentable_node_types(); 75 | scannable.append(&mut commentable); 76 | scannable 77 | } 78 | 79 | pub fn ignorable_node_types(&self) -> Vec<&str> { 80 | match self { 81 | Language::Rust => vec![ 82 | "type_item", 83 | "static_item", 84 | "extern_crate_declaration", 85 | "const_item", 86 | "use_declaration", 87 | "expression_statement", 88 | "macro_invocation", 89 | "foreign_mod_item", // extern "C" 90 | ], 91 | Language::TypeScript | Language::JavaScript => { 92 | vec!["string_fragment", "import_specifier", "named_imports"] 93 | } 94 | _ => vec![], 95 | } 96 | } 97 | 98 | pub fn commentable_node_types(&self) -> Vec<&str> { 99 | match self { 100 | Language::Rust => vec![ 101 | "attribute_item", 102 | "mod_item", 103 | "enum_item", 104 | "impl_item", 105 | "function_item", 106 | "struct_item", 107 | 
"trait_item", 108 | "macro_definition", 109 | ], 110 | Language::Python => vec![ 111 | "class_definition", 112 | "function_definition", 113 | "decorated_definition", 114 | ], 115 | Language::Ruby => vec!["class", "method", "function", "module"], 116 | Language::Cpp => vec![ 117 | "namespace_definition", 118 | "function_definition", 119 | "class_specifier", 120 | ], 121 | Language::TypeScript | Language::JavaScript => vec![ 122 | "enum_declaration", 123 | "function_declaration", 124 | "class_declaration", 125 | "method_definition", 126 | "interface_declaration", 127 | "export_statement", 128 | // "variable_declaration", 129 | "expression_statement", // namespace 130 | ], 131 | _ => vec![], 132 | } 133 | } 134 | 135 | pub fn nested_traversable_symbols(&self) -> Vec<&str> { 136 | match self { 137 | Language::Rust => vec!["mod_item", "impl_item"], 138 | Language::Python => vec!["class_definition"], 139 | Language::Ruby => vec!["class", "module"], 140 | Language::Cpp => vec!["namespace_definition", "class_specifier"], 141 | Language::TypeScript | Language::JavaScript => vec![ 142 | "class_declaration", 143 | "expression_statement", 144 | "internal_module", 145 | ], 146 | _ => vec![], 147 | } 148 | } 149 | } 150 | 151 | impl From<&str> for Language { 152 | fn from(language_name: &str) -> Self { 153 | match language_name { 154 | "rust" => Self::Rust, 155 | "python" => Self::Python, 156 | "ruby" => Self::Ruby, 157 | "cpp" => Self::Cpp, 158 | "typescript" => Self::TypeScript, 159 | "javascript" => Self::JavaScript, 160 | other_language => Self::Other(other_language.to_string()), 161 | } 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // This is referred from the helix codebase: 2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/lib.rs 3 | pub mod analyzer; 4 | pub mod commands; 5 | pub mod config; 6 | 
pub mod grammar; 7 | pub mod language; 8 | pub mod scanner; 9 | pub mod tokens; 10 | pub mod tree_sitter_extended; 11 | pub mod utils; 12 | 13 | use etcetera::base_strategy::{choose_base_strategy, BaseStrategy}; 14 | use std::path::{Path, PathBuf}; 15 | use toml::{map::Map, Value}; 16 | 17 | static RUNTIME_DIRS: once_cell::sync::Lazy> = 18 | once_cell::sync::Lazy::new(prioritize_runtime_dirs); 19 | 20 | static CONFIG_FILE: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new(); 21 | 22 | pub fn initialize_config_file(specified_file: Option) { 23 | let config_file = specified_file.unwrap_or_else(|| { 24 | let config_dir = config_dir(); 25 | 26 | if !config_dir.exists() { 27 | std::fs::create_dir_all(&config_dir).ok(); 28 | } 29 | 30 | config_dir.join("config.toml") 31 | }); 32 | 33 | // We should only initialize this value once. 34 | CONFIG_FILE.set(config_file).ok(); 35 | } 36 | 37 | /// A list of runtime directories from highest to lowest priority 38 | /// 39 | /// The priority is: 40 | /// 41 | /// 1. sibling directory to `CARGO_MANIFEST_DIR` (if environment variable is set) 42 | /// 2. subdirectory of user config directory (always included) 43 | /// 3. `BALPAN_RUNTIME` (if environment variable is set) 44 | /// 4. subdirectory of path to balpan executable (always included) 45 | /// 46 | /// Postcondition: returns at least two paths (they might not exist). 
47 | fn prioritize_runtime_dirs() -> Vec { 48 | const RT_DIR: &str = "runtime"; 49 | // Adding higher priority first 50 | let mut rt_dirs = Vec::new(); 51 | if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") { 52 | // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent 53 | let path = PathBuf::from(dir).parent().unwrap().join(RT_DIR); 54 | log::debug!("runtime dir: {}", path.to_string_lossy()); 55 | rt_dirs.push(path); 56 | } 57 | 58 | let conf_rt_dir = config_dir().join(RT_DIR); 59 | rt_dirs.push(conf_rt_dir); 60 | 61 | if let Ok(dir) = std::env::var("BALPAN_RUNTIME") { 62 | rt_dirs.push(dir.into()); 63 | } 64 | 65 | // fallback to location of the executable being run 66 | // canonicalize the path in case the executable is symlinked 67 | let exe_rt_dir = std::env::current_exe() 68 | .ok() 69 | .and_then(|path| std::fs::canonicalize(path).ok()) 70 | .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR))) 71 | .unwrap(); 72 | rt_dirs.push(exe_rt_dir); 73 | rt_dirs 74 | } 75 | 76 | /// Runtime directories ordered from highest to lowest priority 77 | /// 78 | /// All directories should be checked when looking for files. 79 | /// 80 | /// Postcondition: returns at least one path (it might not exist). 81 | pub fn runtime_dirs() -> &'static [PathBuf] { 82 | &RUNTIME_DIRS 83 | } 84 | 85 | /// Find file with path relative to runtime directory 86 | /// 87 | /// `rel_path` should be the relative path from within the `runtime/` directory. 88 | /// The valid runtime directories are searched in priority order and the first 89 | /// file found to exist is returned, otherwise None. 
90 | fn find_runtime_file(rel_path: &Path) -> Option { 91 | RUNTIME_DIRS.iter().find_map(|rt_dir| { 92 | let path = rt_dir.join(rel_path); 93 | if path.exists() { 94 | return Some(path); 95 | } 96 | 97 | None 98 | }) 99 | } 100 | 101 | /// Find file with path relative to runtime directory 102 | /// 103 | /// `rel_path` should be the relative path from within the `runtime/` directory. 104 | /// The valid runtime directories are searched in priority order and the first 105 | /// file found to exist is returned, otherwise the path to the final attempt 106 | /// that failed. 107 | pub fn runtime_file(rel_path: &Path) -> PathBuf { 108 | find_runtime_file(rel_path).unwrap_or_else(|| { 109 | RUNTIME_DIRS 110 | .last() 111 | .map(|dir| dir.join(rel_path)) 112 | .unwrap_or_default() 113 | }) 114 | } 115 | 116 | enum StrategyType { 117 | Config, 118 | Cache, 119 | } 120 | 121 | fn get_dir(target: StrategyType) -> PathBuf { 122 | let target_str = match target { 123 | StrategyType::Config => "config", 124 | StrategyType::Cache => "cache", 125 | }; 126 | 127 | // Check if the directory override environment variable is set 128 | if let Ok(dir) = std::env::var(format!("BALPAN_{}_DIR", target_str.to_uppercase())) { 129 | return PathBuf::from(dir); 130 | } 131 | 132 | let strategy = choose_base_strategy() 133 | .unwrap_or_else(|_| panic!("Unable to find the {target_str} directory strategy!")); 134 | let mut path = match target { 135 | StrategyType::Config => strategy.config_dir(), 136 | StrategyType::Cache => strategy.cache_dir(), 137 | }; 138 | 139 | path.push("balpan"); 140 | 141 | path 142 | } 143 | 144 | pub fn config_dir() -> PathBuf { 145 | get_dir(StrategyType::Config) 146 | } 147 | 148 | pub fn cache_dir() -> PathBuf { 149 | get_dir(StrategyType::Cache) 150 | } 151 | 152 | pub fn config_file() -> PathBuf { 153 | CONFIG_FILE 154 | .get() 155 | .map(|path| path.to_path_buf()) 156 | .unwrap_or_else(|| config_dir().join("config.toml")) 157 | } 158 | 159 | pub fn 
workspace_config_file() -> PathBuf { 160 | find_workspace().0.join(".balpan").join("config.toml") 161 | } 162 | 163 | pub fn lang_config_file() -> PathBuf { 164 | config_dir().join("languages.toml") 165 | } 166 | 167 | pub fn log_file() -> PathBuf { 168 | cache_dir().join("balpan.log") 169 | } 170 | 171 | fn get_name(v: &Value) -> Option<&str> { 172 | v.get("name").and_then(Value::as_str) 173 | } 174 | 175 | /// Merge two TOML documents, merging values from `right` onto `left` 176 | /// 177 | /// When an array exists in both `left` and `right`, `right`'s array is 178 | /// used. When a table exists in both `left` and `right`, the merged table 179 | /// consists of all keys in `left`'s table unioned with all keys in `right` 180 | /// with the values of `right` being merged recursively onto values of 181 | /// `left`. 182 | /// 183 | /// `merge_toplevel_arrays` controls whether a top-level array in the TOML 184 | /// document is merged instead of overridden. This is useful for TOML 185 | /// documents that use a top-level array of values like the `languages.toml`, 186 | /// where one usually wants to override or add to the array instead of 187 | /// replacing it altogether. 188 | pub fn merge_toml_values(left: toml::Value, right: toml::Value, merge_depth: usize) -> toml::Value { 189 | match (left, right) { 190 | (Value::Array(left_items), Value::Array(right_items)) => { 191 | toml_array_value(merge_depth, left_items, right_items) 192 | } 193 | (Value::Table(left_map), Value::Table(right_map)) => { 194 | toml_table_value(merge_depth, left_map, right_map) 195 | } 196 | // Catch everything else we didn't handle, and use the right value 197 | (_, value) => value, 198 | } 199 | } 200 | 201 | fn toml_array_value( 202 | merge_depth: usize, 203 | mut left_items: Vec, 204 | right_items: Vec, 205 | ) -> toml::Value { 206 | // The top-level arrays should be merged but nested arrays should 207 | // act as overrides. 
For the `languages.toml` config, this means 208 | // that you can specify a sub-set of languages in an overriding 209 | // `languages.toml` but that nested arrays like Language Server 210 | // arguments are replaced instead of merged. 211 | if merge_depth == 0 { 212 | return Value::Array(right_items); 213 | } 214 | 215 | left_items.reserve(right_items.len()); 216 | 217 | for r_val in right_items { 218 | let l_val = get_name(&r_val) 219 | .and_then(|r_name| left_items.iter().position(|v| get_name(v) == Some(r_name))) 220 | .map(|l_pos| left_items.remove(l_pos)); 221 | 222 | let m_val = match l_val { 223 | Some(l) => merge_toml_values(l, r_val, merge_depth - 1), 224 | None => r_val, 225 | }; 226 | 227 | left_items.push(m_val); 228 | } 229 | 230 | Value::Array(left_items) 231 | } 232 | 233 | fn toml_table_value( 234 | merge_depth: usize, 235 | mut left_map: Map, 236 | right_map: Map, 237 | ) -> toml::Value { 238 | if merge_depth == 0 { 239 | return Value::Table(right_map); 240 | } 241 | 242 | for (r_name, r_val) in right_map { 243 | match left_map.remove(&r_name) { 244 | Some(l_val) => { 245 | let merged_val = merge_toml_values(l_val, r_val, merge_depth - 1); 246 | left_map.insert(r_name, merged_val); 247 | } 248 | None => { 249 | left_map.insert(r_name, r_val); 250 | } 251 | } 252 | } 253 | 254 | Value::Table(left_map) 255 | } 256 | 257 | /// Finds the current workspace folder. 258 | /// Used as a ceiling dir for LSP root resolution, the filepicker and potentially as a future filewatching root 259 | /// 260 | /// This function starts searching the FS upward from the CWD 261 | /// and returns the first directory that contains either `.git` or `.balpan`. 262 | /// If no workspace was found returns (CWD, true). 
263 | /// Otherwise (workspace, false) is returned 264 | pub fn find_workspace() -> (PathBuf, bool) { 265 | let current_dir = std::env::current_dir().expect("unable to determine current directory"); 266 | for ancestor in current_dir.ancestors() { 267 | if ancestor.join(".git").exists() || ancestor.join(".balpan").exists() { 268 | return (ancestor.to_owned(), false); 269 | } 270 | } 271 | 272 | (current_dir, true) 273 | } 274 | 275 | #[cfg(test)] 276 | mod merge_toml_tests { 277 | use std::str; 278 | 279 | use super::merge_toml_values; 280 | use toml::Value; 281 | 282 | #[test] 283 | fn language_toml_map_merges() { 284 | const USER: &str = r#" 285 | [[language]] 286 | name = "nix" 287 | test = "bbb" 288 | indent = { tab-width = 4, unit = " ", test = "aaa" } 289 | "#; 290 | 291 | let base = include_bytes!("../languages.toml"); 292 | let base = str::from_utf8(base).expect("Couldn't parse built-in languages config"); 293 | let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config"); 294 | let user: Value = toml::from_str(USER).unwrap(); 295 | 296 | let merged = merge_toml_values(base, user, 3); 297 | let languages = merged.get("language").unwrap().as_array().unwrap(); 298 | let nix = languages 299 | .iter() 300 | .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix") 301 | .unwrap(); 302 | let nix_indent = nix.get("indent").unwrap(); 303 | 304 | // We changed tab-width and unit in indent so check them if they are the new values 305 | assert_eq!( 306 | nix_indent.get("tab-width").unwrap().as_integer().unwrap(), 307 | 4 308 | ); 309 | assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " "); 310 | // We added a new keys, so check them 311 | assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb"); 312 | assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa"); 313 | // We didn't change comment-token so it should be same 314 | assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#"); 315 | } 316 | 
317 | #[test] 318 | fn language_toml_nested_array_merges() { 319 | const USER: &str = r#" 320 | [[language]] 321 | name = "typescript" 322 | language-server = { command = "deno", args = ["lsp"] } 323 | "#; 324 | 325 | let base = include_bytes!("../languages.toml"); 326 | let base = str::from_utf8(base).expect("Couldn't parse built-in languages config"); 327 | let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config"); 328 | let user: Value = toml::from_str(USER).unwrap(); 329 | 330 | let merged = merge_toml_values(base, user, 3); 331 | let languages = merged.get("language").unwrap().as_array().unwrap(); 332 | let ts = languages 333 | .iter() 334 | .find(|v| v.get("name").unwrap().as_str().unwrap() == "typescript") 335 | .unwrap(); 336 | assert_eq!( 337 | ts.get("language-server") 338 | .unwrap() 339 | .get("args") 340 | .unwrap() 341 | .as_array() 342 | .unwrap(), 343 | &vec![Value::String("lsp".into())] 344 | ) 345 | } 346 | 347 | #[test] 348 | fn allow_env_variable_override() { 349 | const USER: &str = r#" 350 | [[language]] 351 | name = "typescript" 352 | language-server = { command = "deno", args = ["lsp"] } 353 | "#; 354 | 355 | let base = include_bytes!("../languages.toml"); 356 | let base = str::from_utf8(base).expect("Couldn't parse built-in languages config"); 357 | let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config"); 358 | let user: Value = toml::from_str(USER).unwrap(); 359 | 360 | std::env::set_var("BALPAN_CONFIG_DIR", "/tmp"); 361 | let merged = merge_toml_values(base, user, 3); 362 | std::env::remove_var("BALPAN_CONFIG_DIR"); 363 | 364 | let languages = merged.get("language").unwrap().as_array().unwrap(); 365 | let ts = languages 366 | .iter() 367 | .find(|v| v.get("name").unwrap().as_str().unwrap() == "typescript") 368 | .unwrap(); 369 | assert_eq!( 370 | ts.get("language-server") 371 | .unwrap() 372 | .get("args") 373 | .unwrap() 374 | .as_array() 375 | .unwrap(), 376 | 
&vec![Value::String("lsp".into())] 377 | ) 378 | } 379 | } 380 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::time::Instant; 3 | 4 | use balpan::commands::pattern_search::PatternTree; 5 | use clap::{Parser, Subcommand}; 6 | use glob::glob; 7 | 8 | use balpan::commands::grep::GrepReport; 9 | use balpan::scanner::Scanner; 10 | use balpan::utils::{get_current_repository, list_available_files, suggest_subcommand}; 11 | use git2::Repository; 12 | use tokio::runtime::{Builder, Runtime}; 13 | 14 | #[derive(Debug, Parser)] 15 | #[command(author, about, version, long_about = None)] 16 | struct BalpanApp { 17 | #[clap(subcommand)] 18 | command: BalpanCommand, 19 | } 20 | 21 | #[derive(Debug, Subcommand)] 22 | enum BalpanCommand { 23 | #[clap(about = "Setup environment for Balpan and fetch all available treesitter parsers")] 24 | Init, 25 | #[clap(about = "Reset environment for Balpan and removes all TODO comments")] 26 | Reset, 27 | #[clap( 28 | about = "Searches a particular pattern of characters, and displays all lines that contain that pattern" 29 | )] 30 | Grep { 31 | #[clap(short = 'f', long, help = "Specific file to scan")] 32 | file: Option, 33 | #[clap(short = 'p', long, help = "Specific pattern to search")] 34 | pattern: Option, 35 | #[clap( 36 | long, 37 | help = "Apply formatting to the output. Available options: json, tree, plain (default)" 38 | )] 39 | #[clap( 40 | short = 'i', 41 | long = "ignore", 42 | help = "ignores the case(upper or lower) of the pattern." 43 | )] 44 | ignore_case: Option>, 45 | #[clap( 46 | short = 'H', 47 | help = "Display the matched lines, but do not display the filenames." 48 | )] 49 | hide_path: bool, 50 | #[clap( 51 | short = 'l', 52 | help = "Display the names of files that contain matches, without displaying the matched lines." 
53 | )] 54 | list_of_files: bool, 55 | #[clap( 56 | short = 'c', 57 | help = "This prints only a count of the lines that match a pattern." 58 | )] 59 | count: bool, 60 | #[clap( 61 | short = 'T', 62 | long = "time", 63 | help = "Display the elapsed time during the execution of the command." 64 | )] 65 | show_elapsed_time: bool, 66 | #[clap(short = 'o', help = "Colorize the matched pattern in the output.")] 67 | colorize: bool, 68 | #[clap( 69 | short = 'E', 70 | help = "Treats pattern as an extended regular expression (ERE)." 71 | )] 72 | extended_regex: bool, 73 | format: Option, 74 | }, 75 | #[clap(about = "Generate a TODO comment for specific file")] 76 | Analyze { 77 | #[clap(short, long, help = "Specific file to scan")] 78 | pattern: Option, 79 | }, 80 | } 81 | 82 | fn create_runtime() -> Runtime { 83 | Builder::new_current_thread().enable_all().build().unwrap() 84 | } 85 | 86 | fn main() { 87 | let app = BalpanApp::parse(); 88 | 89 | // verify that the subcommand entered is correct. 
90 | let user_input: Option = std::env::args().nth(1); 91 | 92 | if let Some(input) = user_input { 93 | if suggest_subcommand(&input).is_some() { 94 | println!("Did you mean '{}'?", suggest_subcommand(&input).unwrap()); 95 | } 96 | } 97 | 98 | match app.command { 99 | BalpanCommand::Init => { 100 | let runtime = create_runtime(); 101 | 102 | runtime.block_on(async { handle_init().await }) 103 | } 104 | BalpanCommand::Reset => handle_reset(), 105 | BalpanCommand::Grep { 106 | file, 107 | pattern, 108 | format, 109 | ignore_case, 110 | hide_path, 111 | list_of_files, 112 | count, 113 | colorize, 114 | extended_regex, 115 | show_elapsed_time: elapsed, 116 | } => { 117 | let time = Instant::now(); 118 | let runtime = create_runtime(); 119 | 120 | let patterns: Option> = 121 | pattern.map(|p| p.split_whitespace().map(|s| s.to_string()).collect()); 122 | 123 | runtime.block_on(async { 124 | let mut report = GrepReport::new(); 125 | handle_grep( 126 | file, 127 | patterns, 128 | &mut report, 129 | format, 130 | ignore_case, 131 | hide_path, 132 | list_of_files, 133 | count, 134 | colorize, 135 | extended_regex, 136 | ) 137 | .await; 138 | }); 139 | 140 | if elapsed { 141 | println!("time: {:?}", time.elapsed()); 142 | } 143 | } 144 | BalpanCommand::Analyze { pattern } => { 145 | match pattern { 146 | Some(ref p) => { 147 | if !p.starts_with('"') || !p.ends_with('"') { 148 | panic!("Invalid file path. Please include double quotes(`\"`) in the path.") 149 | } 150 | } 151 | None => panic!("No file specified. 
Please specify a file path to analyze"), 152 | } 153 | 154 | let runtime = create_runtime(); 155 | 156 | runtime.block_on(async { 157 | handle_analyze(pattern).await; 158 | }); 159 | } 160 | } 161 | } 162 | 163 | fn git(args: Vec) { 164 | std::process::Command::new("git") 165 | .args(args) 166 | .output() 167 | .unwrap(); 168 | } 169 | 170 | fn find_branch<'a>(repository: &Repository, target: &'a str) -> Option<&'a str> { 171 | let mut iter = repository.branches(None); 172 | 173 | while let Some(Ok((ref branch, _))) = &iter.as_mut().expect("???").next() { 174 | if let Ok(Some(branch_name)) = branch.name() { 175 | if target == branch_name { 176 | return Some(target); 177 | } 178 | } 179 | } 180 | 181 | None 182 | } 183 | 184 | fn find_main_or_master_branch<'a>(repo: &'a Repository, branches: &[&'a str]) -> String { 185 | if branches.is_empty() { 186 | panic!("No main or master branch found"); 187 | } 188 | 189 | if let Some(branch) = find_branch(repo, branches[0]) { 190 | return branch.to_string(); 191 | } 192 | 193 | find_main_or_master_branch(repo, &branches[1..]) 194 | } 195 | 196 | fn handle_reset() { 197 | let repo = get_current_repository().unwrap(); 198 | //let onboarding_branch = find_branch(&repo, "onboarding").to_string(); 199 | let is_already_setup: bool; 200 | 201 | let onboarding_branch = match find_branch(&repo, "onboarding") { 202 | Some(branch) => { 203 | is_already_setup = true; 204 | branch.to_string() 205 | } 206 | None => panic!("No onboarding branch found"), 207 | }; 208 | 209 | let main_branch = find_main_or_master_branch(&repo, &["main", "master"]); 210 | 211 | if is_already_setup { 212 | git(vec!["switch".to_owned(), main_branch]); 213 | git(vec![ 214 | "branch".to_owned(), 215 | "-d".to_owned(), 216 | onboarding_branch, 217 | ]); 218 | } 219 | } 220 | 221 | async fn handle_init() { 222 | let repo = get_current_repository().unwrap(); 223 | let mut is_already_setup: bool = false; 224 | 225 | let _onboarding_branch = match find_branch(&repo, 
"onboarding") { 226 | Some(branch) => { 227 | is_already_setup = true; 228 | branch.to_string() 229 | } 230 | None => String::new(), 231 | }; 232 | 233 | let main_branch = find_main_or_master_branch(&repo, &["main", "master"]); 234 | 235 | if !is_already_setup { 236 | git(vec!["switch".to_owned(), main_branch.clone()]); 237 | git(vec![ 238 | "switch".to_owned(), 239 | "-c".to_owned(), 240 | "onboarding".to_owned(), 241 | ]); 242 | } 243 | 244 | git(vec!["switch".to_owned(), main_branch]); 245 | git(vec!["switch".to_owned(), "onboarding".to_owned()]); 246 | 247 | Scanner::scan(&repo).await; 248 | println!("init!"); 249 | } 250 | 251 | #[allow(clippy::too_many_arguments)] 252 | async fn handle_grep( 253 | file: Option, 254 | pattern: Option>, 255 | report: &mut GrepReport, 256 | format: Option, 257 | ignore_case: Option>, 258 | hide_path: bool, 259 | list_of_files: bool, 260 | count: bool, 261 | colorize: bool, 262 | extends_regex: bool, 263 | ) { 264 | let mut pattern_tree = PatternTree::new(); 265 | let default_patterns = vec!["[TODO]".to_string(), "[DONE]".to_string()]; 266 | 267 | let patterns_to_search: Vec; 268 | 269 | if extends_regex { 270 | pattern_tree.ignore_case = true; 271 | pattern_tree.regex_flag = true; 272 | } 273 | 274 | match ignore_case { 275 | Some(ignore_patterns) => { 276 | pattern_tree.ignore_case = true; 277 | patterns_to_search = ignore_patterns; 278 | } 279 | None => { 280 | patterns_to_search = pattern.unwrap_or(default_patterns); 281 | } 282 | } 283 | 284 | match file { 285 | Some(file_path) => { 286 | scan_specific_file(file_path, report, &mut pattern_tree, &patterns_to_search).await 287 | } 288 | None => scan_project_directory(report, pattern_tree, patterns_to_search.clone()).await, 289 | } 290 | 291 | let formatting = report.report_formatting( 292 | format, 293 | hide_path, 294 | list_of_files, 295 | count, 296 | patterns_to_search, 297 | colorize, 298 | ); 299 | println!("{}", formatting); 300 | } 301 | 302 | async fn 
handle_analyze(pattern: Option) { 303 | if pattern.is_none() { 304 | panic!("No file specified. Please specify a file path to analyze") 305 | } 306 | 307 | let file_pattern_str = pattern.unwrap(); 308 | let filter = glob(&file_pattern_str).expect("Failed to read file pattern"); 309 | 310 | for entry in filter { 311 | match entry { 312 | Ok(path) => Scanner::scan_specific_file(path).await, 313 | Err(e) => println!("Error while reading file pattern: {}", e), 314 | } 315 | } 316 | } 317 | 318 | async fn scan_project_directory( 319 | report: &mut GrepReport, 320 | mut pattern_tree: PatternTree, 321 | patterns_to_search: Vec, 322 | ) { 323 | let repo = get_current_repository().expect("No repository found"); 324 | let repo_path = repo.workdir().expect("No workdir found").to_str().unwrap(); 325 | 326 | let available_files: Vec = list_available_files(repo_path).await; 327 | 328 | for file in available_files { 329 | let path = Path::new(&file); 330 | update_report(report, path, &mut pattern_tree, &patterns_to_search).await; 331 | } 332 | } 333 | 334 | async fn scan_specific_file( 335 | file_path: String, 336 | report: &mut GrepReport, 337 | pattern_tree: &mut PatternTree, 338 | patterns_to_search: &Vec, 339 | ) { 340 | let path = Path::new(&file_path); 341 | update_report(report, path, pattern_tree, patterns_to_search).await; 342 | } 343 | 344 | async fn update_report( 345 | report: &mut GrepReport, 346 | path: &Path, 347 | pattern_tree: &mut PatternTree, 348 | patterns_to_search: &Vec, 349 | ) { 350 | report 351 | .grep_file(path, pattern_tree, patterns_to_search) 352 | .await 353 | .unwrap(); 354 | } 355 | -------------------------------------------------------------------------------- /src/scanner.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{Read, Seek, Write}; 3 | use std::path::{Path, PathBuf}; 4 | 5 | use git2::Repository; 6 | 7 | use crate::analyzer::Analyzer; 8 | use 
crate::grammar::{build_grammars, fetch_grammars}; 9 | use crate::language::Language; 10 | use crate::utils::list_available_files; 11 | 12 | pub struct Scanner; 13 | 14 | impl Scanner { 15 | pub async fn scan(repo: &Repository) { 16 | fetch_grammars().unwrap(); 17 | build_grammars(None).unwrap(); 18 | 19 | if let Some(workdir) = repo.workdir() { 20 | let repo_root = workdir.to_string_lossy(); 21 | let filenames = list_available_files(&repo_root); 22 | for filename in filenames.await { 23 | if filename.contains("test") { 24 | continue; 25 | } 26 | let path = Path::new(&filename); 27 | let language = match path.extension() { 28 | Some(os_str) => Language::from_extension(os_str.to_str().unwrap()), 29 | _ => Language::Other("".to_string()), 30 | }; 31 | 32 | if let Language::Other(_) = language { 33 | continue; 34 | } 35 | 36 | if let Ok(mut file) = File::options().read(true).write(true).open(path) { 37 | let mut source_code = String::new(); 38 | file.read_to_string(&mut source_code).unwrap(); 39 | let with_empty_line = source_code.ends_with('\n'); 40 | let analyzer = Analyzer { 41 | source_code, 42 | language, 43 | }; 44 | 45 | let writer_queue = &analyzer.analyze(); 46 | let mut lines = vec![]; 47 | 48 | for line in writer_queue { 49 | lines.push(String::from(line)); 50 | } 51 | 52 | if with_empty_line { 53 | lines.push(String::new()); 54 | } 55 | 56 | file.set_len(0).unwrap(); 57 | file.rewind().unwrap(); 58 | file.write_all(lines.join("\n").as_bytes()).unwrap(); 59 | } 60 | } 61 | } 62 | } 63 | 64 | /// Scan a specific file and add TODO comments 65 | pub async fn scan_specific_file(path: PathBuf) { 66 | fetch_grammars().unwrap(); 67 | build_grammars(None).unwrap(); 68 | 69 | if let Ok(mut file) = File::options().read(true).write(true).open(path.clone()) { 70 | let mut source_code = String::new(); 71 | file.read_to_string(&mut source_code).unwrap(); 72 | let with_empty_line = source_code.ends_with('\n'); 73 | 74 | let language = match path.extension() { 75 | Some(p) 
=> Language::from_extension(p.to_str().unwrap()), 76 | _ => Language::Other(String::new()), 77 | }; 78 | 79 | let analyzer = Analyzer { 80 | source_code, 81 | language, 82 | }; 83 | 84 | let writer_queue = &analyzer.analyze(); 85 | let mut lines: Vec = vec![]; 86 | 87 | for line in writer_queue { 88 | lines.push(String::from(line)); 89 | } 90 | 91 | if with_empty_line { 92 | lines.push(String::new()); 93 | } 94 | 95 | file.set_len(0).unwrap(); 96 | file.rewind().unwrap(); 97 | file.write_all(lines.join("\n").as_bytes()).unwrap(); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/tokens.rs: -------------------------------------------------------------------------------- 1 | use crate::language::Language; 2 | 3 | pub enum CommentToken { 4 | TripleSlashTODO, 5 | DoubleSlashTODO, 6 | HashTODO, 7 | Other, 8 | } 9 | 10 | impl CommentToken { 11 | pub fn from_language(language: &Language) -> Self { 12 | match language { 13 | Language::Rust | Language::Cpp => CommentToken::TripleSlashTODO, 14 | Language::Python | Language::Ruby => CommentToken::HashTODO, 15 | Language::JavaScript | Language::TypeScript => CommentToken::DoubleSlashTODO, 16 | _ => CommentToken::Other, 17 | } 18 | } 19 | 20 | pub fn to_str(&self) -> &str { 21 | match self { 22 | CommentToken::TripleSlashTODO => "/// [TODO]", 23 | CommentToken::DoubleSlashTODO => "// [TODO]", 24 | CommentToken::HashTODO => "# [TODO]", 25 | CommentToken::Other => "", 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/tree_sitter_extended.rs: -------------------------------------------------------------------------------- 1 | use tree_sitter::{Node, Point, Range}; 2 | 3 | pub trait MembershipCheck { 4 | fn is_before(&self, range: Range) -> bool; 5 | fn is_after(&self, range: Range) -> bool; 6 | fn is_member_of(&self, range: Range) -> bool; 7 | } 8 | 9 | impl MembershipCheck for Point { 10 | fn 
is_before(&self, range: Range) -> bool { 11 | let start_point = range.start_point; 12 | 13 | if self.row < start_point.row { 14 | return true; 15 | } 16 | 17 | if self.row > start_point.row { 18 | return false; 19 | } 20 | 21 | self.column < start_point.column 22 | } 23 | 24 | fn is_after(&self, range: Range) -> bool { 25 | let end_point = range.end_point; 26 | 27 | if self.row < end_point.row { 28 | return false; 29 | } 30 | 31 | if self.row > end_point.row { 32 | return true; 33 | } 34 | 35 | self.column > end_point.column 36 | } 37 | 38 | fn is_member_of(&self, range: Range) -> bool { 39 | if self.is_before(range) { 40 | return false; 41 | } 42 | 43 | if self.is_after(range) { 44 | return false; 45 | } 46 | 47 | true 48 | } 49 | } 50 | 51 | pub trait RangeFactory { 52 | fn from_node(node: Node) -> Range; 53 | } 54 | 55 | impl RangeFactory for Range { 56 | #[inline] 57 | fn from_node(node: Node) -> Range { 58 | Range { 59 | start_byte: node.start_byte(), 60 | end_byte: node.end_byte(), 61 | start_point: node.start_position(), 62 | end_point: node.end_position(), 63 | } 64 | } 65 | } 66 | 67 | pub trait ResolveSymbol { 68 | fn identifier_range(&self) -> (usize, usize, usize); 69 | } 70 | 71 | impl ResolveSymbol for Node<'_> { 72 | fn identifier_range(&self) -> (usize, usize, usize) { 73 | let simple_cases = [ 74 | "attribute_item", 75 | "use_declaration", 76 | "macro_invocation", 77 | "expression_statement", 78 | "foreign_mod_item", 79 | ]; 80 | 81 | if simple_cases.contains(&self.kind()) { 82 | return (0, 0, 0); 83 | } 84 | 85 | let mut node = self.child_by_field_name("name"); 86 | 87 | if self.kind() == "namespace_definition" && node.is_none() { 88 | return (0, 0, 0); 89 | } 90 | 91 | if self.kind() == "function_definition" { 92 | if let Some(child) = self.child_by_field_name("declarator") { 93 | node = child.child_by_field_name("declarator"); 94 | } 95 | } 96 | 97 | if self.kind() == "method_definition" { 98 | node = self.child_by_field_name("name"); 99 | } 100 
| 101 | // case of decorated_definition 102 | if self.kind() == "decorated_definition" { 103 | let definition_node = self.child_by_field_name("definition").unwrap(); 104 | node = definition_node.child_by_field_name("name"); 105 | } 106 | 107 | // case of impl_item 108 | if self.kind() == "impl_item" { 109 | node = self.child_by_field_name("trait"); // impl Foo for Bar 110 | node = match node { 111 | None => self.child_by_field_name("type"), // impl Foo 112 | result => result, 113 | } 114 | } 115 | 116 | // e.g. `export function foo() {}` 117 | if self.kind() == "export_statement" { 118 | // this case handles import statement especially `export * from './compiler_facade_interface';` things. 119 | // I think this is not a good way to handle this case, but I don't know how to handle this case. 120 | if self.child_by_field_name("source").is_some() { 121 | return (0, 0, 0); 122 | } 123 | 124 | if let Some(child) = self.child_by_field_name("declaration") { 125 | node = child.child_by_field_name("name"); 126 | } 127 | } 128 | 129 | let identifier_node = 130 | node.unwrap_or_else(|| panic!("`{}` is an invalid identifier node type", self.kind())); 131 | 132 | let from = identifier_node.start_position().column; 133 | let row = identifier_node.end_position().row; 134 | let to = identifier_node.end_position().column; 135 | 136 | (row, from, to) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | use std::env; 3 | use std::fs::File; 4 | 5 | use git2::Repository; 6 | use ignore::{DirEntry, WalkBuilder}; 7 | use once_cell::sync::Lazy; 8 | use strsim::levenshtein; 9 | 10 | #[rustfmt::skip] 11 | static IGNORED_EXTENSIONS: Lazy> = Lazy::new(|| { 12 | [ 13 | ".tmp", ".bak", ".swp", ".old", ".new", ".orig", ".patch", ".diff", // temporary 14 | ".proj", ".sln", ".classpath", ".project", // project 15 | 
".obj", ".exe", ".dll", ".class", ".o", ".e", // binary 16 | ".toml", ".lock", ".json", ".md", ".yaml", ".yml", ".xml", ".ini", // dev config 17 | ".zip", ".tar", ".gz", ".rar", ".7z", ".tgz", ".xz", ".bz2", // compressed 18 | ".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif", // image 19 | ".wav", ".mp3", ".mp4", ".avi", ".mov", ".flv", ".ogg", // audio/video 20 | ".doc", ".docx", ".pdf", ".ppt", ".pptx", ".xls", "xlsx", ".odt", // document 21 | ".yml", ".xml", ".ini", // config 22 | ".log", ".dat", // log 23 | ".yarn", ".npm", // package manager 24 | ] 25 | .iter() 26 | .map(|&s| s.into()) 27 | .collect() 28 | }); 29 | 30 | static IGNORED_PREFIXES: Lazy> = Lazy::new(|| { 31 | ["."].iter().map(|&s| s.into()).collect() // hidden files start with '.' 32 | }); 33 | 34 | pub fn get_current_repository() -> Option { 35 | let current_dir = env::current_dir().ok()?; 36 | let repo = Repository::discover(current_dir).ok()?; 37 | 38 | Some(repo) 39 | } 40 | 41 | pub async fn list_available_files(repo_path: &str) -> Vec { 42 | let mut result = Vec::new(); 43 | 44 | let is_ignored = move |entry: &DirEntry| { 45 | let extension = entry 46 | .path() 47 | .extension() 48 | .and_then(|s| s.to_str()) 49 | .unwrap_or(""); 50 | let file_name = entry 51 | .path() 52 | .file_name() 53 | .and_then(|s| s.to_str()) 54 | .unwrap_or(""); 55 | 56 | IGNORED_EXTENSIONS.contains(&format!(".{}", extension)) 57 | || IGNORED_PREFIXES 58 | .iter() 59 | .any(|prefix| file_name.starts_with(prefix)) 60 | }; 61 | 62 | let walker = WalkBuilder::new(repo_path) 63 | .hidden(true) 64 | .git_ignore(true) 65 | .parents(false) 66 | .filter_entry(move |f| !is_ignored(f)) 67 | .build(); 68 | 69 | for entry in walker.flatten() { 70 | match entry.file_type() { 71 | Some(file_type) if file_type.is_file() => { 72 | if let Ok(_file) = File::open(entry.path()) { 73 | result.push(entry.path().to_string_lossy().to_string()); 74 | } 75 | } 76 | // if file type is directory or other things, just skip it 77 | _ => continue, 
78 | } 79 | } 80 | 81 | result 82 | } 83 | 84 | #[rustfmt::skip] 85 | static DICTIONARY: Lazy> = Lazy::new(|| { 86 | vec![ 87 | "init", "reset", "grep", "help", "file", "pattern", "format", "json", "plain", 88 | ] 89 | }); 90 | 91 | pub fn suggest_subcommand(input: &str) -> Option { 92 | let mut closest = None; 93 | let mut smallest_distance = 80; // default maximum line length setting for COBOL 94 | const THRESHOLD: usize = 3; 95 | 96 | for item in &*DICTIONARY { 97 | let distance = levenshtein(input, *item); 98 | match distance { 99 | 0 => return None, 100 | 1..=THRESHOLD if distance < smallest_distance => { 101 | smallest_distance = distance; 102 | closest = Some((*item).to_string()); 103 | } 104 | _ => {} 105 | } 106 | } 107 | 108 | closest 109 | } -------------------------------------------------------------------------------- /tests/analyzer_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod analyzer_test { 3 | mod analyze_test; 4 | } 5 | -------------------------------------------------------------------------------- /tests/analyzer_test/analyze_test.rs: -------------------------------------------------------------------------------- 1 | use balpan::analyzer::Analyzer; 2 | use balpan::grammar::{build_grammars, fetch_grammars}; 3 | use balpan::language::Language; 4 | use indoc::indoc; 5 | 6 | fn assert_analyzed_source_code(source_code: &str, expected: &str, language: &str) { 7 | fetch_grammars().unwrap(); 8 | build_grammars(None).unwrap(); 9 | 10 | let analyzer = Analyzer { 11 | source_code: source_code.to_string(), 12 | language: Language::from(language), 13 | }; 14 | 15 | let writer_queue = &analyzer.analyze(); 16 | let mut string_vector = vec![]; 17 | 18 | for line in writer_queue { 19 | string_vector.push(String::from(line)); 20 | } 21 | 22 | let actual: String = string_vector 23 | // .iter() 24 | // .map( |str| { *str } ) 25 | // .collect::>() 26 | .join("\n"); 27 | 28 | assert_eq!(expected, 
actual); 29 | } 30 | 31 | #[test] 32 | fn test_stacked_macros() { 33 | let source_code = indoc! {r#" 34 | #[derive(Deserialize)] 35 | #[serde(bound(deserialize = "T: Deserialize<'de>"))] 36 | struct List { 37 | #[serde(deserialize_with = "deserialize_vec")] 38 | items: Vec, 39 | }"#}; 40 | 41 | let result = indoc! {r#" 42 | /// [TODO] List 43 | #[derive(Deserialize)] 44 | #[serde(bound(deserialize = "T: Deserialize<'de>"))] 45 | struct List { 46 | #[serde(deserialize_with = "deserialize_vec")] 47 | items: Vec, 48 | }"#}; 49 | 50 | assert_analyzed_source_code(source_code, result, "rust") 51 | } 52 | 53 | #[test] 54 | fn test_idempotency() { 55 | let source_code = indoc! {r#" 56 | /// [TODO] List 57 | #[derive(Deserialize)] 58 | #[serde(bound(deserialize = "T: Deserialize<'de>"))] 59 | struct List { 60 | #[serde(deserialize_with = "deserialize_vec")] 61 | items: Vec, 62 | }"#}; 63 | 64 | let result = indoc! {r#" 65 | /// [TODO] List 66 | #[derive(Deserialize)] 67 | #[serde(bound(deserialize = "T: Deserialize<'de>"))] 68 | struct List { 69 | #[serde(deserialize_with = "deserialize_vec")] 70 | items: Vec, 71 | }"#}; 72 | 73 | assert_analyzed_source_code(source_code, result, "rust") 74 | } 75 | 76 | #[test] 77 | fn test_idempotency_within_nested_scope() { 78 | let source_code = indoc! 
{" 79 | # [TODO] Post 80 | class Post(models.Model): 81 | user = models.ForeignKey(User) 82 | 83 | # [TODO] Post > Meta 84 | class Meta: 85 | table_name = 'posts' 86 | 87 | # [TODO] Post > count 88 | @staticmethod 89 | def count(cls): 90 | return cls.count 91 | 92 | # [TODO] Post > author 93 | def author(self): 94 | return self.user 95 | 96 | # [TODO] Comment 97 | class Comment(models.Model): 98 | user = models.ForeignKey(User) 99 | 100 | # [TODO] Comment > Meta 101 | class Meta: 102 | table_name = 'comments' 103 | 104 | # [TODO] Comment > count 105 | @staticmethod 106 | def count(cls): 107 | return cls.count 108 | 109 | # [TODO] Comment > author 110 | def author(self): 111 | return self.user"}; 112 | 113 | let result = indoc! {" 114 | # [TODO] Post 115 | class Post(models.Model): 116 | user = models.ForeignKey(User) 117 | 118 | # [TODO] Post > Meta 119 | class Meta: 120 | table_name = 'posts' 121 | 122 | # [TODO] Post > count 123 | @staticmethod 124 | def count(cls): 125 | return cls.count 126 | 127 | # [TODO] Post > author 128 | def author(self): 129 | return self.user 130 | 131 | # [TODO] Comment 132 | class Comment(models.Model): 133 | user = models.ForeignKey(User) 134 | 135 | # [TODO] Comment > Meta 136 | class Meta: 137 | table_name = 'comments' 138 | 139 | # [TODO] Comment > count 140 | @staticmethod 141 | def count(cls): 142 | return cls.count 143 | 144 | # [TODO] Comment > author 145 | def author(self): 146 | return self.user"}; 147 | 148 | assert_analyzed_source_code(source_code, result, "python") 149 | } 150 | 151 | #[test] 152 | fn test_ignore_todo_test_macro() { 153 | let source_code = indoc! {" 154 | #[cfg(test)] 155 | mod tests { 156 | use super::*; 157 | 158 | #[test] 159 | fn test_foo() { 160 | assert_eq!(foo(), 1); 161 | } 162 | }"}; 163 | 164 | let result = indoc! 
{" 165 | /// [TODO] tests 166 | #[cfg(test)] 167 | mod tests { 168 | use super::*; 169 | 170 | /// [TODO] tests > test_foo 171 | #[test] 172 | fn test_foo() { 173 | assert_eq!(foo(), 1); 174 | } 175 | }"}; 176 | 177 | assert_analyzed_source_code(source_code, result, "rust") 178 | } 179 | 180 | #[test] 181 | fn test_ignore_doc_macro() { 182 | let source_code = indoc! {r#" 183 | #[doc = "This is a doc comment"] 184 | fn foo() { 185 | println!("foo"); 186 | }"#}; 187 | 188 | let result = indoc! {r#" 189 | /// [TODO] foo 190 | #[doc = "This is a doc comment"] 191 | fn foo() { 192 | println!("foo"); 193 | }"#}; 194 | 195 | assert_analyzed_source_code(source_code, result, "rust") 196 | } 197 | 198 | #[test] 199 | fn test_trait_and_impl() { 200 | let source_code = indoc! { " 201 | pub trait RangeFactory { 202 | fn from_node(node: Node) -> Range; 203 | } 204 | 205 | impl RangeFactory for Range { 206 | #[inline] 207 | fn from_node(node: Node) -> Range { 208 | Range { 209 | start_byte: node.start_byte(), 210 | end_byte: node.end_byte(), 211 | start_point: node.start_position(), 212 | end_point: node.end_position(), 213 | } 214 | } 215 | }"}; 216 | 217 | let result = indoc! { " 218 | /// [TODO] RangeFactory 219 | pub trait RangeFactory { 220 | fn from_node(node: Node) -> Range; 221 | } 222 | 223 | /// [TODO] RangeFactory 224 | impl RangeFactory for Range { 225 | /// [TODO] RangeFactory > from_node 226 | #[inline] 227 | fn from_node(node: Node) -> Range { 228 | Range { 229 | start_byte: node.start_byte(), 230 | end_byte: node.end_byte(), 231 | start_point: node.start_position(), 232 | end_point: node.end_position(), 233 | } 234 | } 235 | }"}; 236 | 237 | assert_analyzed_source_code(source_code, result, "rust") 238 | } 239 | 240 | #[test] 241 | fn test_trait_and_impl_with_mod() { 242 | let source_code = indoc! 
{ " 243 | mod tree_sitter_extended { 244 | pub trait RangeFactory { 245 | fn from_node(node: Node) -> Range; 246 | } 247 | 248 | impl RangeFactory for Range { 249 | #[inline] 250 | fn from_node(node: Node) -> Range { 251 | Range { 252 | start_byte: node.start_byte(), 253 | end_byte: node.end_byte(), 254 | start_point: node.start_position(), 255 | end_point: node.end_position(), 256 | } 257 | } 258 | } 259 | }"}; 260 | 261 | let result = indoc! { " 262 | /// [TODO] tree_sitter_extended 263 | mod tree_sitter_extended { 264 | /// [TODO] tree_sitter_extended > RangeFactory 265 | pub trait RangeFactory { 266 | fn from_node(node: Node) -> Range; 267 | } 268 | 269 | /// [TODO] tree_sitter_extended > RangeFactory 270 | impl RangeFactory for Range { 271 | /// [TODO] tree_sitter_extended > RangeFactory > from_node 272 | #[inline] 273 | fn from_node(node: Node) -> Range { 274 | Range { 275 | start_byte: node.start_byte(), 276 | end_byte: node.end_byte(), 277 | start_point: node.start_position(), 278 | end_point: node.end_position(), 279 | } 280 | } 281 | } 282 | }"}; 283 | 284 | assert_analyzed_source_code(source_code, result, "rust") 285 | } 286 | -------------------------------------------------------------------------------- /tests/integration_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod integration_test { 3 | use balpan::analyzer::Analyzer; 4 | use balpan::grammar::{build_grammars, fetch_grammars}; 5 | use balpan::language::Language; 6 | 7 | mod analyze_command_test; 8 | // mod toggle_command_test; 9 | 10 | pub fn assert_analyzed_source_code(source_code: &str, expected: &str, language: &str) { 11 | fetch_grammars().unwrap(); 12 | build_grammars(None).unwrap(); 13 | 14 | let analyzer = Analyzer { 15 | source_code: source_code.to_string(), 16 | language: Language::from(language), 17 | }; 18 | 19 | let writer_queue = &analyzer.analyze(); 20 | let mut string_vector = vec![]; 21 | 22 | for line in 
writer_queue { 23 | string_vector.push(String::from(line)); 24 | } 25 | 26 | let actual: String = string_vector.join("\n"); 27 | 28 | if actual != expected { 29 | println!("expected: {}\n\n", expected); 30 | println!("actual: {}\n\n", actual); 31 | } 32 | 33 | assert_eq!(expected, actual); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod python_test; 3 | 4 | #[cfg(test)] 5 | mod rust_test; 6 | 7 | #[cfg(test)] 8 | mod ruby_test; 9 | 10 | #[cfg(test)] 11 | mod cpp_test; 12 | 13 | #[cfg(test)] 14 | mod c_test; 15 | 16 | #[cfg(test)] 17 | mod typescript_test; 18 | 19 | #[cfg(test)] 20 | mod javascript_test; 21 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/c_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod neovim_case_test; 3 | 4 | #[cfg(test)] 5 | mod redis_case_test; 6 | 7 | #[cfg(test)] 8 | mod nginx_case_test; 9 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/c_test/neovim_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_function_definition() { 6 | let source_code = indoc! 
{ r#" 7 | static OptVal object_as_optval(Object o, bool *error) 8 | { 9 | switch (o.type) { 10 | case kObjectTypeNil: 11 | return NIL_OPTVAL; 12 | case kObjectTypeBoolean: 13 | return BOOLEAN_OPTVAL(o.data.boolean); 14 | case kObjectTypeInteger: 15 | return NUMBER_OPTVAL(o.data.integer); 16 | case kObjectTypeString: 17 | return STRING_OPTVAL(o.data.string); 18 | default: 19 | *error = true; 20 | return NIL_OPTVAL; 21 | } 22 | }"#}; 23 | 24 | let result = indoc! { r#" 25 | /// [TODO] object_as_optval 26 | static OptVal object_as_optval(Object o, bool *error) 27 | { 28 | switch (o.type) { 29 | case kObjectTypeNil: 30 | return NIL_OPTVAL; 31 | case kObjectTypeBoolean: 32 | return BOOLEAN_OPTVAL(o.data.boolean); 33 | case kObjectTypeInteger: 34 | return NUMBER_OPTVAL(o.data.integer); 35 | case kObjectTypeString: 36 | return STRING_OPTVAL(o.data.string); 37 | default: 38 | *error = true; 39 | return NIL_OPTVAL; 40 | } 41 | }"#}; 42 | 43 | assert_analyzed_source_code(source_code, result, "cpp"); 44 | } 45 | 46 | #[test] 47 | fn test_function_definition_with_conditional_compilation() { 48 | let source_code = indoc! { r#" 49 | int path_is_absolute(const char *fname) 50 | { 51 | #ifdef MSWIN 52 | if (*fname == NUL) { 53 | return false; 54 | } 55 | // A name like "d:/foo" and "//server/share" is absolute 56 | return ((isalpha((uint8_t)fname[0]) && fname[1] == ':' && vim_ispathsep_nocolon(fname[2])) 57 | || (vim_ispathsep_nocolon(fname[0]) && fname[0] == fname[1])); 58 | #else 59 | // UNIX: This just checks if the file name starts with '/' or '~'. 60 | return *fname == '/' || *fname == '~'; 61 | #endif 62 | }"#}; 63 | 64 | let result = indoc! 
{ r#" 65 | /// [TODO] path_is_absolute 66 | int path_is_absolute(const char *fname) 67 | { 68 | #ifdef MSWIN 69 | if (*fname == NUL) { 70 | return false; 71 | } 72 | // A name like "d:/foo" and "//server/share" is absolute 73 | return ((isalpha((uint8_t)fname[0]) && fname[1] == ':' && vim_ispathsep_nocolon(fname[2])) 74 | || (vim_ispathsep_nocolon(fname[0]) && fname[0] == fname[1])); 75 | #else 76 | // UNIX: This just checks if the file name starts with '/' or '~'. 77 | return *fname == '/' || *fname == '~'; 78 | #endif 79 | }"#}; 80 | 81 | assert_analyzed_source_code(source_code, result, "cpp"); 82 | } 83 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/c_test/nginx_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_function_definition_with_nginx_convention() { 6 | let source_code = indoc! { r#" 7 | static int 8 | ngx_stream_ssl_alpn_select(ngx_ssl_conn_t *ssl_conn, const unsigned char **out, 9 | unsigned char *outlen, const unsigned char *in, unsigned int inlen, 10 | void *arg) 11 | { 12 | ngx_str_t *alpn; 13 | #if (NGX_DEBUG) 14 | unsigned int i; 15 | ngx_connection_t *c; 16 | 17 | c = ngx_ssl_get_connection(ssl_conn); 18 | 19 | for (i = 0; i < inlen; i += in[i] + 1) { 20 | ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0, 21 | "SSL ALPN supported by client: %*s", 22 | (size_t) in[i], &in[i + 1]); 23 | } 24 | 25 | #endif 26 | 27 | alpn = arg; 28 | 29 | if (SSL_select_next_proto((unsigned char **) out, outlen, alpn->data, 30 | alpn->len, in, inlen) 31 | != OPENSSL_NPN_NEGOTIATED) 32 | { 33 | return SSL_TLSEXT_ERR_ALERT_FATAL; 34 | } 35 | 36 | ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0, 37 | "SSL ALPN selected: %*s", (size_t) *outlen, *out); 38 | 39 | return SSL_TLSEXT_ERR_OK; 40 | }"#}; 41 | 42 | let result = indoc! 
{ r#" 43 | /// [TODO] ngx_stream_ssl_alpn_select 44 | static int 45 | ngx_stream_ssl_alpn_select(ngx_ssl_conn_t *ssl_conn, const unsigned char **out, 46 | unsigned char *outlen, const unsigned char *in, unsigned int inlen, 47 | void *arg) 48 | { 49 | ngx_str_t *alpn; 50 | #if (NGX_DEBUG) 51 | unsigned int i; 52 | ngx_connection_t *c; 53 | 54 | c = ngx_ssl_get_connection(ssl_conn); 55 | 56 | for (i = 0; i < inlen; i += in[i] + 1) { 57 | ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0, 58 | "SSL ALPN supported by client: %*s", 59 | (size_t) in[i], &in[i + 1]); 60 | } 61 | 62 | #endif 63 | 64 | alpn = arg; 65 | 66 | if (SSL_select_next_proto((unsigned char **) out, outlen, alpn->data, 67 | alpn->len, in, inlen) 68 | != OPENSSL_NPN_NEGOTIATED) 69 | { 70 | return SSL_TLSEXT_ERR_ALERT_FATAL; 71 | } 72 | 73 | ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0, 74 | "SSL ALPN selected: %*s", (size_t) *outlen, *out); 75 | 76 | return SSL_TLSEXT_ERR_OK; 77 | }"#}; 78 | 79 | assert_analyzed_source_code(source_code, result, "cpp"); 80 | } 81 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/c_test/redis_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_declaration_of_function() { 6 | let source_code = indoc! 
{ r#" 7 | list *listCreate(void); 8 | void listRelease(list *list); 9 | void listEmpty(list *list); 10 | list *listAddNodeHead(list *list, void *value); 11 | list *listAddNodeTail(list *list, void *value); 12 | list *listInsertNode(list *list, listNode *old_node, void *value, int after); 13 | void listDelNode(list *list, listNode *node); 14 | listIter *listGetIterator(list *list, int direction); 15 | listNode *listNext(listIter *iter); 16 | void listReleaseIterator(listIter *iter); 17 | list *listDup(list *orig); 18 | listNode *listSearchKey(list *list, void *key); 19 | listNode *listIndex(list *list, long index); 20 | void listRewind(list *list, listIter *li); 21 | void listRewindTail(list *list, listIter *li); 22 | void listRotateTailToHead(list *list); 23 | void listRotateHeadToTail(list *list); 24 | void listJoin(list *l, list *o); 25 | void listInitNode(listNode *node, void *value); 26 | void listLinkNodeHead(list *list, listNode *node); 27 | void listLinkNodeTail(list *list, listNode *node); 28 | void listUnlinkNode(list *list, listNode *node);"#}; 29 | 30 | let result = indoc! 
/// Redis combines a macro invocation (`REDIS_NO_SANITIZE`) with the
/// function definition on the next line; annotating that shape is not
/// supported yet, so the case is registered with the harness but ignored.
#[test]
#[ignore] // was a bare #[ignore] on a non-test fn, so the case never reached the test harness
fn test_function_definition_together_with_macro_combined() {
    let source_code = indoc! {r#"
        REDIS_NO_SANITIZE("bounds")
        clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) {

            uint32_t channel_len, message_len;

            channel = getDecodedObject(channel);
            message = getDecodedObject(message);
            channel_len = sdslen(channel->ptr);
            message_len = sdslen(message->ptr);

            size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
            msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
            clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen);

            clusterMsg *hdr = &msgblock->msg;
            hdr->data.publish.msg.channel_len = htonl(channel_len);
            hdr->data.publish.msg.message_len = htonl(message_len);
            memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr));
            memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr),
                message->ptr,sdslen(message->ptr));

            decrRefCount(channel);
            decrRefCount(message);

            return msgblock;
        }"#};

    let result = indoc! {r#"
        /// [TODO] clusterCreatePublishMsgBlock
        REDIS_NO_SANITIZE("bounds")
        clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) {

            uint32_t channel_len, message_len;

            channel = getDecodedObject(channel);
            message = getDecodedObject(message);
            channel_len = sdslen(channel->ptr);
            message_len = sdslen(message->ptr);

            size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
            msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
            clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen);

            clusterMsg *hdr = &msgblock->msg;
            hdr->data.publish.msg.channel_len = htonl(channel_len);
            hdr->data.publish.msg.message_len = htonl(message_len);
            memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr));
            memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr),
                message->ptr,sdslen(message->ptr));

            decrRefCount(channel);
            decrRefCount(message);

            return msgblock;
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
{ r#" 7 | /// [TODO] BloombergLP 8 | namespace BloombergLP { 9 | /// [TODO] BloombergLP > bmqimp 10 | namespace bmqimp { 11 | /// [TODO] BloombergLP > bmqimp > anonymous 12 | namespace { 13 | // CONSTANTS 14 | const double k_RECONNECT_INTERVAL_MS = 500; 15 | const int k_RECONNECT_COUNT = bsl::numeric_limits::max(); 16 | const bsls::Types::Int64 k_CHANNEL_LOW_WATERMARK = 512 * 1024; 17 | 18 | /// Create the StatContextConfiguration to use, from the specified 19 | /// `options`, and using the specified `allocator` for memory allocations. 20 | /// [TODO] BloombergLP > bmqimp > anonymous > statContextConfiguration 21 | mwcst::StatContextConfiguration 22 | statContextConfiguration(const bmqt::SessionOptions& options, 23 | bslma::Allocator* allocator) 24 | { 25 | mwcst::StatContextConfiguration config("stats", allocator); 26 | if (options.statsDumpInterval() != bsls::TimeInterval()) { 27 | // Stats configuration: 28 | // we snapshot every second 29 | // first level keeps 30s of history 30 | // second level keeps enough for the dump interval 31 | // Because some stats require range computation, second level actually 32 | // has to be of size 1 more than the dump interval 33 | config.defaultHistorySize( 34 | 30, 35 | (options.statsDumpInterval().seconds() / 30) + 1); 36 | } 37 | else { 38 | config.defaultHistorySize(2); 39 | } 40 | 41 | return config; 42 | } 43 | } 44 | } 45 | }"#}; 46 | 47 | let source_code = indoc! { r#" 48 | namespace BloombergLP { 49 | namespace bmqimp { 50 | namespace { 51 | // CONSTANTS 52 | const double k_RECONNECT_INTERVAL_MS = 500; 53 | const int k_RECONNECT_COUNT = bsl::numeric_limits::max(); 54 | const bsls::Types::Int64 k_CHANNEL_LOW_WATERMARK = 512 * 1024; 55 | 56 | /// Create the StatContextConfiguration to use, from the specified 57 | /// `options`, and using the specified `allocator` for memory allocations. 
58 | mwcst::StatContextConfiguration 59 | statContextConfiguration(const bmqt::SessionOptions& options, 60 | bslma::Allocator* allocator) 61 | { 62 | mwcst::StatContextConfiguration config("stats", allocator); 63 | if (options.statsDumpInterval() != bsls::TimeInterval()) { 64 | // Stats configuration: 65 | // we snapshot every second 66 | // first level keeps 30s of history 67 | // second level keeps enough for the dump interval 68 | // Because some stats require range computation, second level actually 69 | // has to be of size 1 more than the dump interval 70 | config.defaultHistorySize( 71 | 30, 72 | (options.statsDumpInterval().seconds() / 30) + 1); 73 | } 74 | else { 75 | config.defaultHistorySize(2); 76 | } 77 | 78 | return config; 79 | } 80 | } 81 | } 82 | }"#}; 83 | 84 | assert_analyzed_source_code(source_code, result, "cpp"); 85 | } 86 | 87 | #[test] 88 | fn test_class_declaration_with_nested_scope() { 89 | let source_code = indoc! { r#" 90 | namespace m_bmqbrkr { 91 | class Task_AllocatorManager { 92 | private: 93 | mqbcfg::AllocatorType::Value d_type; 94 | 95 | bsls::ObjectBuffer d_store; 96 | private: 97 | Task_AllocatorManager(const Task_AllocatorManager&); // = delete; 98 | public: 99 | explicit Task_AllocatorManager(mqbcfg::AllocatorType::Value type); 100 | 101 | ~Task_AllocatorManager(); 102 | }; 103 | }"#}; 104 | 105 | let result = indoc! 
{ r#" 106 | /// [TODO] m_bmqbrkr 107 | namespace m_bmqbrkr { 108 | /// [TODO] m_bmqbrkr > Task_AllocatorManager 109 | class Task_AllocatorManager { 110 | private: 111 | mqbcfg::AllocatorType::Value d_type; 112 | 113 | bsls::ObjectBuffer d_store; 114 | private: 115 | Task_AllocatorManager(const Task_AllocatorManager&); // = delete; 116 | public: 117 | explicit Task_AllocatorManager(mqbcfg::AllocatorType::Value type); 118 | 119 | ~Task_AllocatorManager(); 120 | }; 121 | }"#}; 122 | 123 | assert_analyzed_source_code(source_code, result, "cpp"); 124 | } 125 | 126 | #[ignore] 127 | fn test_templated_function_definition() { 128 | let source_code = indoc! { r#" 129 | template 130 | bool parseCommand(CMD* command, const bsl::string& jsonInput) 131 | { 132 | bsl::istringstream is(jsonInput); 133 | baljsn::DecoderOptions options; 134 | options.setSkipUnknownElements(true); 135 | baljsn::Decoder decoder; 136 | int rc = decoder.decode(is, command, options); 137 | if (rc != 0) { 138 | BALL_LOG_ERROR << "Unable to decode: " << jsonInput << bsl::endl 139 | << decoder.loggedMessages(); 140 | return false; // RETURN 141 | } 142 | 143 | return true; 144 | } 145 | 146 | template 147 | inline bool Value::is() const 148 | { 149 | return d_value.is(); 150 | } 151 | 152 | template 153 | inline const TYPE& Value::the() const 154 | { 155 | return d_value.the(); 156 | } 157 | 158 | template 159 | inline typename VISITOR::ResultType Value::apply(const VISITOR& visitor) const 160 | { 161 | return d_value.apply(visitor); 162 | } 163 | "#}; 164 | 165 | let result = indoc! 
{ r#" 166 | /// [TODO] parseCommand 167 | template 168 | bool parseCommand(CMD* command, const bsl::string& jsonInput) 169 | { 170 | bsl::istringstream is(jsonInput); 171 | baljsn::DecoderOptions options; 172 | options.setSkipUnknownElements(true); 173 | baljsn::Decoder decoder; 174 | int rc = decoder.decode(is, command, options); 175 | if (rc != 0) { 176 | BALL_LOG_ERROR << "Unable to decode: " << jsonInput << bsl::endl 177 | << decoder.loggedMessages(); 178 | return false; // RETURN 179 | } 180 | 181 | return true; 182 | } 183 | 184 | /// [TODO] Value::is 185 | template 186 | inline bool Value::is() const 187 | { 188 | return d_value.is(); 189 | } 190 | 191 | /// [TODO] Value::the 192 | template 193 | inline const TYPE& Value::the() const 194 | { 195 | return d_value.the(); 196 | } 197 | 198 | /// [TODO] Value::apply 199 | template 200 | inline typename VISITOR::ResultType Value::apply(const VISITOR& visitor) const 201 | { 202 | return d_value.apply(visitor); 203 | }"#}; 204 | 205 | assert_analyzed_source_code(source_code, result, "cpp"); 206 | } 207 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/javascript_test.rs: -------------------------------------------------------------------------------- 1 | mod react_native_case_test; 2 | #[cfg(test)] 3 | mod svelt_cast_test; 4 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/javascript_test/react_native_case_test.rs: -------------------------------------------------------------------------------- 1 | use indoc::indoc; 2 | 3 | use crate::integration_test::assert_analyzed_source_code; 4 | 5 | #[test] 6 | fn test_function_declaration() { 7 | let source_code = indoc! 
{r#" 8 | function getPackageName(file /*: string */) /*: string */ { 9 | return path.relative(PACKAGES_DIR, file).split(path.sep)[0]; 10 | } 11 | 12 | function getBuildPath(file /*: string */) /*: string */ { 13 | const packageDir = path.join(PACKAGES_DIR, getPackageName(file)); 14 | 15 | return path.join( 16 | packageDir, 17 | file.replace(path.join(packageDir, SRC_DIR), BUILD_DIR), 18 | ); 19 | } 20 | 21 | async function rewritePackageExports(packageName /*: string */) { 22 | const packageJsonPath = path.join(PACKAGES_DIR, packageName, 'package.json'); 23 | const pkg = JSON.parse(await fs.readFile(packageJsonPath, 'utf8')); 24 | 25 | await fs.writeFile( 26 | packageJsonPath, 27 | prettier.format(JSON.stringify(pkg), {parser: 'json'}), 28 | ); 29 | }"#}; 30 | 31 | let expected = indoc! {r#" 32 | // [TODO] getPackageName 33 | function getPackageName(file /*: string */) /*: string */ { 34 | return path.relative(PACKAGES_DIR, file).split(path.sep)[0]; 35 | } 36 | 37 | // [TODO] getBuildPath 38 | function getBuildPath(file /*: string */) /*: string */ { 39 | const packageDir = path.join(PACKAGES_DIR, getPackageName(file)); 40 | 41 | return path.join( 42 | packageDir, 43 | file.replace(path.join(packageDir, SRC_DIR), BUILD_DIR), 44 | ); 45 | } 46 | 47 | // [TODO] rewritePackageExports 48 | async function rewritePackageExports(packageName /*: string */) { 49 | const packageJsonPath = path.join(PACKAGES_DIR, packageName, 'package.json'); 50 | const pkg = JSON.parse(await fs.readFile(packageJsonPath, 'utf8')); 51 | 52 | await fs.writeFile( 53 | packageJsonPath, 54 | prettier.format(JSON.stringify(pkg), {parser: 'json'}), 55 | ); 56 | }"#}; 57 | 58 | assert_analyzed_source_code(source_code, expected, "javascript") 59 | } 60 | 61 | #[test] 62 | fn test_class() { 63 | let source_code = indoc! 
{r#" 64 | export class KeyPressHandler { 65 | _isInterceptingKeyStrokes = false; 66 | _isHandlingKeyPress = false; 67 | _onPress: (key: string) => Promise; 68 | 69 | constructor(onPress: (key: string) => Promise) { 70 | this._onPress = onPress; 71 | } 72 | 73 | /** Start intercepting all key strokes and passing them to the input `onPress` method. */ 74 | startInterceptingKeyStrokes() { 75 | if (this._isInterceptingKeyStrokes) { 76 | return; 77 | } 78 | this._isInterceptingKeyStrokes = true; 79 | const {stdin} = process; 80 | // $FlowFixMe[prop-missing] 81 | stdin.setRawMode(true); 82 | stdin.resume(); 83 | stdin.setEncoding('utf8'); 84 | stdin.on('data', this._handleKeypress); 85 | } 86 | 87 | /** Stop intercepting all key strokes. */ 88 | stopInterceptingKeyStrokes() { 89 | if (!this._isInterceptingKeyStrokes) { 90 | return; 91 | } 92 | this._isInterceptingKeyStrokes = false; 93 | const {stdin} = process; 94 | stdin.removeListener('data', this._handleKeypress); 95 | // $FlowFixMe[prop-missing] 96 | stdin.setRawMode(false); 97 | stdin.resume(); 98 | } 99 | }"#}; 100 | 101 | let expected = indoc! {r#" 102 | // [TODO] KeyPressHandler 103 | export class KeyPressHandler { 104 | _isInterceptingKeyStrokes = false; 105 | _isHandlingKeyPress = false; 106 | _onPress: (key: string) => Promise; 107 | 108 | constructor(onPress: (key: string) => Promise) { 109 | this._onPress = onPress; 110 | } 111 | 112 | /** Start intercepting all key strokes and passing them to the input `onPress` method. */ 113 | startInterceptingKeyStrokes() { 114 | if (this._isInterceptingKeyStrokes) { 115 | return; 116 | } 117 | this._isInterceptingKeyStrokes = true; 118 | const {stdin} = process; 119 | // $FlowFixMe[prop-missing] 120 | stdin.setRawMode(true); 121 | stdin.resume(); 122 | stdin.setEncoding('utf8'); 123 | stdin.on('data', this._handleKeypress); 124 | } 125 | 126 | /** Stop intercepting all key strokes. 
*/ 127 | stopInterceptingKeyStrokes() { 128 | if (!this._isInterceptingKeyStrokes) { 129 | return; 130 | } 131 | this._isInterceptingKeyStrokes = false; 132 | const {stdin} = process; 133 | stdin.removeListener('data', this._handleKeypress); 134 | // $FlowFixMe[prop-missing] 135 | stdin.setRawMode(false); 136 | stdin.resume(); 137 | } 138 | }"#}; 139 | 140 | assert_analyzed_source_code(source_code, expected, "javascript") 141 | } 142 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/javascript_test/svelt_cast_test.rs: -------------------------------------------------------------------------------- 1 | use indoc::indoc; 2 | 3 | use crate::integration_test::assert_analyzed_source_code; 4 | 5 | #[test] 6 | #[ignore = "TODO: arrow function"] 7 | fn test_arrow_function() { 8 | let source_code = indoc! {r#" 9 | export const parse = (source) => 10 | code_red.parse(source, { 11 | sourceType: 'module', 12 | ecmaVersion: 13, 13 | locations: true 14 | }); 15 | 16 | /** 17 | * @param {string} source 18 | * @param {number} index 19 | */ 20 | export const parse_expression_at = (source, index) => 21 | code_red.parseExpressionAt(source, index, { 22 | sourceType: 'module', 23 | ecmaVersion: 13, 24 | locations: true 25 | });"#}; 26 | 27 | let expected = indoc! 
{r#" 28 | // [TODO] parse 29 | export const parse = (source) => 30 | code_red.parse(source, { 31 | sourceType: 'module', 32 | ecmaVersion: 13, 33 | locations: true 34 | }); 35 | 36 | /** 37 | * @param {string} source 38 | * @param {number} index 39 | */ 40 | // [TODO] parse_expression_at 41 | export const parse_expression_at = (source, index) => 42 | code_red.parseExpressionAt(source, index, { 43 | sourceType: 'module', 44 | ecmaVersion: 13, 45 | locations: true 46 | });"#}; 47 | 48 | assert_analyzed_source_code(source_code, expected, "javascript") 49 | } 50 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/python_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod rustpython_case_test; 3 | 4 | #[cfg(test)] 5 | mod python_dependency_injector_case_test; 6 | 7 | #[cfg(test)] 8 | mod django_case_test; 9 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/python_test/django_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_class_definition_within_class() { 6 | let source_code = indoc! {r#" 7 | class Car(models.Model): 8 | name = models.CharField(max_length=20) 9 | default_parts = models.ManyToManyField(Part) 10 | optional_parts = models.ManyToManyField(Part, related_name="cars_optional") 11 | 12 | class Meta: 13 | ordering = ("name",)"#}; 14 | 15 | let result = indoc! 
{r#" 16 | # [TODO] Car 17 | class Car(models.Model): 18 | name = models.CharField(max_length=20) 19 | default_parts = models.ManyToManyField(Part) 20 | optional_parts = models.ManyToManyField(Part, related_name="cars_optional") 21 | 22 | # [TODO] Car > Meta 23 | class Meta: 24 | ordering = ("name",)"#}; 25 | 26 | assert_analyzed_source_code(source_code, result, "python") 27 | } 28 | 29 | #[test] 30 | fn test_decorated_definitions_within_class_definition() { 31 | let source_code = indoc! {r#" 32 | class Choices(enum.Enum, metaclass=ChoicesMeta): 33 | """Class for creating enumerated choices.""" 34 | 35 | @DynamicClassAttribute 36 | def label(self): 37 | return self._label_ 38 | 39 | @property 40 | def do_not_call_in_templates(self): 41 | return True"#}; 42 | 43 | let result = indoc! {r#" 44 | # [TODO] Choices 45 | class Choices(enum.Enum, metaclass=ChoicesMeta): 46 | """Class for creating enumerated choices.""" 47 | 48 | # [TODO] Choices > label 49 | @DynamicClassAttribute 50 | def label(self): 51 | return self._label_ 52 | 53 | # [TODO] Choices > do_not_call_in_templates 54 | @property 55 | def do_not_call_in_templates(self): 56 | return True"#}; 57 | 58 | assert_analyzed_source_code(source_code, result, "python") 59 | } 60 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/python_test/python_dependency_injector_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_decorated_definition() { 6 | let source_code = indoc! {r#" 7 | @app.route("/") 8 | @inject 9 | def index(service: Service = Provide[Container.service]): 10 | result = service.process() 11 | return jsonify({"result": result})"#}; 12 | 13 | let result = indoc! 
{r#" 14 | # [TODO] index 15 | @app.route("/") 16 | @inject 17 | def index(service: Service = Provide[Container.service]): 18 | result = service.process() 19 | return jsonify({"result": result})"#}; 20 | 21 | assert_analyzed_source_code(source_code, result, "python") 22 | } 23 | 24 | #[test] 25 | fn test_decorated_async_function_definition() { 26 | let source_code = indoc! {r#" 27 | @inject 28 | async def async_injection( 29 | resource1: object = Provide["resource1"], 30 | resource2: object = Provide["resource2"], 31 | ): 32 | return resource1, resource2 33 | 34 | @inject 35 | async def async_injection_with_closing( 36 | resource1: object = Closing[Provide["resource1"]], 37 | resource2: object = Closing[Provide["resource2"]], 38 | ): 39 | return resource1, resource2"#}; 40 | 41 | let result = indoc! {r#" 42 | # [TODO] async_injection 43 | @inject 44 | async def async_injection( 45 | resource1: object = Provide["resource1"], 46 | resource2: object = Provide["resource2"], 47 | ): 48 | return resource1, resource2 49 | 50 | # [TODO] async_injection_with_closing 51 | @inject 52 | async def async_injection_with_closing( 53 | resource1: object = Closing[Provide["resource1"]], 54 | resource2: object = Closing[Provide["resource2"]], 55 | ): 56 | return resource1, resource2"#}; 57 | 58 | assert_analyzed_source_code(source_code, result, "python") 59 | } 60 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/python_test/rustpython_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | /// Test stdlib 5 | /// 6 | #[test] 7 | fn test_class_definition() { 8 | let source_code = indoc! 
{r#" 9 | class FeedParser: 10 | """A feed-style parser of email.""" 11 | 12 | def __init__(self, _factory=None, *, policy=compat32): 13 | """_factory is called with no arguments to create a new message obj 14 | 15 | The policy keyword specifies a policy object that controls a number of 16 | aspects of the parser's operation. The default policy maintains 17 | backward compatibility. 18 | 19 | """ 20 | self.policy = policy 21 | self._old_style_factory = False 22 | if _factory is None: 23 | if policy.message_factory is None: 24 | from email.message import Message 25 | self._factory = Message 26 | else: 27 | self._factory = policy.message_factory 28 | else: 29 | self._factory = _factory 30 | try: 31 | _factory(policy=self.policy) 32 | except TypeError: 33 | # Assume this is an old-style factory 34 | self._old_style_factory = True 35 | self._input = BufferedSubFile() 36 | self._msgstack = [] 37 | self._parse = self._parsegen().__next__ 38 | self._cur = None 39 | self._last = None 40 | self._headersonly = False 41 | 42 | # Non-public interface for supporting Parser's headersonly flag 43 | def _set_headersonly(self): 44 | self._headersonly = True 45 | 46 | def feed(self, data): 47 | """Push more data into the parser.""" 48 | self._input.push(data) 49 | self._call_parse() 50 | 51 | def _call_parse(self): 52 | try: 53 | self._parse() 54 | except StopIteration: 55 | pass"#}; 56 | 57 | let result = indoc! {r#" 58 | # [TODO] FeedParser 59 | class FeedParser: 60 | """A feed-style parser of email.""" 61 | 62 | # [TODO] FeedParser > __init__ 63 | def __init__(self, _factory=None, *, policy=compat32): 64 | """_factory is called with no arguments to create a new message obj 65 | 66 | The policy keyword specifies a policy object that controls a number of 67 | aspects of the parser's operation. The default policy maintains 68 | backward compatibility. 
69 | 70 | """ 71 | self.policy = policy 72 | self._old_style_factory = False 73 | if _factory is None: 74 | if policy.message_factory is None: 75 | from email.message import Message 76 | self._factory = Message 77 | else: 78 | self._factory = policy.message_factory 79 | else: 80 | self._factory = _factory 81 | try: 82 | _factory(policy=self.policy) 83 | except TypeError: 84 | # Assume this is an old-style factory 85 | self._old_style_factory = True 86 | self._input = BufferedSubFile() 87 | self._msgstack = [] 88 | self._parse = self._parsegen().__next__ 89 | self._cur = None 90 | self._last = None 91 | self._headersonly = False 92 | 93 | # Non-public interface for supporting Parser's headersonly flag 94 | # [TODO] FeedParser > _set_headersonly 95 | def _set_headersonly(self): 96 | self._headersonly = True 97 | 98 | # [TODO] FeedParser > feed 99 | def feed(self, data): 100 | """Push more data into the parser.""" 101 | self._input.push(data) 102 | self._call_parse() 103 | 104 | # [TODO] FeedParser > _call_parse 105 | def _call_parse(self): 106 | try: 107 | self._parse() 108 | except StopIteration: 109 | pass"#}; 110 | 111 | assert_analyzed_source_code(source_code, result, "python") 112 | } 113 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/ruby_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod mastodon_case_test; 3 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/ruby_test/mastodon_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_class_declaration_with_nested_scope() { 6 | let source_code = indoc! 
{ r#" 7 | module Chewy 8 | class Strategy 9 | class Mastodon < Base 10 | def initialize 11 | super 12 | 13 | @stash = Hash.new { |hash, key| hash[key] = [] } 14 | end 15 | 16 | def update(type, objects, _options = {}) 17 | @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled? 18 | end 19 | 20 | def leave 21 | RedisConfiguration.with do |redis| 22 | redis.pipelined do |pipeline| 23 | @stash.each do |type, ids| 24 | pipeline.sadd("chewy:queue:#{type.name}", ids) 25 | end 26 | end 27 | end 28 | end 29 | end 30 | end 31 | end"#}; 32 | 33 | let result = indoc! { r#" 34 | # [TODO] Chewy 35 | module Chewy 36 | # [TODO] Chewy > Strategy 37 | class Strategy 38 | # [TODO] Chewy > Strategy > Mastodon 39 | class Mastodon < Base 40 | # [TODO] Chewy > Strategy > Mastodon > initialize 41 | def initialize 42 | super 43 | 44 | @stash = Hash.new { |hash, key| hash[key] = [] } 45 | end 46 | 47 | # [TODO] Chewy > Strategy > Mastodon > update 48 | def update(type, objects, _options = {}) 49 | @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled? 
50 | end 51 | 52 | # [TODO] Chewy > Strategy > Mastodon > leave 53 | def leave 54 | RedisConfiguration.with do |redis| 55 | redis.pipelined do |pipeline| 56 | @stash.each do |type, ids| 57 | pipeline.sadd("chewy:queue:#{type.name}", ids) 58 | end 59 | end 60 | end 61 | end 62 | end 63 | end 64 | end"#}; 65 | 66 | assert_analyzed_source_code(source_code, result, "ruby"); 67 | } 68 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/rust_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod anyhow_case_test; 3 | 4 | #[cfg(test)] 5 | mod rustpython_case_test; 6 | 7 | #[cfg(test)] 8 | mod serde_case_test; 9 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/rust_test/anyhow_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_declaring_error_enum_with_macro() { 6 | let source_code = indoc! {r#" 7 | use thiserror::Error; 8 | 9 | #[derive(Error, Debug)] 10 | pub enum FormatError { 11 | #[error("Invalid header (expected {expected:?}, got {found:?})")] 12 | InvalidHeader { 13 | expected: String, 14 | found: String, 15 | }, 16 | #[error("Missing attribute: {0}")] 17 | MissingAttribute(String), 18 | }"#}; 19 | 20 | let result = indoc! 
{r#" 21 | use thiserror::Error; 22 | 23 | /// [TODO] FormatError 24 | #[derive(Error, Debug)] 25 | pub enum FormatError { 26 | #[error("Invalid header (expected {expected:?}, got {found:?})")] 27 | InvalidHeader { 28 | expected: String, 29 | found: String, 30 | }, 31 | #[error("Missing attribute: {0}")] 32 | MissingAttribute(String), 33 | }"#}; 34 | 35 | assert_analyzed_source_code(source_code, result, "rust") 36 | } 37 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/rust_test/rustpython_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/jit/src/lib.rs#L10-L28 5 | #[test] 6 | fn test_declaring_enum_with_stacked_attribute() { 7 | let source_code = indoc! {r#" 8 | #[derive(Debug, thiserror::Error)] 9 | #[non_exhaustive] 10 | pub enum JitCompileError { 11 | #[error("function can't be jitted")] 12 | NotSupported, 13 | #[error("bad bytecode")] 14 | BadBytecode, 15 | #[error("error while compiling to machine code: {0}")] 16 | CraneliftError(#[from] ModuleError), 17 | } 18 | 19 | #[derive(Debug, thiserror::Error, Eq, PartialEq)] 20 | #[non_exhaustive] 21 | pub enum JitArgumentError { 22 | #[error("argument is of wrong type")] 23 | ArgumentTypeMismatch, 24 | #[error("wrong number of arguments")] 25 | WrongNumberOfArguments, 26 | }"#}; 27 | 28 | let result = indoc! 
{r#" 29 | /// [TODO] JitCompileError 30 | #[derive(Debug, thiserror::Error)] 31 | #[non_exhaustive] 32 | pub enum JitCompileError { 33 | #[error("function can't be jitted")] 34 | NotSupported, 35 | #[error("bad bytecode")] 36 | BadBytecode, 37 | #[error("error while compiling to machine code: {0}")] 38 | CraneliftError(#[from] ModuleError), 39 | } 40 | 41 | /// [TODO] JitArgumentError 42 | #[derive(Debug, thiserror::Error, Eq, PartialEq)] 43 | #[non_exhaustive] 44 | pub enum JitArgumentError { 45 | #[error("argument is of wrong type")] 46 | ArgumentTypeMismatch, 47 | #[error("wrong number of arguments")] 48 | WrongNumberOfArguments, 49 | }"#}; 50 | 51 | assert_analyzed_source_code(source_code, result, "rust") 52 | } 53 | 54 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/vm/src/compiler.rs#L5C1-L6 55 | #[test] 56 | fn test_macro_above_use_declaration_should_be_ignored() { 57 | let source_code = indoc! { r#" 58 | #[cfg(feature = "rustpython-compiler")] 59 | use rustpython_compiler::*;"#}; 60 | 61 | let result = indoc! { r#" 62 | #[cfg(feature = "rustpython-compiler")] 63 | use rustpython_compiler::*;"#}; 64 | 65 | assert_analyzed_source_code(source_code, result, "rust") 66 | } 67 | 68 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/wasm/lib/src/js_module.rs#L24-L55 69 | #[test] 70 | fn test_macro_above_extern_c_module() { 71 | let source_code = indoc! 
{ r#" 72 | #[wasm_bindgen(inline_js = " 73 | export function has_prop(target, prop) { return prop in Object(target); } 74 | export function get_prop(target, prop) { return target[prop]; } 75 | export function set_prop(target, prop, value) { target[prop] = value; } 76 | export function type_of(a) { return typeof a; } 77 | export function instance_of(lhs, rhs) { return lhs instanceof rhs; } 78 | ")] 79 | extern "C" { 80 | #[wasm_bindgen(catch)] 81 | fn has_prop(target: &JsValue, prop: &JsValue) -> Result; 82 | #[wasm_bindgen(catch)] 83 | fn get_prop(target: &JsValue, prop: &JsValue) -> Result; 84 | #[wasm_bindgen(catch)] 85 | fn set_prop(target: &JsValue, prop: &JsValue, value: &JsValue) -> Result<(), JsValue>; 86 | #[wasm_bindgen] 87 | fn type_of(a: &JsValue) -> String; 88 | #[wasm_bindgen(catch)] 89 | fn instance_of(lhs: &JsValue, rhs: &JsValue) -> Result; 90 | }"#}; 91 | 92 | let result = indoc! { r#" 93 | #[wasm_bindgen(inline_js = " 94 | export function has_prop(target, prop) { return prop in Object(target); } 95 | export function get_prop(target, prop) { return target[prop]; } 96 | export function set_prop(target, prop, value) { target[prop] = value; } 97 | export function type_of(a) { return typeof a; } 98 | export function instance_of(lhs, rhs) { return lhs instanceof rhs; } 99 | ")] 100 | extern "C" { 101 | #[wasm_bindgen(catch)] 102 | fn has_prop(target: &JsValue, prop: &JsValue) -> Result; 103 | #[wasm_bindgen(catch)] 104 | fn get_prop(target: &JsValue, prop: &JsValue) -> Result; 105 | #[wasm_bindgen(catch)] 106 | fn set_prop(target: &JsValue, prop: &JsValue, value: &JsValue) -> Result<(), JsValue>; 107 | #[wasm_bindgen] 108 | fn type_of(a: &JsValue) -> String; 109 | #[wasm_bindgen(catch)] 110 | fn instance_of(lhs: &JsValue, rhs: &JsValue) -> Result; 111 | }"#}; 112 | 113 | assert_analyzed_source_code(source_code, result, "rust") 114 | } 115 | -------------------------------------------------------------------------------- 
/tests/integration_test/analyze_command_test/rust_test/serde_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_several_impl_declaration() { 6 | let source_code = indoc! {" 7 | impl PartialEq for Ident { 8 | fn eq(&self, word: &Symbol) -> bool { 9 | self == word.0 10 | } 11 | } 12 | 13 | impl<'a> PartialEq for &'a Ident { 14 | fn eq(&self, word: &Symbol) -> bool { 15 | *self == word.0 16 | } 17 | } 18 | 19 | impl PartialEq for Path { 20 | fn eq(&self, word: &Symbol) -> bool { 21 | self.is_ident(word.0) 22 | } 23 | }"}; 24 | 25 | let result = indoc! {" 26 | /// [TODO] PartialEq 27 | impl PartialEq for Ident { 28 | /// [TODO] PartialEq > eq 29 | fn eq(&self, word: &Symbol) -> bool { 30 | self == word.0 31 | } 32 | } 33 | 34 | /// [TODO] PartialEq 35 | impl<'a> PartialEq for &'a Ident { 36 | /// [TODO] PartialEq > eq 37 | fn eq(&self, word: &Symbol) -> bool { 38 | *self == word.0 39 | } 40 | } 41 | 42 | /// [TODO] PartialEq 43 | impl PartialEq for Path { 44 | /// [TODO] PartialEq > eq 45 | fn eq(&self, word: &Symbol) -> bool { 46 | self.is_ident(word.0) 47 | } 48 | }"}; 49 | 50 | assert_analyzed_source_code(source_code, result, "rust") 51 | } 52 | 53 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/lib.rs#L155-L156 54 | #[test] 55 | fn test_macro_above_extern_crate_declaration_should_be_ignored() { 56 | let source_code = indoc! { r#" 57 | #[cfg(feature = "alloc")] 58 | extern crate alloc;"#}; 59 | 60 | let result = indoc! 
{ r#" 61 | #[cfg(feature = "alloc")] 62 | extern crate alloc;"#}; 63 | 64 | assert_analyzed_source_code(source_code, result, "rust"); 65 | } 66 | 67 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/precompiled/bin/main.rs#L11-L12 68 | #[test] 69 | fn test_macro_above_static_variable_should_be_ignored() { 70 | let source_code = indoc! {r#" 71 | #[global_allocator] 72 | static ALLOCATOR: MonotonicAllocator = MonotonicAllocator;"#}; 73 | 74 | let result = indoc! {r#" 75 | #[global_allocator] 76 | static ALLOCATOR: MonotonicAllocator = MonotonicAllocator;"#}; 77 | 78 | assert_analyzed_source_code(source_code, result, "rust") 79 | } 80 | 81 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/de/impls.rs#L1783-L1793 82 | #[test] 83 | fn test_macro_above_macro_invocation_should_be_ignored() { 84 | let source_code = indoc! { r#" 85 | #[cfg(any(feature = "std", feature = "alloc"))] 86 | forwarded_impl!((T), Box, Box::new); 87 | 88 | #[cfg(any(feature = "std", feature = "alloc"))] 89 | forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice); 90 | 91 | #[cfg(any(feature = "std", feature = "alloc"))] 92 | forwarded_impl!((), Box, String::into_boxed_str); 93 | 94 | #[cfg(all(feature = "std", any(unix, windows)))] 95 | forwarded_impl!((), Box, OsString::into_boxed_os_str);"#}; 96 | 97 | let result = indoc! 
{ r#" 98 | #[cfg(any(feature = "std", feature = "alloc"))] 99 | forwarded_impl!((T), Box, Box::new); 100 | 101 | #[cfg(any(feature = "std", feature = "alloc"))] 102 | forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice); 103 | 104 | #[cfg(any(feature = "std", feature = "alloc"))] 105 | forwarded_impl!((), Box, String::into_boxed_str); 106 | 107 | #[cfg(all(feature = "std", any(unix, windows)))] 108 | forwarded_impl!((), Box, OsString::into_boxed_os_str);"#}; 109 | 110 | assert_analyzed_source_code(source_code, result, "rust") 111 | } 112 | 113 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/de/mod.rs#L119-L126 114 | #[test] 115 | fn test_ignore_mod_items_in_a_row() { 116 | let source_code = indoc! { r#" 117 | pub mod value; 118 | 119 | #[cfg(not(no_integer128))] 120 | mod format; 121 | mod ignored_any; 122 | mod impls; 123 | pub(crate) mod size_hint; 124 | mod utf8;"#}; 125 | 126 | let result = indoc! { r#" 127 | pub mod value; 128 | 129 | #[cfg(not(no_integer128))] 130 | mod format; 131 | mod ignored_any; 132 | mod impls; 133 | pub(crate) mod size_hint; 134 | mod utf8;"#}; 135 | 136 | assert_analyzed_source_code(source_code, result, "rust"); 137 | } 138 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/typescript_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod typescript_case_test; 3 | 4 | #[cfg(test)] 5 | mod angular_case_test; 6 | 7 | #[cfg(test)] 8 | mod async_case_test; 9 | 10 | #[cfg(test)] 11 | mod svelt_case_test; 12 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/typescript_test/angular_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn 
test_angular_code() { 6 | let source_code = indoc! {r#" 7 | export const enum JitCompilerUsage { 8 | Decorator, 9 | PartialDeclaration, 10 | } 11 | 12 | export interface JitCompilerUsageRequest { 13 | usage: JitCompilerUsage; 14 | kind: 'directive'|'component'|'pipe'|'injectable'|'NgModule'; 15 | type: Type; 16 | } 17 | 18 | export function getCompilerFacade(request: JitCompilerUsageRequest): CompilerFacade { 19 | const globalNg: ExportedCompilerFacade = global['ng']; 20 | if (globalNg && globalNg.ɵcompilerFacade) { 21 | return globalNg.ɵcompilerFacade; 22 | } 23 | 24 | if (typeof ngDevMode === 'undefined' || ngDevMode) { 25 | console.error(`JIT compilation failed for ${request.kind}`, request.type); 26 | 27 | let message = `The ${request.kind} '${ 28 | request 29 | .type.name}' needs to be compiled using the JIT compiler, but '@angular/compiler' is not available.\n\n`; 30 | if (request.usage === JitCompilerUsage.PartialDeclaration) { 31 | message += `The ${request.kind} is part of a library that has been partially compiled.\n`; 32 | message += 33 | `However, the Angular Linker has not processed the library such that JIT compilation is used as fallback.\n`; 34 | message += '\n'; 35 | message += 36 | `Ideally, the library is processed using the Angular Linker to become fully AOT compiled.\n`; 37 | } else { 38 | message += 39 | `JIT compilation is discouraged for production use-cases! Consider using AOT mode instead.\n`; 40 | } 41 | message += 42 | `Alternatively, the JIT compiler should be loaded by bootstrapping using '@angular/platform-browser-dynamic' or '@angular/platform-server',\n`; 43 | message += 44 | `or manually provide the compiler with 'import "@angular/compiler";' before bootstrapping.`; 45 | throw new Error(message); 46 | } else { 47 | throw new Error('JIT compiler unavailable'); 48 | } 49 | }"#}; 50 | 51 | let expected = indoc! 
{r#" 52 | // [TODO] JitCompilerUsage 53 | export const enum JitCompilerUsage { 54 | Decorator, 55 | PartialDeclaration, 56 | } 57 | 58 | // [TODO] JitCompilerUsageRequest 59 | export interface JitCompilerUsageRequest { 60 | usage: JitCompilerUsage; 61 | kind: 'directive'|'component'|'pipe'|'injectable'|'NgModule'; 62 | type: Type; 63 | } 64 | 65 | // [TODO] getCompilerFacade 66 | export function getCompilerFacade(request: JitCompilerUsageRequest): CompilerFacade { 67 | const globalNg: ExportedCompilerFacade = global['ng']; 68 | if (globalNg && globalNg.ɵcompilerFacade) { 69 | return globalNg.ɵcompilerFacade; 70 | } 71 | 72 | if (typeof ngDevMode === 'undefined' || ngDevMode) { 73 | console.error(`JIT compilation failed for ${request.kind}`, request.type); 74 | 75 | let message = `The ${request.kind} '${ 76 | request 77 | .type.name}' needs to be compiled using the JIT compiler, but '@angular/compiler' is not available.\n\n`; 78 | if (request.usage === JitCompilerUsage.PartialDeclaration) { 79 | message += `The ${request.kind} is part of a library that has been partially compiled.\n`; 80 | message += 81 | `However, the Angular Linker has not processed the library such that JIT compilation is used as fallback.\n`; 82 | message += '\n'; 83 | message += 84 | `Ideally, the library is processed using the Angular Linker to become fully AOT compiled.\n`; 85 | } else { 86 | message += 87 | `JIT compilation is discouraged for production use-cases! 
Consider using AOT mode instead.\n`; 88 | } 89 | message += 90 | `Alternatively, the JIT compiler should be loaded by bootstrapping using '@angular/platform-browser-dynamic' or '@angular/platform-server',\n`; 91 | message += 92 | `or manually provide the compiler with 'import "@angular/compiler";' before bootstrapping.`; 93 | throw new Error(message); 94 | } else { 95 | throw new Error('JIT compiler unavailable'); 96 | } 97 | }"#}; 98 | 99 | assert_analyzed_source_code(source_code, expected, "typescript") 100 | } 101 | 102 | #[test] 103 | #[ignore = "Should not be add TODO comment to above the `export ... from` statement"] 104 | fn test_angular_import_statement() { 105 | let source_code = indoc! {r#" 106 | import {global} from '../util/global'; 107 | import {CompilerFacade, ExportedCompilerFacade, Type} from './compiler_facade_interface'; 108 | export * from './compiler_facade_interface'; 109 | "#}; 110 | 111 | let _should_fix_output = indoc! {r#" 112 | import {global} from '../util/global'; 113 | import {CompilerFacade, ExportedCompilerFacade, Type} from './compiler_facade_interface'; 114 | // [TODO] anonymous 115 | export * from './compiler_facade_interface';"#}; 116 | 117 | assert_analyzed_source_code(source_code, source_code, "typescript") 118 | } 119 | 120 | #[test] 121 | fn test_abstract_class_statement() { 122 | let source_code = indoc! {r#" 123 | export abstract class RendererFactory2 { 124 | abstract createRenderer(hostElement: any, type: RendererType2|null): Renderer2; 125 | abstract begin?(): void; 126 | abstract end?(): void; 127 | abstract whenRenderingDone?(): Promise; 128 | }"#}; 129 | 130 | let exptected = indoc! 
{r#" 131 | // [TODO] RendererFactory2 132 | export abstract class RendererFactory2 { 133 | abstract createRenderer(hostElement: any, type: RendererType2|null): Renderer2; 134 | abstract begin?(): void; 135 | abstract end?(): void; 136 | abstract whenRenderingDone?(): Promise; 137 | }"#}; 138 | 139 | assert_analyzed_source_code(source_code, exptected, "typescript") 140 | } 141 | 142 | #[test] 143 | fn test_normal_class_statement() { 144 | let source_code = indoc! {r#" 145 | export class TransferState { 146 | static ɵprov = 147 | ɵɵdefineInjectable({ 148 | token: TransferState, 149 | providedIn: 'root', 150 | factory: initTransferState, 151 | }); 152 | 153 | /** @internal */ 154 | store: Record = {}; 155 | 156 | private onSerializeCallbacks: {[k: string]: () => unknown | undefined} = {}; 157 | 158 | /** 159 | * Get the value corresponding to a key. Return `defaultValue` if key is not found. 160 | */ 161 | get(key: StateKey, defaultValue: T): T { 162 | return this.store[key] !== undefined ? this.store[key] as T : defaultValue; 163 | } 164 | 165 | /** 166 | * Set the value corresponding to a key. 167 | */ 168 | set(key: StateKey, value: T): void { 169 | this.store[key] = value; 170 | } 171 | 172 | /** 173 | * Remove a key from the store. 174 | */ 175 | remove(key: StateKey): void { 176 | delete this.store[key]; 177 | } 178 | 179 | /** 180 | * Test whether a key exists in the store. 181 | */ 182 | hasKey(key: StateKey): boolean { 183 | return this.store.hasOwnProperty(key); 184 | } 185 | 186 | /** 187 | * Indicates whether the state is empty. 188 | */ 189 | get isEmpty(): boolean { 190 | return Object.keys(this.store).length === 0; 191 | } 192 | 193 | /** 194 | * Register a callback to provide the value for a key when `toJson` is called. 195 | */ 196 | onSerialize(key: StateKey, callback: () => T): void { 197 | this.onSerializeCallbacks[key] = callback; 198 | } 199 | }"#}; 200 | 201 | let expected = indoc! 
{r#" 202 | // [TODO] TransferState 203 | export class TransferState { 204 | static ɵprov = 205 | ɵɵdefineInjectable({ 206 | token: TransferState, 207 | providedIn: 'root', 208 | factory: initTransferState, 209 | }); 210 | 211 | /** @internal */ 212 | store: Record = {}; 213 | 214 | private onSerializeCallbacks: {[k: string]: () => unknown | undefined} = {}; 215 | 216 | /** 217 | * Get the value corresponding to a key. Return `defaultValue` if key is not found. 218 | */ 219 | get(key: StateKey, defaultValue: T): T { 220 | return this.store[key] !== undefined ? this.store[key] as T : defaultValue; 221 | } 222 | 223 | /** 224 | * Set the value corresponding to a key. 225 | */ 226 | set(key: StateKey, value: T): void { 227 | this.store[key] = value; 228 | } 229 | 230 | /** 231 | * Remove a key from the store. 232 | */ 233 | remove(key: StateKey): void { 234 | delete this.store[key]; 235 | } 236 | 237 | /** 238 | * Test whether a key exists in the store. 239 | */ 240 | hasKey(key: StateKey): boolean { 241 | return this.store.hasOwnProperty(key); 242 | } 243 | 244 | /** 245 | * Indicates whether the state is empty. 246 | */ 247 | get isEmpty(): boolean { 248 | return Object.keys(this.store).length === 0; 249 | } 250 | 251 | /** 252 | * Register a callback to provide the value for a key when `toJson` is called. 253 | */ 254 | onSerialize(key: StateKey, callback: () => T): void { 255 | this.onSerializeCallbacks[key] = callback; 256 | } 257 | }"#}; 258 | 259 | assert_analyzed_source_code(source_code, expected, "typescript") 260 | } 261 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/typescript_test/async_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_async_function_expression() { 6 | let source_code = indoc! 
{r#" 7 | async function foo (){ 8 | const dddd = await asyncBusby(22); 9 | console.log(dddd); 10 | }"#}; 11 | 12 | let expected = indoc! {r#" 13 | // [TODO] foo 14 | async function foo (){ 15 | const dddd = await asyncBusby(22); 16 | console.log(dddd); 17 | }"#}; 18 | 19 | assert_analyzed_source_code(source_code, expected, "typescript") 20 | } 21 | 22 | #[test] 23 | #[ignore = "TODO: Support arrow function"] 24 | fn test_async_arrow_function() { 25 | let source_code = indoc! {r#" 26 | const foo = async () => { 27 | const dddd = await asyncBusby(22); 28 | console.log(dddd); 29 | }"#}; 30 | 31 | let expected = indoc! {r#" 32 | // [TODO] foo 33 | const foo = async () => { 34 | const dddd = await asyncBusby(22); 35 | console.log(dddd); 36 | }"#}; 37 | 38 | assert_analyzed_source_code(source_code, expected, "typescript") 39 | } 40 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/typescript_test/svelt_case_test.rs: -------------------------------------------------------------------------------- 1 | use indoc::indoc; 2 | 3 | use crate::integration_test::assert_analyzed_source_code; 4 | 5 | #[test] 6 | fn test_interface_and_type_extends_with_exports() { 7 | let source_code = indoc! {r#" 8 | interface BaseNode { 9 | start: number; 10 | end: number; 11 | type: string; 12 | children?: TemplateNode[]; 13 | [prop_name: string]: any; 14 | } 15 | 16 | export type DirectiveType = 17 | | 'Action' 18 | | 'Animation' 19 | | 'Binding' 20 | | 'Class' 21 | | 'StyleDirective' 22 | | 'EventHandler' 23 | | 'Let' 24 | | 'Ref' 25 | | 'Transition'; 26 | 27 | export interface BaseDirective extends BaseNode { 28 | type: DirectiveType; 29 | name: string; 30 | }"#}; 31 | 32 | let expected = indoc! 
{r#" 33 | // [TODO] BaseNode 34 | interface BaseNode { 35 | start: number; 36 | end: number; 37 | type: string; 38 | children?: TemplateNode[]; 39 | [prop_name: string]: any; 40 | } 41 | 42 | // [TODO] DirectiveType 43 | export type DirectiveType = 44 | | 'Action' 45 | | 'Animation' 46 | | 'Binding' 47 | | 'Class' 48 | | 'StyleDirective' 49 | | 'EventHandler' 50 | | 'Let' 51 | | 'Ref' 52 | | 'Transition'; 53 | 54 | // [TODO] BaseDirective 55 | export interface BaseDirective extends BaseNode { 56 | type: DirectiveType; 57 | name: string; 58 | }"#}; 59 | 60 | assert_analyzed_source_code(source_code, expected, "typescript") 61 | } 62 | -------------------------------------------------------------------------------- /tests/integration_test/analyze_command_test/typescript_test/typescript_case_test.rs: -------------------------------------------------------------------------------- 1 | use crate::integration_test::assert_analyzed_source_code; 2 | use indoc::indoc; 3 | 4 | #[test] 5 | fn test_typescript_export_functions() { 6 | let source_code = indoc! {r#" 7 | export function parseBindingIdentifier(privateIdentifierDiagnosticMessage?: DiagnosticMessage) { 8 | return createIdentifier(isBindingIdentifier(), /*diagnosticMessage*/ undefined, privateIdentifierDiagnosticMessage); 9 | } 10 | export function parseIdentifier(diagnosticMessage?: DiagnosticMessage, privateIdentifierDiagnosticMessage?: DiagnosticMessage): Identifier { 11 | return createIdentifier(isIdentifier(), diagnosticMessage, privateIdentifierDiagnosticMessage); 12 | } 13 | export function parseIdentifierName(diagnosticMessage?: DiagnosticMessage): Identifier { 14 | return createIdentifier(tokenIsIdentifierOrKeyword(token()), diagnosticMessage); 15 | }"#}; 16 | 17 | let expected = indoc! 
{r#" 18 | // [TODO] parseBindingIdentifier 19 | export function parseBindingIdentifier(privateIdentifierDiagnosticMessage?: DiagnosticMessage) { 20 | return createIdentifier(isBindingIdentifier(), /*diagnosticMessage*/ undefined, privateIdentifierDiagnosticMessage); 21 | } 22 | // [TODO] parseIdentifier 23 | export function parseIdentifier(diagnosticMessage?: DiagnosticMessage, privateIdentifierDiagnosticMessage?: DiagnosticMessage): Identifier { 24 | return createIdentifier(isIdentifier(), diagnosticMessage, privateIdentifierDiagnosticMessage); 25 | } 26 | // [TODO] parseIdentifierName 27 | export function parseIdentifierName(diagnosticMessage?: DiagnosticMessage): Identifier { 28 | return createIdentifier(tokenIsIdentifierOrKeyword(token()), diagnosticMessage); 29 | }"#}; 30 | 31 | assert_analyzed_source_code(source_code, expected, "typescript") 32 | } 33 | 34 | #[test] 35 | fn test_typescript_functions() { 36 | let source_code = indoc! {r#" 37 | function setContextFlag(val: boolean, flag: NodeFlags) { 38 | if (val) { 39 | contextFlags |= flag; 40 | } 41 | else { 42 | contextFlags &= ~flag; 43 | } 44 | } 45 | 46 | function setDisallowInContext(val: boolean) { 47 | setContextFlag(val, NodeFlags.DisallowInContext); 48 | } 49 | 50 | function setYieldContext(val: boolean) { 51 | setContextFlag(val, NodeFlags.YieldContext); 52 | } 53 | 54 | function setDecoratorContext(val: boolean) { 55 | setContextFlag(val, NodeFlags.DecoratorContext); 56 | } 57 | 58 | function setAwaitContext(val: boolean) { 59 | setContextFlag(val, NodeFlags.AwaitContext); 60 | }"#}; 61 | 62 | let expected = indoc! 
{r#" 63 | // [TODO] setContextFlag 64 | function setContextFlag(val: boolean, flag: NodeFlags) { 65 | if (val) { 66 | contextFlags |= flag; 67 | } 68 | else { 69 | contextFlags &= ~flag; 70 | } 71 | } 72 | 73 | // [TODO] setDisallowInContext 74 | function setDisallowInContext(val: boolean) { 75 | setContextFlag(val, NodeFlags.DisallowInContext); 76 | } 77 | 78 | // [TODO] setYieldContext 79 | function setYieldContext(val: boolean) { 80 | setContextFlag(val, NodeFlags.YieldContext); 81 | } 82 | 83 | // [TODO] setDecoratorContext 84 | function setDecoratorContext(val: boolean) { 85 | setContextFlag(val, NodeFlags.DecoratorContext); 86 | } 87 | 88 | // [TODO] setAwaitContext 89 | function setAwaitContext(val: boolean) { 90 | setContextFlag(val, NodeFlags.AwaitContext); 91 | }"#}; 92 | 93 | assert_analyzed_source_code(source_code, expected, "typescript") 94 | } 95 | -------------------------------------------------------------------------------- /tests/pattern_search_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod pattern_search_test { 3 | #[test] 4 | fn test_search_multiple_pattern() { 5 | use balpan::commands::pattern_search::PatternTree; 6 | 7 | let searcher = PatternTree::new(); 8 | 9 | let text = "ABAAABCDABCDABABCD"; 10 | let patterns = vec!["ABCD".to_string(), "BCD".to_string()]; 11 | 12 | let expected = (true, vec![4, 8, 14]); 13 | assert_eq!(searcher.aho_corasick_search(text, &patterns), expected); 14 | } 15 | 16 | #[test] 17 | fn test_search_todo_done_comments_using_aho_corasick() { 18 | use balpan::commands::pattern_search::PatternTree; 19 | 20 | let searcher = PatternTree::new(); 21 | let text = r#" 22 | //[TODO] ABC 23 | //some comment 24 | //struct ABC { 25 | // field: i32, 26 | // field2: i32, 27 | //} 28 | // 29 | //[DONE] DEF 30 | //some comment about DEF 31 | //fn DEF() { 32 | // unimplemented!(); 33 | //} 34 | //"#; 35 | 36 | let patterns = vec!["[TODO]".to_string(), 
"[DONE]".to_string()]; 37 | let expected = (true, vec![11, 154]); 38 | 39 | let result = searcher.aho_corasick_search(text, &patterns); 40 | 41 | assert_eq!(result, expected); 42 | } 43 | 44 | #[test] 45 | fn test_selective_search() { 46 | use balpan::commands::pattern_search::PatternTree; 47 | 48 | let searcher = PatternTree::new(); 49 | let text = r#" 50 | //[TODO] ABC 51 | //some comment 52 | //struct ABC { 53 | // field: i32, 54 | // field2: i32, 55 | //} 56 | // 57 | //[TODO] DEF 58 | //some comment about DEF 59 | //fn DEF() { 60 | // unimplemented!(); 61 | //} 62 | //"#; 63 | 64 | let pattern = vec!["[TODO]".to_string()]; 65 | let expected = (true, vec![11, 154]); 66 | 67 | let result = searcher.selective_search(&pattern, text); 68 | 69 | assert_eq!(result, expected); 70 | } 71 | } 72 | 73 | #[cfg(test)] 74 | mod boyer_moore_tests { 75 | use balpan::commands::boyer_moore::SearchIn; 76 | 77 | #[test] 78 | fn test_find_pending_character_index() { 79 | use balpan::commands::boyer_moore::find_pending_character_index; 80 | 81 | let chars = vec!['A', 'B', 'C', 'B', 'D']; 82 | let start = 1; 83 | let pattern = &'B'; 84 | 85 | let result = find_pending_character_index(&chars, start, pattern); 86 | 87 | assert_eq!(2, result); 88 | } 89 | 90 | #[test] 91 | fn test_suffix_table() { 92 | use balpan::commands::boyer_moore::get_suffix_table; 93 | 94 | let text = "GCAGAGAG".as_bytes(); 95 | 96 | let table = get_suffix_table(&text); 97 | let expected = vec![1, 0, 0, 2, 0, 4, 0, 0]; 98 | 99 | assert_eq!(table, expected); 100 | 101 | let text = "abcbabcabab".as_bytes(); 102 | 103 | let table = get_suffix_table(&text); 104 | let expected = vec![0, 2, 0, 1, 0, 3, 0, 0, 2, 0, 0]; 105 | 106 | assert_eq!(table, expected); 107 | } 108 | 109 | #[test] 110 | fn test_build_suffix_table() { 111 | use balpan::commands::boyer_moore::build_suffixes_table; 112 | 113 | let pattern = "GCAGAGAG".as_bytes(); 114 | 115 | let table = build_suffixes_table(&pattern); 116 | let expected = vec![7, 7, 
7, 2, 7, 4, 7, 1]; 117 | 118 | assert_eq!(table, expected); 119 | 120 | let pattern = "abcbabcabab".as_bytes(); 121 | 122 | let table = build_suffixes_table(&pattern); 123 | let expected = vec![10, 10, 10, 10, 10, 10, 10, 5, 2, 7, 1]; 124 | 125 | assert_eq!(table, expected); 126 | } 127 | 128 | #[test] 129 | fn test_find_first_occurrence() { 130 | use balpan::commands::boyer_moore::BoyerMooreSearch; 131 | 132 | let searcher = BoyerMooreSearch::new(b"abc"); 133 | let text = "abababc"; 134 | 135 | assert_eq!(Some(4), searcher.find_first_position(text.as_bytes())); 136 | } 137 | 138 | #[test] 139 | fn test_overlapping() { 140 | use balpan::commands::boyer_moore::BoyerMooreSearch; 141 | 142 | let searcher = BoyerMooreSearch::new(b"aaba"); 143 | let text = b"aabaabaaba"; 144 | let result = searcher.find_overlapping_in(text).collect::>(); 145 | 146 | assert_eq!(vec![0, 3, 6], result); 147 | } 148 | 149 | #[test] 150 | fn test_no_pattern_found() { 151 | use balpan::commands::boyer_moore::BoyerMooreSearch; 152 | 153 | let searcher = BoyerMooreSearch::new(b"abc"); 154 | let text = "ababab"; 155 | 156 | assert_eq!(None, searcher.find_first_position(text.as_bytes())); 157 | } 158 | 159 | #[test] 160 | fn test_find_patterns_in_source_code() { 161 | use balpan::commands::boyer_moore::BoyerMooreSearch; 162 | 163 | let source = r#" 164 | //[TODO] main 165 | //comment for main 166 | fn main() { 167 | println!("Hello, world!"); 168 | } 169 | 170 | pub trait Foo<'a, T> { 171 | fn foo(&'a self) -> None; 172 | fn foo2(&'a self) -> bool; 173 | } 174 | 175 | impl <'a, T> Foo<'a, T> for Foo { 176 | fn foo(&'a self) -> None { 177 | None 178 | } 179 | 180 | fn foo2(&'a self) -> bool { 181 | true 182 | } 183 | } 184 | "# 185 | .as_bytes(); 186 | 187 | let searcher = BoyerMooreSearch::new(b"fn"); 188 | let result = searcher.find_in(source).collect::>(); 189 | 190 | assert_eq!(vec![58, 163, 201, 293, 368], result); 191 | } 192 | 193 | #[test] 194 | fn test_search_word() { 195 | use 
balpan::commands::boyer_moore::BoyerMooreSearch; 196 | 197 | let text = " 198 | MALCOM. 199 | 'Tis call'd the evil: 200 | A most miraculous work in this good king; 201 | Which often, since my here-remain in England, 202 | I have seen him do. How he solicits heaven, 203 | Himself best knows, but strangely-visited people, 204 | All swoln and ulcerous, pitiful to the eye, 205 | The mere despair of surgery, he cures; 206 | Hanging a golden stamp about their necks, 207 | Put on with holy prayers: and 'tis spoken, 208 | To the succeeding royalty he leaves 209 | The healing benediction. With this strange virtue, 210 | He hath a heavenly gift of prophecy; 211 | And sundry blessings hang about his throne, 212 | That speak him full of grace. 213 | 214 | MACDUFF. 215 | See, who comes here? 216 | 217 | MALCOLM. 218 | My countryman; but yet I know him not. 219 | 220 | MACDUFF. 221 | My ever-gentle cousin, welcome hither. 222 | 223 | MALCOLM. 224 | I know him now. Good God, betimes remove 225 | The means that makes us strangers! 226 | 227 | ROSS. 228 | Sir, amen. 229 | 230 | MACDUFF. 231 | Stands Scotland where it did? 232 | 233 | ROSS. 234 | Alas, poor country, 235 | Almost afraid to know itself! It cannot 236 | Be call'd our mother, but our grave, where nothing, 237 | But who knows nothing, is once seen to smile; 238 | Where sighs, and groans, and shrieks, that rent the air, 239 | Are made, not mark'd; where violent sorrow seems 240 | A modern ecstasy. The dead man's knell 241 | Is there scarce ask'd for who; and good men's lives 242 | Expire before the flowers in their caps, 243 | Dying or ere they sicken. 244 | 245 | MACDUFF. 246 | O, relation 247 | Too nice, and yet too true! 248 | 249 | MALCOLM. 250 | What’s the newest grief? 251 | 252 | ROSS. 253 | That of an hour’s age doth hiss the speaker; 254 | Each minute teems a new one. 255 | 256 | MACDUFF. 257 | How does my wife? 258 | 259 | ROSS. 260 | Why, well. 261 | 262 | MACDUFF. 263 | And all my children? 264 | 265 | ROSS. 
266 | Well too. 267 | 268 | MACDUFF. 269 | The tyrant has not batter’d at their peace? 270 | 271 | ROSS. 272 | No; they were well at peace when I did leave ’em. 273 | 274 | MACDUFF. 275 | Be not a niggard of your speech: how goes’t? 276 | 277 | ROSS. 278 | When I came hither to transport the tidings, 279 | Which I have heavily borne, there ran a rumour 280 | Of many worthy fellows that were out; 281 | Which was to my belief witness’d the rather, 282 | For that I saw the tyrant’s power afoot. 283 | Now is the time of help. Your eye in Scotland 284 | Would create soldiers, make our women fight, 285 | To doff their dire distresses. 286 | 287 | MALCOLM. 288 | Be’t their comfort 289 | We are coming thither. Gracious England hath 290 | Lent us good Siward and ten thousand men; 291 | An older and a better soldier none 292 | That Christendom gives out. 293 | 294 | ROSS. 295 | Would I could answer 296 | This comfort with the like! But I have words 297 | That would be howl’d out in the desert air, 298 | Where hearing should not latch them. 299 | 300 | MACDUFF. 301 | What concern they? 302 | The general cause? or is it a fee-grief 303 | Due to some single breast? 304 | 305 | ROSS. 306 | No mind that’s honest 307 | But in it shares some woe, though the main part 308 | Pertains to you alone. 309 | 310 | MACDUFF. 311 | If it be mine, 312 | Keep it not from me, quickly let me have it. 313 | 314 | ROSS. 315 | Let not your ears despise my tongue for ever, 316 | Which shall possess them with the heaviest sound 317 | That ever yet they heard. 318 | 319 | MACDUFF. 320 | Humh! I guess at it. 321 | 322 | ROSS. 323 | Your castle is surpris’d; your wife and babes 324 | Savagely slaughter’d. To relate the manner 325 | Were, on the quarry of these murder’d deer, 326 | To add the death of you. 327 | 328 | MALCOLM. 329 | Merciful heaven!— 330 | What, man! ne’er pull your hat upon your brows. 331 | Give sorrow words. 
The grief that does not speak 332 | Whispers the o’er-fraught heart, and bids it break. 333 | 334 | MACDUFF. 335 | My children too? 336 | 337 | ROSS. 338 | Wife, children, servants, all 339 | That could be found. 340 | 341 | MACDUFF. 342 | And I must be from thence! 343 | My wife kill’d too? 344 | 345 | ROSS. 346 | I have said." 347 | .as_bytes(); 348 | 349 | let searcher = BoyerMooreSearch::new(b"MALCOM"); 350 | let first_occurrence = searcher.find_first_position(text); 351 | assert_eq!(Some(9), first_occurrence); 352 | 353 | let searcher = BoyerMooreSearch::new(b"MACDUFF"); 354 | let find_all = searcher.find_in(text).collect::>(); 355 | let expected = vec![ 356 | 716, 844, 1077, 1667, 1925, 2019, 2115, 2278, 3229, 3507, 3777, 4282, 4423, 357 | ]; 358 | assert_eq!(expected, find_all); 359 | } 360 | 361 | #[test] 362 | fn test_is_work_for_non_alphabet() { 363 | use balpan::commands::boyer_moore::BoyerMooreSearch; 364 | 365 | let pattern = "🦀🦀🐪🔥🐍✅".as_bytes(); 366 | let searcher = BoyerMooreSearch::new(pattern); 367 | let text = "🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅"; 368 | 369 | let result = searcher.find_in(text.as_bytes()).collect::>(); 370 | assert_eq!(vec![0, 23, 46, 69, 92], result); 371 | } 372 | } 373 | -------------------------------------------------------------------------------- /tests/tree_sitter_extended_test.rs: -------------------------------------------------------------------------------- 1 | #[cfg(test)] 2 | mod tree_sitter_extended_tests { 3 | use balpan::tree_sitter_extended::MembershipCheck; 4 | use tree_sitter::{Point, Range}; 5 | 6 | #[test] 7 | fn test_out_of_membership() { 8 | let cursor = Point { row: 2, column: 10 }; 9 | 10 | let function_scope = Range { 11 | start_byte: 0, 12 | end_byte: 0, 13 | start_point: Point { row: 4, column: 2 }, 14 | end_point: Point { row: 10, column: 2 }, 15 | }; 16 | 17 | assert!(cursor.is_before(function_scope)); 18 | assert!(!cursor.is_member_of(function_scope)); 19 | } 20 | 21 | #[test] 22 | fn 
test_membership_with_inline_code() { 23 | let cursor = Point { row: 2, column: 10 }; 24 | 25 | let inlined_scope = Range { 26 | start_byte: 0, 27 | end_byte: 0, 28 | start_point: Point { row: 2, column: 5 }, 29 | end_point: Point { row: 2, column: 30 }, 30 | }; 31 | 32 | assert!(cursor.is_member_of(inlined_scope)); 33 | } 34 | 35 | #[test] 36 | fn test_cursor_is_pointing_the_boundary_of_range() { 37 | let cursor_with_pointing_start = Point { row: 2, column: 2 }; 38 | 39 | let cursor_with_pointing_end = Point { row: 30, column: 2 }; 40 | 41 | let function_scope = Range { 42 | start_byte: 0, 43 | end_byte: 0, 44 | start_point: Point { row: 2, column: 2 }, 45 | end_point: Point { row: 30, column: 2 }, 46 | }; 47 | 48 | assert!(!cursor_with_pointing_start.is_before(function_scope)); 49 | assert!(cursor_with_pointing_start.is_member_of(function_scope)); 50 | assert!(cursor_with_pointing_end.is_member_of(function_scope)); 51 | assert!(!cursor_with_pointing_end.is_after(function_scope)); 52 | } 53 | 54 | #[test] 55 | fn test_cursor_is_pointing_outside_of_boundary() { 56 | let left_of_start_point = Point { row: 2, column: 1 }; 57 | 58 | let right_of_end_point = Point { row: 30, column: 3 }; 59 | 60 | let function_scope = Range { 61 | start_byte: 0, 62 | end_byte: 0, 63 | start_point: Point { row: 2, column: 2 }, 64 | end_point: Point { row: 30, column: 2 }, 65 | }; 66 | 67 | assert!(left_of_start_point.is_before(function_scope)); 68 | assert!(!left_of_start_point.is_member_of(function_scope)); 69 | assert!(!right_of_end_point.is_member_of(function_scope)); 70 | assert!(right_of_end_point.is_after(function_scope)); 71 | } 72 | } 73 | --------------------------------------------------------------------------------