├── .github
├── ISSUE_TEMPLATE.md
├── ISSUE_TEMPLATE
│ ├── bug.md
│ └── feature_request.md
├── PULL_REQUEST_TEMPLATE.md
└── workflows
│ └── rust.yml
├── .gitignore
├── Cargo.lock
├── Cargo.toml
├── README.ko.md
├── README.md
├── assets
├── balpan-init-demo.gif
└── vhs
│ └── demo-balpan-init.tape
├── languages.toml
├── src
├── analyzer.rs
├── commands
│ ├── boyer_moore.rs
│ ├── grep.rs
│ ├── mod.rs
│ └── pattern_search.rs
├── config.rs
├── grammar.rs
├── language.rs
├── lib.rs
├── main.rs
├── scanner.rs
├── tokens.rs
├── tree_sitter_extended.rs
└── utils.rs
└── tests
├── analyzer_test.rs
├── analyzer_test
└── analyze_test.rs
├── integration_test.rs
├── integration_test
├── analyze_command_test.rs
└── analyze_command_test
│ ├── c_test.rs
│ ├── c_test
│ ├── neovim_case_test.rs
│ ├── nginx_case_test.rs
│ └── redis_case_test.rs
│ ├── cpp_test.rs
│ ├── cpp_test
│ └── blazingmq_case_test.rs
│ ├── javascript_test.rs
│ ├── javascript_test
│ ├── react_native_case_test.rs
│ └── svelt_cast_test.rs
│ ├── python_test.rs
│ ├── python_test
│ ├── django_case_test.rs
│ ├── python_dependency_injector_case_test.rs
│ └── rustpython_case_test.rs
│ ├── ruby_test.rs
│ ├── ruby_test
│ └── mastodon_case_test.rs
│ ├── rust_test.rs
│ ├── rust_test
│ ├── anyhow_case_test.rs
│ ├── rustpython_case_test.rs
│ └── serde_case_test.rs
│ ├── typescript_test.rs
│ └── typescript_test
│ ├── angular_case_test.rs
│ ├── async_case_test.rs
│ ├── svelt_case_test.rs
│ └── typescript_case_test.rs
├── pattern_search_test.rs
└── tree_sitter_extended_test.rs
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
2 | Please follow this guide when creating a new issue. This speeds up the process of replicating the issue and finding a solution.
3 |
4 | ### Environment details
5 |
6 | * Operating system:
7 | * Language:
8 |
9 | ### Expected behavior
10 |
11 | ### Actual behavior
12 |
13 | ### Steps to reproduce the behavior
14 |
15 | ### Additional details
16 |
17 | * screenshots or screencapture
18 |
19 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug
3 | about: Report a problem
4 | title: ''
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | * **OS version**:
11 | * **Language**:
12 |
13 | ### 🐛 Describe the bug
14 |
15 |
16 |
17 | ### Expected behaviour
18 |
19 |
20 |
21 | ### Steps to reproduce
22 |
23 |
24 |
25 | Steps to reproduce the behaviour:
26 | 1. Go to ...
27 | 2. ...
28 |
29 | ### Other details or context
30 |
31 |
32 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest a new feature.
4 | title: Add ...
5 | labels: enhancement
6 | assignees: ''
7 |
8 | ---
9 |
10 |
11 | ### Describe the new feature
12 |
13 |
14 |
15 | ### How does this help you?
16 |
17 |
18 |
19 | ### If we couldn't add this feature, is there a compromise you can think of?
20 |
21 |
22 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ## :star2: What does this PR do?
2 |
3 |
4 |
5 | ## :bug: Recommendations for testing
6 |
7 |
8 |
9 | ## :memo: Links to relevant issues or information
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Rust # CI: build, test, clippy (deny warnings), and rustfmt check on pushes/PRs to main
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | env:
12 | CARGO_TERM_COLOR: always # keep colored cargo output in CI logs
13 |
14 | jobs:
15 | build:
16 |
17 | runs-on: ubuntu-latest
18 |
19 | steps:
20 | - uses: actions/checkout@v3
21 |
22 | - name: Set up Rust
23 | uses: actions-rs/toolchain@v1 # NOTE(review): actions-rs is archived/unmaintained — consider dtolnay/rust-toolchain
24 | with:
25 | toolchain: stable
26 |
27 | - name: Build
28 | run: cargo build --verbose
29 |
30 | - name: Run tests
31 | run: cargo test --verbose
32 |
33 | - name: Run Clippy
34 | run: cargo clippy -- -D warnings # any lint warning fails the build
35 |
36 | - name: Run fmt
37 | run: cargo fmt --all -- --check # check-only; does not rewrite files
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | *.svg
3 | .DS_Store
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "balpan"
3 | version = "0.2.0"
4 | edition = "2021"
5 |
6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7 |
8 | [dependencies]
9 | aho-corasick = "1.0.4"
10 | anyhow = "1.0.71"
11 | cc = "1.0.79"
12 | clap = { version = "4.3.21", features = ["derive"] }
13 | etcetera = "0.8.0"
14 | git2 = "0.17.2"
15 | glob = "0.3.1"
16 | ignore = "0.4.20"
17 | indoc = "2.0.3"
18 | libloading = "0.8.0"
19 | log = "0.4.18"
20 | once_cell = "1.18.0"
21 | regex = "1.9.5"
22 | serde = { version = "1.0", features = ["derive"] }
23 | serde_json = "1.0.104"
24 | strsim = "0.10.0"
25 | tempfile = "3.8.0"
26 | tokio = { version = "1.32.0", features = ["full"] }
27 | toml = "0.7.4"
28 | tree-sitter = "0.20.10"
29 |
30 | [lib]
31 | doctest = false
32 |
--------------------------------------------------------------------------------
/README.ko.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
Balpan CLI
6 | 오픈소스 생태계에 기여하고자 하는 사람들의 온보딩을 돕는 "발판"
7 | 오픈소스 프로젝트의 가독성을 높이고, 누구나 기여할 수 있도록 하자
8 |
9 | [](https://www.rust-lang.org/)
10 | 
11 |
12 |
13 |
14 | ## Table of Contents
15 |
16 | - [Introduction](#introduction)
17 | - [Installation](#installation)
18 | - [Requirements](#requirements)
19 | - [Install using homebrew](#brew)
20 | - [Install using cargo](#cargo)
21 | - [Quickstart](#quickstart)
22 | - [Features](#features)
23 | - [Supported Language](#supported-languages)
24 | - [`balpan init`](#balpan-init)
25 |
26 | ## Introduction
27 |
28 | **balpan**은 오픈소스 생태계에 기여하고자 하는 사람들의 온보딩을 돕는 **발판** 이라는 의미로 시작했습니다. ([🔗](https://m.khan.co.kr/national/national-general/article/202109152114035#c2b))
29 |
30 | **balpan**은 [treesitter](https://tree-sitter.github.io)를 이용해서 소스코드를 트리 구조로 분석하여 시각화하고, 능동적으로 소스코드를 읽는 사람들에게는 좀 더 진입장벽을 낮추는 것을 목표로 합니다.
31 |
32 | 책을 읽을 때도 줄을 치면서 읽듯이, 소스코드를 읽을때도 역시 책갈피 관리하듯이 읽을 수 있도록 하는 것부터 시작합니다.
33 |
34 | ### Disclaimer
35 |
36 | * 트리시터를 설치하는 문제를 부분적으로 해결하기 위해 [helix](https://github.com/helix-editor/helix) 소스코드의 일부를 사용했습니다.
37 | * 관련 코드: [config.rs](https://github.com/malkoG/balpan/blob/main/src/config.rs), [grammar.rs](https://github.com/malkoG/balpan/blob/main/src/grammar.rs), [lib.rs](https://github.com/malkoG/balpan/blob/main/src/lib.rs)
38 |
39 | ## Installation
40 |
41 | ### Requirements
42 |
43 | - OS: Linux/macOS
44 | - Cargo (cargo를 이용해서 설치하는 경우)
45 |
46 | ### Install using homebrew
47 |
48 | ```bash
49 | $ brew install malkoG/x/balpan
50 | ```
51 | * ⚠️ 당장은 homebrew를 이용해서 설치하는 경우 `0.1.1` 릴리즈만 설치될 수 있습니다.
52 | * 릴리즈를 출시할때마다 homebrew에 배포하는 과정을 자동화하는 방법은 알아보고 있는 중입니다.
53 |
54 | ### Install using cargo
55 |
56 | ```bash
57 | $ cargo install --path .
58 | ```
59 |
60 | ### Quickstart
61 |
62 | **balpan**의 모든 명령어들은 소스코드를 트리구조의 형태로 분석하기 위해 treesitter 기반으로 생성된 파서를 이용합니다.
63 | **balpan**의 각 명령어를 사용하기 전에 분석하고자 하는 리포지토리의 홈 디렉토리에서 아래의 명령어를 실행해주세요.
64 |
65 | ```bash
66 | $ balpan init
67 | ```
68 |
69 | ## Features
70 |
71 | ### Supported Languages
72 |
73 | 당장은 지원하는 언어가 많이 없지만, 트리시터가 지원하는 언어라면 모두 지원할 수 있도록 하는 것을 지향합니다.
74 |
75 | - Rust
76 | - Python
77 | - Ruby
78 |
79 | ### `balpan init`
80 |
81 | 
82 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | * [한국어](./README.ko.md)
3 |
4 | # UNDER CONSTRUCTION 🚧
5 |
6 |
--------------------------------------------------------------------------------
/assets/balpan-init-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/balpan-rs/balpan/f1bd990b041f005116585913d6b92250e388cf15/assets/balpan-init-demo.gif
--------------------------------------------------------------------------------
/assets/vhs/demo-balpan-init.tape:
--------------------------------------------------------------------------------
1 | Output assets/balpan-init-demo.gif
2 |
3 | Set FontSize 15
4 | Set Width 1200
5 | Set Height 600
6 |
7 | Type "# This is demo animation for `balpan init`"
8 | Sleep 300ms
9 | Enter
10 |
11 | Type ""
12 | Sleep 300ms
13 | Enter
14 |
15 |
16 | Type "balpan init # Automatically generates TODO comments"
17 | Sleep 300ms
18 | Enter
19 |
20 | Type "rg ' \[TODO\] ' src"
21 | Sleep 300ms
22 | Enter
23 |
24 | Sleep 8s
25 |
--------------------------------------------------------------------------------
/src/analyzer.rs:
--------------------------------------------------------------------------------
1 | use std::cell::RefCell;
2 | use std::collections::VecDeque;
3 |
4 | use tree_sitter::{Node, Parser, Point, Range, Tree};
5 |
6 | use crate::grammar::get_language;
7 | use crate::language::Language;
8 | use crate::tokens::CommentToken;
9 | use crate::tree_sitter_extended::{MembershipCheck, RangeFactory, ResolveSymbol};
10 |
11 | pub struct Analyzer { // Walks a file's tree-sitter syntax tree and emits the annotated lines.
12 | pub source_code: String, // Full text of the file to analyze.
13 | pub language: Language, // Selects the grammar and the comment token used for generated lines.
14 | }
15 |
16 | impl<'tree> Analyzer {
17 | fn get_indent_comment_pool(&self) -> Vec { // Precomputes "<indent><comment-token>" prefixes for nesting levels 0..100.
18 | let comment_token = CommentToken::from_language(&self.language);
19 | let comment = comment_token.to_str();
20 | let ident = match self.language {
21 | Language::Ruby => " ", // NOTE(review): indent widths look collapsed by extraction — presumably 2 spaces for Ruby; confirm.
22 | _ => " ", // NOTE(review): presumably 4 spaces for other languages; confirm against upstream.
23 | };
24 | let max_ident_level = 100; // Upper bound on supported nesting depth.
25 |
26 | (0..max_ident_level)
27 | .map(|level| {
28 | let indent = ident.repeat(level);
29 | format!("{}{}", indent, comment)
30 | })
31 | .collect()
32 | }
33 |
34 | fn get_syntax_tree(&self) -> Tree { // Parses self.source_code with the grammar for self.language; panics if unavailable.
35 | let parser = RefCell::new(Parser::new()); // NOTE(review): RefCell is unnecessary here — a plain `let mut parser` would do.
36 | let language = get_language(self.language.as_str()).unwrap();
37 |
38 | let mut ts_parser = parser.borrow_mut();
39 | ts_parser
40 | .set_language(language)
41 | .expect("treesitter parser for given language does not exists")
42 |
43 | let tree = ts_parser.parse(&self.source_code, None);
44 |
45 | tree.expect("Failed to parsing given source code")
46 | }
47 |
48 | pub fn analyze(&self) -> VecDeque { // Re-emits the source line-by-line, inserting generated comment lines above matched nodes; idempotent on re-runs.
49 | let tree = self.get_syntax_tree();
50 | let nodes = self.get_scannable_nodes(&tree);
51 |
52 | let ignorable_node_types = self.language.ignorable_node_types();
53 |
54 | let nested_traversable_symbols = self.language.nested_traversable_symbols();
55 |
56 | let mut writer_queue = VecDeque::new(); // Output lines, in final order.
57 | let mut pending_queue = VecDeque::new(); // Buffered decorator/attribute lines flushed under the generated comment.
58 | let mut nodes_queue = VecDeque::from(nodes);
59 | let mut indentation_context: VecDeque<(Node, String)> = VecDeque::new(); // Stack of enclosing scopes and their symbol names.
60 | let indent_comment_pool = self.get_indent_comment_pool();
61 | let mut latest_comment_line = ""; // Tracks the last generated/seen comment to avoid duplicating it on re-runs.
62 | let mut latest_comment_line_index = -1_isize;
63 |
64 | let mut lines = vec![];
65 | for line in self.source_code.lines() {
66 | lines.push(line.to_string());
67 | }
68 |
69 | for (i, line) in lines.iter().enumerate() {
70 | let row = i;
71 | let line_idx = i as isize;
72 | let column = line.len();
73 |
74 | let cursor_position = Point { row, column }; // Cursor sits at the end of the current line.
75 |
76 | if nodes_queue.is_empty() {
77 | writer_queue.push_back(line.to_owned()); // No nodes left: pass remaining lines through unchanged.
78 | continue;
79 | }
80 |
81 | let (current_node, (row, from, to)) = match nodes_queue.front() { // Shadows the outer `row`: this is the identifier's row.
82 | Some(item) => item,
83 | None => panic!("Failed to retrieve treesitter node from queue"),
84 | };
85 |
86 | let mut symbol_name_with_context = String::new();
87 |
88 | let mut pop_node = false;
89 |
90 | match Range::from_node(*current_node) {
91 | node_range if cursor_position.is_member_of(node_range) => {
92 | let node_type = current_node.kind();
93 |
94 | // Rust-specific: a single-line mod declaration (e.g. `mod foo;`) is emitted as-is, no comment generated.
95 | if node_type == "mod_item"
96 | && node_range.start_point.row == node_range.end_point.row
97 | {
98 | while !pending_queue.is_empty() {
99 | let decorator_line: &str = pending_queue.pop_front().unwrap();
100 | writer_queue.push_back(decorator_line.to_owned());
101 | }
102 | writer_queue.push_back(line.to_owned());
103 | nodes_queue.pop_front();
104 | continue;
105 | }
106 |
107 | if ignorable_node_types.contains(&node_type) {
108 | while !pending_queue.is_empty() {
109 | let decorator_line: &str = pending_queue.pop_front().unwrap();
110 | writer_queue.push_back(decorator_line.to_owned());
111 | }
112 | writer_queue.push_back(line.to_owned());
113 | nodes_queue.pop_front();
114 | continue;
115 | }
116 |
117 | if node_type == self.language.decorator_node_type() {
118 | pending_queue.push_back(line); // Hold decorators until their decorated item is reached.
119 | } else {
120 | for (_node, node_symbol) in indentation_context.iter() {
121 | symbol_name_with_context
122 | .push_str(&format!("{} > ", node_symbol).to_string()); // Prefix with enclosing scopes: "Outer > Inner > ".
123 | }
124 |
125 | let node_symbol_with_indent = &lines[*row];
126 | let node_symbol = &node_symbol_with_indent[from.to_owned()..to.to_owned()];
127 |
128 | if *from == 0 && *to == 0 { // (0, 0) is the sentinel for "no identifier found".
129 | symbol_name_with_context.push_str("anonymous");
130 | } else {
131 | symbol_name_with_context.push_str(node_symbol);
132 | }
133 |
134 | let indent_size = indentation_context.len();
135 | let comment_line: String = format!(
136 | "{} {}",
137 | indent_comment_pool[indent_size].clone(),
138 | symbol_name_with_context
139 | );
140 |
141 | if latest_comment_line != comment_line { // Skip insertion when the comment already exists.
142 | writer_queue.push_back(comment_line);
143 | }
144 | if !pending_queue.is_empty() {
145 | while !pending_queue.is_empty() {
146 | if let Some(queued_line) = pending_queue.pop_front() {
147 | writer_queue.push_back(queued_line.to_owned());
148 | }
149 | }
150 | }
151 | writer_queue.push_back(line.to_owned());
152 | pop_node = true;
153 | }
154 |
155 | if !indentation_context.is_empty() { // Leaving the innermost scope: drop its context entry.
156 | if let Some((current_context, _)) = indentation_context.back() {
157 | if cursor_position.row >= current_context.end_position().row {
158 | indentation_context.pop_back();
159 | }
160 | }
161 | }
162 |
163 | if nested_traversable_symbols.contains(&node_type) {
164 | let (_, from, to) = current_node.identifier_range();
165 |
166 | let symbol: String;
167 | if from == 0 && to == 0 {
168 | symbol = "anonymous".to_string();
169 | } else {
170 | symbol = line[from.to_owned()..to.to_owned()].to_string();
171 | }
172 |
173 | indentation_context.push_back((*current_node, symbol)); // Entering a nestable scope (class/module/impl/etc.).
174 | pop_node = true;
175 | }
176 |
177 | if cursor_position == current_node.end_position() { // Node fully consumed at its last line.
178 | pop_node = true;
179 | }
180 |
181 | if pop_node {
182 | nodes_queue.pop_front();
183 | }
184 | }
185 | _ => {
186 | if !indentation_context.is_empty() {
187 | if let Some((current_context, _)) = indentation_context.back() {
188 | if cursor_position.row >= current_context.end_position().row {
189 | indentation_context.pop_back();
190 | }
191 | }
192 | }
193 |
194 | if line == latest_comment_line && latest_comment_line_index == line_idx - 1 { // Drop a duplicated generated comment (idempotent re-run).
195 | continue;
196 | }
197 |
198 | let indentation_level = indentation_context.len();
199 | if line.starts_with(&indent_comment_pool[indentation_level]) { // Remember an existing generated comment at this level.
200 | latest_comment_line = line;
201 | latest_comment_line_index = line_idx;
202 | }
203 | writer_queue.push_back(line.to_owned());
204 | }
205 | }
206 | }
207 |
208 | writer_queue.to_owned()
209 | }
210 |
211 | /// This method collects tree-sitter nodes with BFS.
212 | ///
213 | /// All collected nodes are sorted in non-decreasing order of their start row.
214 | fn get_scannable_nodes(&self, tree: &'tree Tree) -> Vec<(Node<'tree>, (usize, usize, usize))> {
215 | let mut deq: VecDeque> = VecDeque::new();
216 | let scannable_node_types = self.language.scannable_node_types();
217 | let nested_traversable_symbols = self.language.nested_traversable_symbols();
218 | let mut result = Vec::new();
219 | deq.push_back(tree.root_node());
220 |
221 | while !deq.is_empty() {
222 | if let Some(node) = deq.pop_front() {
223 | let node_type = node.kind();
224 |
225 | if scannable_node_types.contains(&node_type) {
226 | let identifier_range = node.identifier_range(); // (row, from, to) of the node's identifier; (0, 0) means none.
227 | result.push((node.to_owned(), identifier_range));
228 | }
229 |
230 | if !nested_traversable_symbols.contains(&node_type) // Only descend into nestable scopes and the top-level node.
231 | && node_type != self.language.top_level_node_type()
232 | {
233 | continue;
234 | }
235 | deq = self.enqueue_child_nodes(deq, &node);
236 | }
237 | }
238 |
239 | result.sort_by(|(u, _), (v, _)| u.start_position().row.cmp(&v.start_position().row));
240 |
241 | result.to_owned() // NOTE(review): `result` is already owned; this extra to_owned() copies the Vec.
242 | }
243 |
244 | fn enqueue_child_nodes( // Pushes scannable children of `node` (and of its "body" field) onto the BFS queue.
245 | &self,
246 | mut deq: VecDeque>,
247 | node: &Node<'tree>,
248 | ) -> VecDeque> {
249 | let mut cursor = node.walk();
250 | let scannable_node_types = self.language.scannable_node_types();
251 | let node_type = node.kind();
252 |
253 | if self.language == Language::Ruby { // Ruby: only enqueue direct children of the top-level node, then stop.
254 | if node_type == self.language.top_level_node_type() {
255 | for child_node in node.children(&mut cursor) {
256 | if scannable_node_types.contains(&child_node.kind()) {
257 | deq.push_back(child_node);
258 | }
259 | }
260 | return deq;
261 | }
262 | } else {
263 | for child_node in node.children(&mut cursor) {
264 | if scannable_node_types.contains(&child_node.kind()) {
265 | deq.push_back(child_node);
266 | }
267 | }
268 | }
269 |
270 | cursor.reset(*node); // Rewind before walking the node's "body" children.
271 |
272 | if let Some(body) = node.child_by_field_name("body") {
273 | let mut body_cursor = body.walk();
274 | for child_node in body.children(&mut body_cursor) {
275 | if scannable_node_types.contains(&child_node.kind()) {
276 | deq.push_back(child_node);
277 | }
278 | }
279 | }
280 |
281 | deq
282 | }
283 | }
284 |
--------------------------------------------------------------------------------
/src/commands/boyer_moore.rs:
--------------------------------------------------------------------------------
1 | // ref: https://www.sspilsbury.com/2017-09-23-explaining-boyer-moore/
2 | // ref: https://github.com/peterjoel/needle/blob/master/src/skip_search.rs
3 |
4 | pub struct BoyerMooreSearch<'a, T> { // Boyer-Moore searcher: precomputed shift tables for one pattern.
5 | pattern: &'a [T], // The needle being searched for.
6 | bad_character_table: [usize; 256], // Bad-character shift, indexed by the mismatched byte value.
7 | good_suffixes_table: Vec, // Good-suffix shift, indexed by pattern position.
8 | }
9 |
10 | impl<'a, T> BoyerMooreSearch<'a, T>
11 | where
12 | T: Copy + PartialEq + Into,
13 | {
14 | /// Create a new Boyer-Moore search object with the given pattern.
15 | ///
16 | /// ### Example
17 | ///
18 | /// Basic usage:
19 | ///
20 | /// If you want to search a pattern ("abc" in this case) in a text,
21 | /// you can simply put it in the function as an argument.
22 | ///
23 | /// ```
24 | /// use balpan::commands::boyer_moore::BoyerMooreSearch;
25 | ///
26 | /// let searcher = BoyerMooreSearch::new(b"abc");
27 | /// ```
28 | pub fn new(pattern: &'a [T]) -> BoyerMooreSearch {
29 | Self {
30 | pattern,
31 | bad_character_table: build_bad_chars_table(pattern), // NOTE(review): underflows on an empty pattern — confirm callers never pass one.
32 | good_suffixes_table: build_suffixes_table(pattern),
33 | }
34 | }
35 | }
36 |
37 | /// `SearchIn` defines the interface for iterating over occurrences of the pattern in a text.
38 | pub trait SearchIn<'a, H: ?Sized> {
39 | type Iter: Iterator- ; // NOTE(review): item type looks garbled by extraction — likely `Iterator<Item = usize>`.
40 |
41 | fn find_in(&'a self, text: &'a H) -> Self::Iter; // Non-overlapping matches.
42 | fn find_overlapping_in(&'a self, text: &'a H) -> Self::Iter; // All matches, including overlapping ones.
43 | /// Find the first occurrence of the pattern within the given text.
44 | fn find_first_position(&'a self, text: &'a H) -> Option {
45 | self.find_in(text).next()
46 | }
47 | }
48 |
49 | impl<'a, T> SearchIn<'a, [T]> for BoyerMooreSearch<'a, T>
50 | where
51 | T: Copy + PartialEq + Into,
52 | {
53 | type Iter = BoyerMooreIter<'a, T>;
54 | /// Find all occurrences of the pattern within the given text,
55 | /// but only consider non-overlapping cases.
56 | ///
57 | /// `find_in` skips over the length of the pattern each time
58 | /// a match is found, so that overlapping occurrences are ignored.
59 | ///
60 | /// ### How it works:
61 | ///
62 | /// 1. Initialize the search at the beginning of the text.
63 | /// 2. Compare the pattern with the text at the current position.
64 | /// 3. If a match is found, yield the current position and skip forward by the pattern's length (to ensure no overlaps).
65 | /// 4. If no match is found, apply the Boyer-Moore skipping rules (bad character and good suffix rules)
66 | /// to jump ahead and continue to search.
67 | /// 5. Repeat steps 2-4 until the end of the text is reached.
68 | ///
69 | /// ### Example
70 | ///
71 | /// Basic usage:
72 | ///
73 | /// ``` ignore
74 | /// use balpan::commands::boyer_moore::{BoyerMooreSearch, SearchIn};
75 | ///
76 | /// let searcher = BoyerMooreSearch::new(b"aba");
77 | /// let text = b"ababa";
78 | ///
79 | /// let result: Vec = searcher.find_in(text).collect();
80 | ///
81 | /// assert_eq!(vec![0], result);
82 | /// ```
83 | fn find_in(&'a self, text: &'a [T]) -> Self::Iter {
84 | BoyerMooreIter {
85 | searcher: self,
86 | text,
87 | pos: 0,
88 | overlap_match: false,
89 | }
90 | }
91 | /// Find all the overlapping occurrences of the pattern within given text, including the overlapping matches.
92 | /// Unlike the `find_in` method, which only considers non-overlapping cases, this finds every match
93 | /// by considering each position in the text as a starting point for the pattern.
94 | ///
95 | /// ### How it works:
96 | ///
97 | /// 1. Initialize the search at the beginning of the text.
98 | /// 2. Compare the pattern with the text at the current position.
99 | /// 3. If a match is found, yield the current position and move only one position forward
100 | /// (instead of skipping by the pattern's length).
101 | /// 4. If no match is found, apply the Boyer-Moore skipping rules (bad character and good suffix rules)
102 | /// to jump ahead and continue to search.
103 | /// 5. Repeat steps 2-4 until the end of the text is reached.
104 | ///
105 | /// ### Example
106 | ///
107 | /// ``` ignore
108 | /// use balpan::commands::boyer_moore::{BoyerMooreSearch, SearchIn};
109 | ///
110 | /// let searcher = BoyerMooreSearch::new(b"aaba");
111 | /// let text = b"aabaabaaba";
112 | ///
113 | /// let result: Vec = searcher.find_overlapping_in(text).collect();
114 | ///
115 | /// assert_eq!(vec![0, 3, 6], result);
116 | /// ```
117 | ///
118 | /// The `find_overlapping_in` method also counts matches that share characters,
119 | /// which means that the pattern "aaba" occurs at positions 0, 3, and 6 in the text.
120 | fn find_overlapping_in(&'a self, text: &'a [T]) -> Self::Iter {
121 | BoyerMooreIter {
122 | searcher: self,
123 | text,
124 | pos: 0,
125 | overlap_match: true,
126 | }
127 | }
128 | }
129 |
130 | pub struct BoyerMooreIter<'a, T> { // Lazy iterator over match positions produced by a BoyerMooreSearch.
131 | searcher: &'a BoyerMooreSearch<'a, T>, // Borrowed searcher holding the pattern and shift tables.
132 | text: &'a [T], // The haystack being scanned.
133 | pos: usize, // Next position to resume the search from.
134 | overlap_match: bool, // true: advance by 1 after a match; false: advance by the pattern length.
135 | }
136 |
137 | impl<'a, T> Iterator for BoyerMooreIter<'a, T>
138 | where
139 | T: Copy + PartialEq + Into,
140 | {
141 | type Item = usize;
142 |
143 | fn next(&mut self) -> Option {
144 | find_from_position(&self.searcher, self.text, self.pos).map(|pos| {
145 | match self.overlap_match {
146 | true => self.pos = pos + 1, // overlapping: resume one past the match start
147 | false => self.pos = pos + self.searcher.pattern.len(), // non-overlapping: skip the whole match
148 | }
149 |
150 | pos
151 | })
152 | }
153 | }
154 | /// `find_pending_character_index` method is looking for the occurrence of a specific character (pattern)
155 | /// in a given slice of characters (`chars`).
156 | ///
157 | /// If the character is found, the function returns the index of the found character - start index (`start`),
158 | /// effectively returning the relative position of the found character within the slice starting from the start index (`start`).
159 | ///
160 | /// If the character is not found, simply return 0.
161 | ///
162 | /// ### How it works:
163 | /// 1. Iterate through the slice of characters starting from the index 'start + 1'.
164 | /// 2. Compare each character with the given pattern.
165 | /// 3. If a match is found, return the relative position (i.e., the current index minus the start index).
166 | /// 4. If no match is found, return 0.
167 | ///
168 | /// ### Example
169 | ///
170 | /// - chars: \['A', 'B', 'C', 'B', 'D'\]
171 | /// - start: 1
172 | /// - pattern: 'B'
173 | ///
174 | /// Step 1: Start searching from index `start + 1` (i.e., 2):
175 | ///
176 | /// chars A B C B D
177 | /// index 0 1 2 3 4
178 | /// start ^
179 | ///
180 | /// Step 2: Compare each character with the given pattern 'B':
181 | ///
182 | /// chars A B C B D
183 | /// index 0 1 2 3 4
184 | /// start ^ ^ ^ ^
185 | /// pattern B B B
186 | ///
187 | /// Step 3: Pattern 'B' found at index 3, relative position is 3 - 1 = 2.
188 | ///
189 | /// chars A B C B D
190 | /// index 0 1 2 3 4
191 | /// start ^ ^
192 | /// pattern B B
193 | /// Result 2
194 | ///
195 | /// ### Example
196 | ///
197 | /// ```ignore
198 | /// use balpan::commands::boyer_moore::find_pending_character_index;
199 | ///
200 | /// let chars = vec!['A', 'B', 'C', 'B', 'D'];
201 | /// let start = 1;
202 | /// let pattern = &'B';
203 | ///
204 | /// let result = find_pending_character_index(&chars, start, pattern);
205 | ///
206 | /// assert_eq!(2, result);
207 | /// ```
pub fn find_pending_character_index(chars: &[char], start: usize, pattern: &char) -> usize {
    // Scan strictly after `start`; yield the offset of the first hit, or 0 when absent.
    // (0 is a safe sentinel: any real hit at index i > start gives i - start >= 1.)
    chars
        .iter()
        .enumerate()
        .skip(start + 1)
        .find(|&(_, c)| c == pattern)
        .map_or(0, |(i, _)| i - start)
}
217 | /// `build_bad_chars_table` method is building a table of bad characters, which is key part of the Boyer-Moore algorithm.
218 | ///
219 | /// ### Description
220 | ///
221 | /// This method pre-computes a table that allows the algorithm to skip sections of the text to be searched,
222 | /// resulting in a lower number of overall character comparisons.
223 | ///
224 | /// In other words, this method creates a table that helps the main search function
225 | /// know how far to jump when a mismatch is found.
226 | ///
227 | /// The table's size is usually 256 bytes, to cover all possible ASCII characters.
228 | ///
229 | /// ### How it works:
230 | ///
231 | /// For example, let's assume a pattern "GATC":
232 | ///
233 | /// - pattern: "GATC"
234 | /// - length: 4
235 | ///
236 | /// Step 1: Initialize the table with the length of the pattern:
237 | ///
238 | /// table A B C D E F G ... T U V W X Y Z
239 | /// value 4 4 4 4 4 4 4 ... 4 4 4 4 4 4 4
240 | ///
241 | /// Step 2: Iterate through the pattern and update the table except the last character:
242 | ///
243 | /// 'G' is at index 0, distance to end - 1 = 4 - 0 - 1 = 3
244 | /// 'A' is at index 1, distance to end - 1 = 4 - 1 - 1 = 2
245 | /// 'T' is at index 2, distance to end - 1 = 4 - 2 - 1 = 1
246 | /// 'C' is the last character, skip
247 | ///
248 | /// Step 3: Update the table with the calculated distances:
249 | ///
250 | /// table A B C D E F G ... T U V W X Y Z
251 | /// value 2 4 4 4 4 4 3 ... 1 4 4 4 4 4 4
252 | ///
253 | /// ### Conclusion
254 | ///
255 | /// This table is used in the search process, allowing the BM search to skip over portions of
256 | /// the text that do not contain possible matches, thereby reducing the number of comparisons.
pub fn build_bad_chars_table<T>(needle: &[T]) -> [usize; 256]
where
    T: Into<usize> + Copy,
{
    // Default shift: a byte that never occurs in the needle skips its full length.
    let mut table = [needle.len(); 256];

    // Guard the empty needle: `0..needle.len() - 1` below would underflow `usize`.
    if needle.is_empty() {
        return table;
    }

    // Every character except the last maps to its distance from the needle's end,
    // so a mismatch on that character realigns the needle under the occurrence.
    for i in 0..needle.len() - 1 {
        let c: usize = needle[i].into();
        table[c] = needle.len() - i - 1;
    }

    table
}
/// Computes the suffix table used to derive the good-suffix shifts.
///
/// Entry `j` holds the length of the longest run of characters ending at
/// position `j` that also matches a suffix of the pattern (0 when none does;
/// the final position itself is never recorded).
///
/// ### How it works
///
/// For each suffix length `1, 2, 3, ...` the pattern is scanned right-to-left;
/// a position can only extend to length `k` if it already matched the
/// `k - 1`-length suffix (`suffixes[i + suffix_len - 1] == suffix_len - 1`).
/// The loop stops at the first suffix length that matches nowhere, since a
/// longer suffix cannot match either.
///
/// For the pattern `"ABAB"` the result is `[0, 2, 0, 0]`: the leading `"AB"`
/// (ending at index 1) matches the 2-character suffix `"AB"`.
pub fn get_suffix_table<T: PartialEq>(pattern: &[T]) -> Vec<usize> {
    let len = pattern.len();
    let mut suffixes = vec![0; len];
    for suffix_len in 1..pattern.len() {
        let mut found_suffix = false;
        for i in (0..len - suffix_len).rev() {
            // either 0 or a previous match for a 1-smaller suffix
            if suffixes[i + suffix_len - 1] == suffix_len - 1
                && pattern[i] == pattern[len - suffix_len]
            {
                suffixes[i + suffix_len - 1] = suffix_len;
                found_suffix = true;
            }
        }

        if !found_suffix {
            break;
        }
    }

    suffixes
}
324 | /// Builds the "good suffix table,"
325 | /// which is an essential part of the Boyer-Moore algorithm's optimization.
326 | ///
327 | /// It's used to determine how far to jump along the text when a mismatch occurs
328 | /// in the pattern after some matches.
329 | ///
330 | /// ### Description
331 | ///
332 | /// This method takes the suffix table computed by `get_suffix_table`
333 | /// and builds a table that directly tells the algorithm how far to jump
334 | /// in case of a mismatch at a given position.
335 | ///
336 | /// ### How it works:
337 | ///
338 | /// 1. Initializes a table with the pattern's length minus one at all positions.
339 | /// 2. Updates the table using the suffixes from get_suffix_table,
340 | /// making sure that the jumps are optimized according to the pattern's internal structure.
341 | /// 3. Specifically sets the last element of the table to 1,
342 | /// as the jump should always be at least one character.
343 | ///
344 | /// Using the pattern "ABAB" and assuming the suffix table as `[0, 0, 2, 0]`:
345 | ///
346 | /// Step 1: Initialize the table with the length of the needle minus one (3)
347 | ///
348 | /// pattern: A B A B
349 | /// needle: 3 3 3 3
350 | ///
351 | /// Step 2: Iterate through the suffixes table and update the entries
352 | ///
353 | /// - suffix length 2 at index 2, skip 2 positions
354 | /// A B A B
355 | /// 3 3 2 3
356 | ///
357 | /// Step 3: Set the last entry to 1
358 | ///
359 | /// A B A B
360 | /// 3 3 2 1
361 | pub fn build_suffixes_table(pattern: &[T]) -> Vec {
362 | let suffixes = get_suffix_table(pattern);
363 | let len = pattern.len();
364 | let mut table = vec![len - 1; len];
365 |
366 | for (i, suffix_len) in suffixes.into_iter().enumerate() {
367 | let needle_index = len - suffix_len - 1;
368 | let skip = len - i - 1;
369 | if table[needle_index] > skip {
370 | table[needle_index] = skip;
371 | }
372 | }
373 |
374 | table[len - 1] = 1;
375 | table
376 | }
377 |
378 | pub trait SkipSearch { // NOTE(review): type parameter looks stripped by extraction — likely `SkipSearch<T>`.
379 | fn skip_offset(&self, bad_char: T, pattern_pos: usize, text: &[T], text_pos: usize) -> usize; // How far to shift after a mismatch at `pattern_pos`.
380 | fn len(&self) -> usize; // Pattern length.
381 | fn at(&self, index: usize) -> T; // Pattern element at `index`.
382 | fn is_empty(&self) -> bool {
383 | self.len() == 0
384 | }
385 | }
386 |
387 | pub fn find_from_position<'a, T, U>( // Boyer-Moore scan starting at `position`; returns the first match index, if any.
388 | pattern: &'a U,
389 | text: &'a [T],
390 | mut position: usize,
391 | ) -> Option
392 | where
393 | T: PartialEq + Copy + Into,
394 | U: SkipSearch,
395 | {
396 | if pattern.len() > text.len() { // Pattern longer than text can never match.
397 | return None;
398 | }
399 |
400 | let max_position = text.len() - pattern.len();
401 | while position <= max_position {
402 | let mut pattern_pos = pattern.len() - 1;
403 |
404 | while text[position + pattern_pos] == pattern.at(pattern_pos) { // Compare the window right-to-left.
405 | if pattern_pos == 0 {
406 | return Some(position);
407 | }
408 |
409 | pattern_pos -= 1;
410 | }
411 |
412 | let bad_char = text[position + pattern.len() - 1]; // Shift based on the character under the window's last position.
413 | position += pattern.skip_offset(bad_char, pattern_pos, text, position);
414 | }
415 |
416 | None
417 | }
418 |
419 | impl<'a, T> SkipSearch for &'a BoyerMooreSearch<'a, T>
420 | where
421 | T: Copy + Into,
422 | {
423 | fn skip_offset(&self, bad_char: T, pattern_pos: usize, _text: &[T], _text_pos: usize) -> usize {
424 | let bad_char_shift = self.bad_character_table[bad_char.into()];
425 | let good_suffix_shift = self.good_suffixes_table[pattern_pos];
426 |
427 | std::cmp::max(bad_char_shift, good_suffix_shift)
428 | }
429 |
430 | fn len(&self) -> usize {
431 | self.pattern.len()
432 | }
433 |
434 | fn at(&self, pos: usize) -> T {
435 | self.pattern[pos]
436 | }
437 | }
438 |
--------------------------------------------------------------------------------
/src/commands/grep.rs:
--------------------------------------------------------------------------------
1 | use std::path::PathBuf;
2 | use std::{io, path::Path};
3 |
4 | use regex::Regex;
5 | use tokio::fs::File;
6 | use tokio::io::{AsyncBufReadExt, BufReader};
7 |
8 | use serde::{Deserialize, Serialize};
9 |
10 | use crate::utils::suggest_subcommand;
11 |
12 | use super::pattern_search::PatternTree;
13 |
14 | #[derive(Debug, Serialize, Deserialize, Default)]
15 | pub struct GrepReport {
16 | pub directories: Vec,
17 | }
18 |
19 | #[derive(Debug, Serialize, Deserialize)]
20 | pub struct Directory {
21 | name: String,
22 | files: Vec,
23 | }
24 |
25 | #[derive(Debug, Serialize, Deserialize)]
26 | pub struct GrepFile {
27 | pub name: String,
28 | pub items: Vec,
29 | }
30 |
31 | #[derive(Debug, Serialize, Deserialize)]
32 | pub struct GrepLine {
33 | line: usize,
34 | content: String,
35 | position: Vec,
36 | }
37 |
38 | impl GrepReport {
39 | pub fn new() -> Self {
40 | Default::default()
41 | }
42 |
43 | fn process_line(
44 | &mut self,
45 | line: String,
46 | index: usize,
47 | path: &Path,
48 | pattern_tree: &mut PatternTree,
49 | patterns: &Vec,
50 | ) {
51 | let (found, positions) = pattern_tree.selective_search(patterns, &line);
52 |
53 | if found {
54 | // search file in list of files
55 | let dir_name = path.parent().unwrap().display().to_string();
56 | let file_name = path.display().to_string();
57 |
58 | let dir_index = self.directories.iter().position(|d| d.name == dir_name);
59 |
60 | if dir_index.is_none() {
61 | self.directories.push(Directory {
62 | name: dir_name.clone(),
63 | files: Vec::new(),
64 | });
65 | }
66 |
67 | let dir = self
68 | .directories
69 | .iter_mut()
70 | .find(|d| d.name == dir_name)
71 | .unwrap();
72 |
73 | let file_index = dir.files.iter().position(|f| f.name == file_name);
74 |
75 | if file_index.is_none() {
76 | dir.files.push(GrepFile {
77 | name: file_name.clone(),
78 | items: Vec::new(),
79 | });
80 | }
81 |
82 | let file = dir.files.iter_mut().find(|f| f.name == file_name).unwrap();
83 |
84 | let line = GrepLine {
85 | line: index + 1,
86 | content: line,
87 | position: positions,
88 | };
89 | file.items.push(line);
90 | }
91 | }
92 |
93 | pub async fn grep_file(
94 | &mut self,
95 | path: &Path,
96 | pattern_tree: &mut PatternTree,
97 | patterns: &Vec,
98 | ) -> io::Result<()> {
99 | let file = File::open(path).await?;
100 | let mut reader = BufReader::new(file);
101 |
102 | let mut line_bytes = Vec::new();
103 | let mut i = 0;
104 |
105 | while reader.read_until(b'\n', &mut line_bytes).await? > 0 {
106 | let line = String::from_utf8_lossy(&line_bytes).to_string();
107 | self.process_line(line, i, path, pattern_tree, patterns);
108 | line_bytes.clear();
109 | i += 1;
110 | }
111 |
112 | Ok(())
113 | }
114 |
115 | // TODO
116 | pub fn format_tree(&self, ident_size: usize) -> String {
117 | let mut result = String::new();
118 |
119 | for directory in &self.directories {
120 | result.push_str(&format!("{}\n", directory.name));
121 |
122 | for file in &directory.files {
123 | for item in &file.items {
124 | let file_relative_path =
125 | GrepReport::display_relative_path(&directory.name, &file.name);
126 |
127 | result.push_str(&format!(
128 | "{:ident$}{}:{}:{} - {}\n",
129 | "",
130 | file_relative_path,
131 | item.line,
132 | item.position[0],
133 | item.content.trim_start(),
134 | ident = ident_size,
135 | ));
136 | }
137 | }
138 | }
139 |
140 | result
141 | }
142 |
143 | fn format_plain(
144 | &self,
145 | hide_path: bool,
146 | list_of_files: bool,
147 | count: bool,
148 | patterns_to_search: Vec,
149 | colorize: bool,
150 | ) -> String {
151 | let mut result = String::new();
152 | let mut counter: usize = 0;
153 |
154 | if !count {
155 | for dir in &self.directories {
156 | let path = Path::new(&dir.name);
157 |
158 | if !hide_path {
159 | dir_path_pretty(path, &mut result);
160 | }
161 |
162 | for file in &dir.files {
163 | if !hide_path {
164 | let file_name = Path::new(&file.name);
165 | let last_two = last_two(file_name);
166 | result.push_str(&format!("{}\n", last_two[0]));
167 | }
168 |
169 | if !list_of_files {
170 | for item in &file.items {
171 | if colorize {
172 | // `(?i)` is for case insensitive search
173 | let pattern =
174 | Regex::new(&format!(r"(?i){}", patterns_to_search.join(" ")))
175 | .unwrap();
176 | let text = &item.content;
177 |
178 | let colored_text = pattern
179 | .replace_all(text, |caps: ®ex::Captures| {
180 | format!("\x1b[31m{}\x1b[0m", &caps[0])
181 | });
182 |
183 | result.push_str(&format!(
184 | "{} {}",
185 | item.line,
186 | colored_text.trim_start()
187 | ));
188 | } else {
189 | result.push_str(&format!(
190 | "{} {}",
191 | item.line,
192 | item.content.trim_start()
193 | ));
194 | }
195 | counter += 1;
196 | }
197 | result.push('\n');
198 | } else {
199 | counter += file.items.len();
200 | }
201 | }
202 | }
203 | result.push_str(&format!("\nTotal {} lines found\n", counter));
204 | } else {
205 | counter = self
206 | .directories
207 | .iter()
208 | .map(|dir| dir.files.iter().map(|file| file.items.len()).sum::())
209 | .sum();
210 | result = format!("Total {} lines found\n", counter);
211 | }
212 |
213 | result
214 | }
215 |
216 | #[allow(clippy::too_many_arguments)]
217 | pub fn report_formatting(
218 | &mut self,
219 | format: Option,
220 | hide_path: bool,
221 | list_of_files: bool,
222 | count: bool,
223 | patterns_to_search: Vec,
224 | colorize: bool,
225 | ) -> String {
226 | let default = "plain".to_string();
227 | let format = format.unwrap_or(default);
228 |
229 | match format.as_str() {
230 | "json" => serde_json::to_string_pretty(self).unwrap(),
231 | "plain" => self.format_plain(
232 | hide_path,
233 | list_of_files,
234 | count,
235 | patterns_to_search,
236 | colorize,
237 | ),
238 | // "tree" => self.format_tree(4),
239 | _ => {
240 | let suggest = suggest_subcommand(&format).unwrap();
241 | format!("Unknown format: '{}'. Did you mean '{}'?", format, suggest)
242 | }
243 | }
244 | }
245 |
246 | fn display_relative_path(directory: &str, file_name: &str) -> String {
247 | let base_path = Path::new(directory);
248 | let path = Path::new(file_name);
249 |
250 | let relative_path = path.strip_prefix(base_path).unwrap();
251 | let mut display_path = PathBuf::new();
252 |
253 | for _ in 1..base_path.components().count() - 2 {
254 | display_path.push("..");
255 | }
256 |
257 | display_path.push(relative_path);
258 |
259 | display_path.display().to_string()
260 | }
261 | }
262 |
/// Last two path components in reverse order: `[file, parent]`.
/// Panics on non-UTF-8 components (matches previous behavior).
fn last_two(path: &Path) -> Vec<&str> {
    path.iter()
        .rev()
        .take(2)
        .map(|s| s.to_str().unwrap())
        .collect()
}
270 |
271 | fn dir_path_pretty(path: &Path, result: &mut String) {
272 | let last_two: Vec<&str> = last_two(path);
273 |
274 | if last_two.len() == 2 {
275 | result.push_str(&format!("{}/{}\n", last_two[1], last_two[0]));
276 | }
277 |
278 | result.push_str(&format!("{}\n", last_two[0]));
279 | }
280 |
--------------------------------------------------------------------------------
/src/commands/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod boyer_moore;
2 | pub mod grep;
3 | pub mod pattern_search;
4 |
--------------------------------------------------------------------------------
/src/commands/pattern_search.rs:
--------------------------------------------------------------------------------
1 | use crate::commands::boyer_moore::{BoyerMooreSearch, SearchIn};
2 | use aho_corasick::AhoCorasick;
3 | use regex::Regex;
4 |
/// Search configuration shared by every pattern-matching strategy.
#[derive(Debug, Clone)]
pub struct PatternTree {
    // Lowercase both pattern(s) and text before matching.
    pub ignore_case: bool,
    // Treat the first pattern as a regular expression.
    pub regex_flag: bool,
}

/// (any match found, byte offset of every match start)
type PatternPosition = (bool, Vec<usize>);
12 |
13 | #[allow(clippy::new_without_default)]
14 | impl PatternTree {
15 | pub fn new() -> Self {
16 | PatternTree {
17 | ignore_case: false,
18 | regex_flag: false,
19 | }
20 | }
21 |
22 | /// Call all search methods based on the given patterns
23 | ///
24 | /// If the pattern is single, then call `boyer_moore_search` method.
25 | /// Because BM algorithm is known as the fastest algorithm for single pattern search.
26 | ///
27 | /// Whereas, if the pattern is multiple, then call `aho_corasick_search` method.
28 | /// AC is known as the fastest algorithm for multiple pattern search.
29 | pub fn selective_search(&self, patterns: &Vec, text: &str) -> PatternPosition {
30 | if self.regex_flag {
31 | return self.regex(text, &patterns[0]);
32 | }
33 |
34 | match patterns.len() {
35 | 0 => (false, vec![]),
36 | 1 => match self.ignore_case {
37 | true => self.boyer_moore_search(&text.to_lowercase(), &patterns[0].to_lowercase()),
38 | false => self.boyer_moore_search(text, &patterns[0]),
39 | },
40 | _ => {
41 | if self.ignore_case {
42 | let mut lower_patterns: Vec = Vec::new();
43 | patterns
44 | .iter()
45 | .for_each(|pattern| lower_patterns.push(pattern.to_lowercase()));
46 | self.aho_corasick_search(&text.to_lowercase(), &lower_patterns)
47 | } else {
48 | self.aho_corasick_search(text, patterns)
49 | }
50 | }
51 | }
52 | }
53 |
54 | pub fn aho_corasick_search(&self, text: &str, patterns: &Vec) -> PatternPosition {
55 | let ac = AhoCorasick::new(patterns).unwrap();
56 | let mut result: Vec = Vec::new();
57 |
58 | for matched in ac.find_iter(text) {
59 | result.push(matched.start());
60 | }
61 |
62 | (!result.is_empty(), result)
63 | }
64 |
65 | pub fn boyer_moore_search(&self, text: &str, pattern: &String) -> PatternPosition {
66 | let searcher = BoyerMooreSearch::new(pattern.as_bytes());
67 | let result: Vec = searcher.find_in(text.as_bytes()).collect();
68 |
69 | (!result.is_empty(), result)
70 | }
71 |
72 | pub fn regex(&self, text: &str, pattern: &String) -> PatternPosition {
73 | let re = match self.ignore_case {
74 | true => Regex::new(&format!(r"(?i){}", pattern)).unwrap(),
75 | false => Regex::new(pattern).unwrap(),
76 | };
77 |
78 | let mut result: Vec = Vec::new();
79 |
80 | for matched in re.find_iter(text) {
81 | result.push(matched.start());
82 | }
83 |
84 | (!result.is_empty(), result)
85 | }
86 | }
87 |
--------------------------------------------------------------------------------
/src/config.rs:
--------------------------------------------------------------------------------
1 | // This is referred from the helix codebase:
2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/config.rs
3 | use std::str::from_utf8;
4 |
5 | /// Default built-in languages.toml.
6 | pub fn default_lang_config() -> toml::Value {
7 | let default_config = include_bytes!("../languages.toml");
8 | toml::from_str(from_utf8(default_config).unwrap())
9 | .expect("Could not parse built-in languages.toml to valid toml")
10 | }
11 |
12 | /// User configured languages.toml file, merged with the default config.
13 | pub fn user_lang_config() -> Result {
14 | let config = [
15 | crate::config_dir(),
16 | crate::find_workspace().0.join(".balpan"),
17 | ]
18 | .into_iter()
19 | .map(|path| path.join("languages.toml"))
20 | .filter_map(|file| {
21 | std::fs::read_to_string(file)
22 | .map(|config| toml::from_str(&config))
23 | .ok()
24 | })
25 | .collect::, _>>()?
26 | .into_iter()
27 | .fold(default_lang_config(), |a, b| {
28 | // combines for example
29 | // b:
30 | // [[language]]
31 | // name = "toml"
32 | // language-server = { command = "taplo", args = ["lsp", "stdio"] }
33 | //
34 | // a:
35 | // [[language]]
36 | // language-server = { command = "/usr/bin/taplo" }
37 | //
38 | // into:
39 | // [[language]]
40 | // name = "toml"
41 | // language-server = { command = "/usr/bin/taplo" }
42 | //
43 | // thus it overrides the third depth-level of b with values of a if they exist, but otherwise merges their values
44 | crate::merge_toml_values(a, b, 3)
45 | });
46 |
47 | Ok(config)
48 | }
49 |
--------------------------------------------------------------------------------
/src/grammar.rs:
--------------------------------------------------------------------------------
1 | // This is referred from the helix codebase:
2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/grammar.rs
3 | use anyhow::{anyhow, bail, Context, Result};
4 | use serde::{Deserialize, Serialize};
5 | use std::time::SystemTime;
6 | use std::{
7 | collections::HashSet,
8 | path::{Path, PathBuf},
9 | process::Command,
10 | sync::mpsc::channel,
11 | };
12 | use std::{fs, thread};
13 | use tempfile::TempPath;
14 | use tree_sitter::Language;
15 |
// Platform-specific extension for compiled grammar dynamic libraries.
#[cfg(unix)]
const DYLIB_EXTENSION: &str = "so";

#[cfg(windows)]
const DYLIB_EXTENSION: &str = "dll";

#[cfg(target_arch = "wasm32")]
const DYLIB_EXTENSION: &str = "wasm";
24 |
25 | #[derive(Debug, Serialize, Deserialize)]
26 | struct Configuration {
27 | #[serde(rename = "use-grammars")]
28 | pub grammar_selection: Option,
29 | pub grammar: Vec,
30 | }
31 |
32 | #[derive(Debug, Serialize, Deserialize)]
33 | #[serde(rename_all = "lowercase", untagged)]
34 | pub enum GrammarSelection {
35 | Only { only: HashSet },
36 | Except { except: HashSet },
37 | }
38 |
39 | #[derive(Debug, Serialize, Deserialize)]
40 | #[serde(deny_unknown_fields)]
41 | pub struct GrammarConfiguration {
42 | #[serde(rename = "name")]
43 | pub grammar_id: String,
44 | pub source: GrammarSource,
45 | }
46 |
47 | #[derive(Debug, Serialize, Deserialize)]
48 | #[serde(rename_all = "lowercase", untagged)]
49 | pub enum GrammarSource {
50 | Local {
51 | path: String,
52 | },
53 | Git {
54 | #[serde(rename = "git")]
55 | remote: String,
56 | #[serde(rename = "rev")]
57 | revision: String,
58 | subpath: Option,
59 | },
60 | }
61 |
// NOTE(review): hard-coded build target — presumably meant to come from a
// BUILD_TARGET env var at compile time (see trailing comment); confirm.
const BUILD_TARGET: &str = "x86_64-unknown-linux-gnu"; // env!("BUILD_TARGET");
const REMOTE_NAME: &str = "origin";
64 |
/// Dynamic grammar loading is not available on wasm32.
#[cfg(target_arch = "wasm32")]
pub fn get_language(name: &str) -> Result<Language> {
    unimplemented!()
}
69 |
70 | #[cfg(not(target_arch = "wasm32"))]
71 | pub fn get_language(name: &str) -> Result {
72 | use libloading::{Library, Symbol};
73 | let mut rel_library_path = PathBuf::new().join("grammars").join(name);
74 | rel_library_path.set_extension(DYLIB_EXTENSION);
75 | let library_path = crate::runtime_file(&rel_library_path);
76 |
77 | let library = unsafe { Library::new(&library_path) }
78 | .with_context(|| format!("Error opening dynamic library {:?}", library_path))?;
79 | let language_fn_name = format!("tree_sitter_{}", name.replace('-', "_"));
80 | let language = unsafe {
81 | let language_fn: Symbol Language> = library
82 | .get(language_fn_name.as_bytes())
83 | .with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
84 | language_fn()
85 | };
86 | std::mem::forget(library);
87 | Ok(language)
88 | }
89 |
90 | pub fn fetch_grammars() -> Result<()> {
91 | // We do not need to fetch local grammars.
92 | let mut grammars = get_grammar_configs()?;
93 | grammars.retain(|grammar| !matches!(grammar.source, GrammarSource::Local { .. }));
94 |
95 | println!("Fetching {} grammars", grammars.len());
96 | let results = run_parallel(grammars, fetch_grammar);
97 |
98 | let mut errors = Vec::new();
99 | let mut git_updated = Vec::new();
100 | let mut git_up_to_date = 0;
101 | let mut non_git = Vec::new();
102 |
103 | for (grammar_id, res) in results {
104 | match res {
105 | Ok(FetchStatus::GitUpToDate) => git_up_to_date += 1,
106 | Ok(FetchStatus::GitUpdated { revision }) => git_updated.push((grammar_id, revision)),
107 | Ok(FetchStatus::NonGit) => non_git.push(grammar_id),
108 | Err(e) => errors.push((grammar_id, e)),
109 | }
110 | }
111 |
112 | non_git.sort_unstable();
113 | git_updated.sort_unstable_by(|a, b| a.0.cmp(&b.0));
114 |
115 | if git_up_to_date != 0 {
116 | println!("{} up to date git grammars", git_up_to_date);
117 | }
118 |
119 | if !non_git.is_empty() {
120 | println!("{} non git grammars", non_git.len());
121 | println!("\t{:?}", non_git);
122 | }
123 |
124 | if !git_updated.is_empty() {
125 | println!("{} updated grammars", git_updated.len());
126 | // We checked the vec is not empty, unwrapping will not panic
127 | let longest_id = git_updated.iter().map(|x| x.0.len()).max().unwrap();
128 | for (id, rev) in git_updated {
129 | println!(
130 | "\t{id:width$} now on {rev}",
131 | id = id,
132 | width = longest_id,
133 | rev = rev
134 | );
135 | }
136 | }
137 |
138 | if !errors.is_empty() {
139 | let len = errors.len();
140 | for (i, (grammar, error)) in errors.into_iter().enumerate() {
141 | println!("Failure {}/{len}: {grammar} {error}", i + 1);
142 | }
143 | bail!("{len} grammars failed to fetch");
144 | }
145 |
146 | Ok(())
147 | }
148 |
149 | pub fn build_grammars(target: Option) -> Result<()> {
150 | let grammars = get_grammar_configs()?;
151 | println!("Building {} grammars", grammars.len());
152 | let results = run_parallel(grammars, move |grammar| {
153 | build_grammar(grammar, target.as_deref())
154 | });
155 |
156 | let mut errors = Vec::new();
157 | let mut already_built = 0;
158 | let mut built = Vec::new();
159 |
160 | for (grammar_id, res) in results {
161 | match res {
162 | Ok(BuildStatus::AlreadyBuilt) => already_built += 1,
163 | Ok(BuildStatus::Built) => built.push(grammar_id),
164 | Err(e) => errors.push((grammar_id, e)),
165 | }
166 | }
167 |
168 | built.sort_unstable();
169 |
170 | if already_built != 0 {
171 | println!("{} grammars already built", already_built);
172 | }
173 |
174 | if !built.is_empty() {
175 | println!("{} grammars built now", built.len());
176 | println!("\t{:?}", built);
177 | }
178 |
179 | if !errors.is_empty() {
180 | let len = errors.len();
181 | for (i, (grammar_id, error)) in errors.into_iter().enumerate() {
182 | println!("Failure {}/{len}: {grammar_id} {error}", i + 1);
183 | }
184 | bail!("{len} grammars failed to build");
185 | }
186 |
187 | Ok(())
188 | }
189 |
190 | // Returns the set of grammar configurations the user requests.
191 | // Grammars are configured in the default and user `languages.toml` and are
192 | // merged. The `grammar_selection` key of the config is then used to filter
193 | // down all grammars into a subset of the user's choosing.
194 | fn get_grammar_configs() -> Result> {
195 | let config: Configuration = crate::config::user_lang_config()
196 | .context("Could not parse languages.toml")?
197 | .try_into()?;
198 |
199 | let grammars = match config.grammar_selection {
200 | Some(GrammarSelection::Only { only: selections }) => config
201 | .grammar
202 | .into_iter()
203 | .filter(|grammar| selections.contains(&grammar.grammar_id))
204 | .collect(),
205 | Some(GrammarSelection::Except { except: rejections }) => config
206 | .grammar
207 | .into_iter()
208 | .filter(|grammar| !rejections.contains(&grammar.grammar_id))
209 | .collect(),
210 | None => config.grammar,
211 | };
212 |
213 | Ok(grammars)
214 | }
215 |
216 | fn run_parallel(grammars: Vec, job: F) -> Vec<(String, Result)>
217 | where
218 | F: Fn(GrammarConfiguration) -> Result + Send + 'static + Clone,
219 | Res: Send + 'static,
220 | {
221 | let (tx, rx) = channel();
222 | let mut handles = Vec::new();
223 |
224 | for grammar in grammars {
225 | let tx = tx.to_owned();
226 | let job = job.to_owned();
227 |
228 | let handle = thread::spawn(move || {
229 | let result = (grammar.grammar_id.clone(), job(grammar));
230 | let _ = tx.send(result);
231 | });
232 |
233 | handles.push(handle);
234 | }
235 |
236 | for handle in handles {
237 | let _ = handle.join();
238 | }
239 |
240 | drop(tx); // not necessary, but makes it explicit that we're done with the sender
241 | rx.iter().collect()
242 | }
243 |
/// Outcome of fetching one grammar's sources.
enum FetchStatus {
    GitUpToDate,
    GitUpdated { revision: String },
    NonGit,
}
249 |
250 | fn fetch_grammar(grammar: GrammarConfiguration) -> Result {
251 | if let GrammarSource::Git {
252 | remote, revision, ..
253 | } = grammar.source
254 | {
255 | let grammar_dir = crate::runtime_dirs()
256 | .first()
257 | .expect("No runtime directories provided") // guaranteed by post-condition
258 | .join("grammars")
259 | .join("sources")
260 | .join(&grammar.grammar_id);
261 |
262 | fs::create_dir_all(&grammar_dir).context(format!(
263 | "Could not create grammar directory {:?}",
264 | grammar_dir
265 | ))?;
266 |
267 | // create the grammar dir contains a git directory
268 | if !grammar_dir.join(".git").exists() {
269 | git(&grammar_dir, ["init"])?;
270 | }
271 |
272 | // ensure the remote matches the configured remote
273 | if get_remote_url(&grammar_dir).map_or(true, |s| s != remote) {
274 | set_remote(&grammar_dir, &remote)?;
275 | }
276 |
277 | // ensure the revision matches the configured revision
278 | if get_revision(&grammar_dir).map_or(true, |s| s != revision) {
279 | // Fetch the exact revision from the remote.
280 | // Supported by server-side git since v2.5.0 (July 2015),
281 | // enabled by default on major git hosts.
282 | git(
283 | &grammar_dir,
284 | ["fetch", "--depth", "1", REMOTE_NAME, &revision],
285 | )?;
286 | git(&grammar_dir, ["checkout", &revision])?;
287 |
288 | Ok(FetchStatus::GitUpdated { revision })
289 | } else {
290 | Ok(FetchStatus::GitUpToDate)
291 | }
292 | } else {
293 | Ok(FetchStatus::NonGit)
294 | }
295 | }
296 |
297 | // Sets the remote for a repository to the given URL, creating the remote if
298 | // it does not yet exist.
299 | fn set_remote(repository_dir: &Path, remote_url: &str) -> Result {
300 | git(
301 | repository_dir,
302 | ["remote", "set-url", REMOTE_NAME, remote_url],
303 | )
304 | .or_else(|_| git(repository_dir, ["remote", "add", REMOTE_NAME, remote_url]))
305 | }
306 |
307 | fn get_remote_url(repository_dir: &Path) -> Option {
308 | git(repository_dir, ["remote", "get-url", REMOTE_NAME]).ok()
309 | }
310 |
311 | fn get_revision(repository_dir: &Path) -> Option {
312 | git(repository_dir, ["rev-parse", "HEAD"]).ok()
313 | }
314 |
315 | // A wrapper around 'git' commands which returns stdout in success and a
316 | // helpful error message showing the command, stdout, and stderr in error.
317 | fn git(repository_dir: &Path, args: I) -> Result
318 | where
319 | I: IntoIterator
- ,
320 | S: AsRef,
321 | {
322 | let output = Command::new("git")
323 | .args(args)
324 | .current_dir(repository_dir)
325 | .output()?;
326 |
327 | if output.status.success() {
328 | Ok(String::from_utf8_lossy(&output.stdout)
329 | .trim_end()
330 | .to_owned())
331 | } else {
332 | // TODO: figure out how to display the git command using `args`
333 | Err(anyhow!(
334 | "Git command failed.\nStdout: {}\nStderr: {}",
335 | String::from_utf8_lossy(&output.stdout),
336 | String::from_utf8_lossy(&output.stderr),
337 | ))
338 | }
339 | }
340 |
/// Outcome of building one grammar's shared library.
enum BuildStatus {
    AlreadyBuilt,
    Built,
}
345 |
346 | fn build_grammar(grammar: GrammarConfiguration, target: Option<&str>) -> Result {
347 | let grammar_dir = if let GrammarSource::Local { path } = &grammar.source {
348 | PathBuf::from(&path)
349 | } else {
350 | crate::runtime_dirs()
351 | .first()
352 | .expect("No runtime directories provided") // guaranteed by post-condition
353 | .join("grammars")
354 | .join("sources")
355 | .join(&grammar.grammar_id)
356 | };
357 |
358 | let grammar_dir_entries = grammar_dir.read_dir().with_context(|| {
359 | format!(
360 | "Failed to read directory {:?}. Did you use 'hx --grammar fetch'?",
361 | grammar_dir
362 | )
363 | })?;
364 |
365 | if grammar_dir_entries.count() == 0 {
366 | return Err(anyhow!(
367 | "Directory {:?} is empty. Did you use 'hx --grammar fetch'?",
368 | grammar_dir
369 | ));
370 | };
371 |
372 | let path = match &grammar.source {
373 | GrammarSource::Git {
374 | subpath: Some(subpath),
375 | ..
376 | } => grammar_dir.join(subpath),
377 | _ => grammar_dir,
378 | }
379 | .join("src");
380 |
381 | build_tree_sitter_library(&path, grammar, target)
382 | }
383 |
384 | fn build_tree_sitter_library(
385 | src_path: &Path,
386 | grammar: GrammarConfiguration,
387 | target: Option<&str>,
388 | ) -> Result {
389 | let header_path = src_path;
390 | let parser_path = src_path.join("parser.c");
391 | let mut scanner_path = src_path.join("scanner.c");
392 |
393 | let scanner_path = if scanner_path.exists() {
394 | Some(scanner_path)
395 | } else {
396 | scanner_path.set_extension("cc");
397 | if scanner_path.exists() {
398 | Some(scanner_path)
399 | } else {
400 | None
401 | }
402 | };
403 | let parser_lib_path = crate::runtime_dirs()
404 | .first()
405 | .expect("No runtime directories provided") // guaranteed by post-condition
406 | .join("grammars");
407 | let mut library_path = parser_lib_path.join(&grammar.grammar_id);
408 | library_path.set_extension(DYLIB_EXTENSION);
409 |
410 | // if we are running inside a buildscript emit cargo metadata
411 | // to detect if we are running from a buildscript check some env variables
412 | // that cargo only sets for build scripts
413 | if std::env::var("OUT_DIR").is_ok() && std::env::var("CARGO").is_ok() {
414 | if let Some(scanner_path) = scanner_path.as_ref().and_then(|path| path.to_str()) {
415 | println!("cargo:rerun-if-changed={scanner_path}");
416 | }
417 | if let Some(parser_path) = parser_path.to_str() {
418 | println!("cargo:rerun-if-changed={parser_path}");
419 | }
420 | }
421 |
422 | let recompile = needs_recompile(&library_path, &parser_path, &scanner_path)
423 | .context("Failed to compare source and binary timestamps")?;
424 |
425 | if !recompile {
426 | return Ok(BuildStatus::AlreadyBuilt);
427 | }
428 |
429 | let mut config = cc::Build::new();
430 | config
431 | .cpp(true)
432 | .opt_level(3)
433 | .cargo_metadata(false)
434 | .host(BUILD_TARGET)
435 | .target(target.unwrap_or(BUILD_TARGET));
436 | let compiler = config.get_compiler();
437 | let mut command = Command::new(compiler.path());
438 | command.current_dir(src_path);
439 | for (key, value) in compiler.env() {
440 | command.env(key, value);
441 | }
442 |
443 | command.args(compiler.args());
444 | // used to delay dropping the temporary object file until after the compilation is complete
445 | let _path_guard;
446 |
447 | if compiler.is_like_msvc() {
448 | command
449 | .args(["/nologo", "/LD", "/I"])
450 | .arg(header_path)
451 | .arg("/Od")
452 | .arg("/utf-8")
453 | .arg("/std:c11");
454 | if let Some(scanner_path) = scanner_path.as_ref() {
455 | if scanner_path.extension() == Some("c".as_ref()) {
456 | command.arg(scanner_path);
457 | } else {
458 | let mut cpp_command = Command::new(compiler.path());
459 | cpp_command.current_dir(src_path);
460 | for (key, value) in compiler.env() {
461 | cpp_command.env(key, value);
462 | }
463 | cpp_command.args(compiler.args());
464 | let object_file =
465 | library_path.with_file_name(format!("{}_scanner.obj", &grammar.grammar_id));
466 | cpp_command
467 | .args(["/nologo", "/LD", "/I"])
468 | .arg(header_path)
469 | .arg("/Od")
470 | .arg("/utf-8")
471 | .arg("/std:c++14")
472 | .arg(format!("/Fo{}", object_file.display()))
473 | .arg("/c")
474 | .arg(scanner_path);
475 | let output = cpp_command
476 | .output()
477 | .context("Failed to execute C++ compiler")?;
478 |
479 | if !output.status.success() {
480 | return Err(anyhow!(
481 | "Parser compilation failed.\nStdout: {}\nStderr: {}",
482 | String::from_utf8_lossy(&output.stdout),
483 | String::from_utf8_lossy(&output.stderr)
484 | ));
485 | }
486 | command.arg(&object_file);
487 | _path_guard = TempPath::from_path(object_file);
488 | }
489 | }
490 |
491 | command
492 | .arg(parser_path)
493 | .arg("/link")
494 | .arg(format!("/out:{}", library_path.to_str().unwrap()));
495 | } else {
496 | command
497 | .arg("-shared")
498 | .arg("-fPIC")
499 | .arg("-fno-exceptions")
500 | .arg("-I")
501 | .arg(header_path)
502 | .arg("-o")
503 | .arg(&library_path);
504 |
505 | if let Some(scanner_path) = scanner_path.as_ref() {
506 | if scanner_path.extension() == Some("c".as_ref()) {
507 | command.arg("-xc").arg("-std=c11").arg(scanner_path);
508 | } else {
509 | let mut cpp_command = Command::new(compiler.path());
510 | cpp_command.current_dir(src_path);
511 | for (key, value) in compiler.env() {
512 | cpp_command.env(key, value);
513 | }
514 | cpp_command.args(compiler.args());
515 | let object_file =
516 | library_path.with_file_name(format!("{}_scanner.o", &grammar.grammar_id));
517 | cpp_command
518 | .arg("-fPIC")
519 | .arg("-fno-exceptions")
520 | .arg("-I")
521 | .arg(header_path)
522 | .arg("-o")
523 | .arg(&object_file)
524 | .arg("-std=c++14")
525 | .arg("-c")
526 | .arg(scanner_path);
527 | let output = cpp_command
528 | .output()
529 | .context("Failed to execute C++ compiler")?;
530 | if !output.status.success() {
531 | return Err(anyhow!(
532 | "Parser compilation failed.\nStdout: {}\nStderr: {}",
533 | String::from_utf8_lossy(&output.stdout),
534 | String::from_utf8_lossy(&output.stderr)
535 | ));
536 | }
537 |
538 | command.arg(&object_file);
539 | _path_guard = TempPath::from_path(object_file);
540 | }
541 | }
542 | command.arg("-xc").arg("-std=c11").arg(parser_path);
543 | if cfg!(all(
544 | unix,
545 | not(any(target_os = "macos", target_os = "illumos"))
546 | )) {
547 | command.arg("-Wl,-z,relro,-z,now");
548 | }
549 | }
550 |
551 | let output = command
552 | .output()
553 | .context("Failed to execute C/C++ compiler")?;
554 | if !output.status.success() {
555 | return Err(anyhow!(
556 | "Parser compilation failed.\nStdout: {}\nStderr: {}",
557 | String::from_utf8_lossy(&output.stdout),
558 | String::from_utf8_lossy(&output.stderr)
559 | ));
560 | }
561 |
562 | Ok(BuildStatus::Built)
563 | }
564 |
565 | fn needs_recompile(
566 | lib_path: &Path,
567 | parser_c_path: &Path,
568 | scanner_path: &Option,
569 | ) -> Result {
570 | if !lib_path.exists() {
571 | return Ok(true);
572 | }
573 | let lib_mtime = mtime(lib_path)?;
574 | if mtime(parser_c_path)? > lib_mtime {
575 | return Ok(true);
576 | }
577 | if let Some(scanner_path) = scanner_path {
578 | if mtime(scanner_path)? > lib_mtime {
579 | return Ok(true);
580 | }
581 | }
582 | Ok(false)
583 | }
584 |
585 | fn mtime(path: &Path) -> Result {
586 | Ok(fs::metadata(path)?.modified()?)
587 | }
588 |
589 | /// Gives the contents of a file from a language's `runtime/queries/`
590 | /// directory
591 | pub fn load_runtime_file(language: &str, filename: &str) -> Result {
592 | let path = crate::runtime_file(&PathBuf::new().join("queries").join(language).join(filename));
593 | std::fs::read_to_string(path)
594 | }
595 |
--------------------------------------------------------------------------------
/src/language.rs:
--------------------------------------------------------------------------------
/// Languages balpan can analyze; `Other` carries the raw file extension
/// of anything unrecognized.
#[derive(PartialEq)]
pub enum Language {
    Rust,
    Python,
    Ruby,
    Cpp,
    TypeScript,
    JavaScript,
    Other(String),
}
11 |
12 | impl Language {
13 | pub fn as_str(&self) -> &str {
14 | match self {
15 | Self::Rust => "rust",
16 | Self::Python => "python",
17 | Self::Ruby => "ruby",
18 | Self::Cpp => "cpp",
19 | Self::TypeScript => "typescript",
20 | Self::JavaScript => "javascript",
21 | Self::Other(ref language) => language.as_str(),
22 | }
23 | }
24 |
25 | #[inline]
26 | pub fn from_extension(extension: &str) -> Self {
27 | match extension {
28 | "rs" => Self::Rust,
29 | "py" => Self::Python,
30 | "rb" => Self::Ruby,
31 | "cpp" => Self::Cpp,
32 | "h" => Self::Cpp,
33 | "hpp" => Self::Cpp,
34 | "ts" => Self::TypeScript,
35 | "js" => Self::JavaScript,
36 | other_extension => Self::Other(other_extension.to_string()),
37 | }
38 | }
39 |
40 | /// language specific tree-sitter node types
41 | pub fn top_level_node_type(&self) -> &str {
42 | match self {
43 | Language::Rust => "source_file",
44 | Language::Python => "module",
45 | Language::Ruby | Language::JavaScript | Language::TypeScript => "program",
46 | Language::Cpp => "translation_unit",
47 | _ => "",
48 | }
49 | }
50 |
51 | pub fn decorator_node_type(&self) -> &str {
52 | match self {
53 | Language::Rust => "attribute_item",
54 | Language::Python | Language::Ruby | Language::Cpp => "null",
55 | Language::TypeScript | Language::JavaScript => "decorator",
56 | _ => "",
57 | }
58 | }
59 |
60 | pub fn comment_node_type(&self) -> &str {
61 | match self {
62 | Language::Rust => "line_comment",
63 | Language::Python
64 | | Language::Ruby
65 | | Language::Cpp
66 | | Language::TypeScript
67 | | Language::JavaScript => "comment",
68 | _ => "",
69 | }
70 | }
71 |
72 | pub fn scannable_node_types(&self) -> Vec<&str> {
73 | let mut scannable = self.ignorable_node_types();
74 | let mut commentable = self.commentable_node_types();
75 | scannable.append(&mut commentable);
76 | scannable
77 | }
78 |
79 | pub fn ignorable_node_types(&self) -> Vec<&str> {
80 | match self {
81 | Language::Rust => vec![
82 | "type_item",
83 | "static_item",
84 | "extern_crate_declaration",
85 | "const_item",
86 | "use_declaration",
87 | "expression_statement",
88 | "macro_invocation",
89 | "foreign_mod_item", // extern "C"
90 | ],
91 | Language::TypeScript | Language::JavaScript => {
92 | vec!["string_fragment", "import_specifier", "named_imports"]
93 | }
94 | _ => vec![],
95 | }
96 | }
97 |
98 | pub fn commentable_node_types(&self) -> Vec<&str> {
99 | match self {
100 | Language::Rust => vec![
101 | "attribute_item",
102 | "mod_item",
103 | "enum_item",
104 | "impl_item",
105 | "function_item",
106 | "struct_item",
107 | "trait_item",
108 | "macro_definition",
109 | ],
110 | Language::Python => vec![
111 | "class_definition",
112 | "function_definition",
113 | "decorated_definition",
114 | ],
115 | Language::Ruby => vec!["class", "method", "function", "module"],
116 | Language::Cpp => vec![
117 | "namespace_definition",
118 | "function_definition",
119 | "class_specifier",
120 | ],
121 | Language::TypeScript | Language::JavaScript => vec![
122 | "enum_declaration",
123 | "function_declaration",
124 | "class_declaration",
125 | "method_definition",
126 | "interface_declaration",
127 | "export_statement",
128 | // "variable_declaration",
129 | "expression_statement", // namespace
130 | ],
131 | _ => vec![],
132 | }
133 | }
134 |
135 | pub fn nested_traversable_symbols(&self) -> Vec<&str> {
136 | match self {
137 | Language::Rust => vec!["mod_item", "impl_item"],
138 | Language::Python => vec!["class_definition"],
139 | Language::Ruby => vec!["class", "module"],
140 | Language::Cpp => vec!["namespace_definition", "class_specifier"],
141 | Language::TypeScript | Language::JavaScript => vec![
142 | "class_declaration",
143 | "expression_statement",
144 | "internal_module",
145 | ],
146 | _ => vec![],
147 | }
148 | }
149 | }
150 |
151 | impl From<&str> for Language {
152 | fn from(language_name: &str) -> Self {
153 | match language_name {
154 | "rust" => Self::Rust,
155 | "python" => Self::Python,
156 | "ruby" => Self::Ruby,
157 | "cpp" => Self::Cpp,
158 | "typescript" => Self::TypeScript,
159 | "javascript" => Self::JavaScript,
160 | other_language => Self::Other(other_language.to_string()),
161 | }
162 | }
163 | }
164 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // This is referred from the helix codebase:
2 | // https://github.com/helix-editor/helix/blob/master/helix-loader/src/lib.rs
3 | pub mod analyzer;
4 | pub mod commands;
5 | pub mod config;
6 | pub mod grammar;
7 | pub mod language;
8 | pub mod scanner;
9 | pub mod tokens;
10 | pub mod tree_sitter_extended;
11 | pub mod utils;
12 |
13 | use etcetera::base_strategy::{choose_base_strategy, BaseStrategy};
14 | use std::path::{Path, PathBuf};
15 | use toml::{map::Map, Value};
16 |
17 | static RUNTIME_DIRS: once_cell::sync::Lazy> =
18 | once_cell::sync::Lazy::new(prioritize_runtime_dirs);
19 |
20 | static CONFIG_FILE: once_cell::sync::OnceCell = once_cell::sync::OnceCell::new();
21 |
22 | pub fn initialize_config_file(specified_file: Option) {
23 | let config_file = specified_file.unwrap_or_else(|| {
24 | let config_dir = config_dir();
25 |
26 | if !config_dir.exists() {
27 | std::fs::create_dir_all(&config_dir).ok();
28 | }
29 |
30 | config_dir.join("config.toml")
31 | });
32 |
33 | // We should only initialize this value once.
34 | CONFIG_FILE.set(config_file).ok();
35 | }
36 |
37 | /// A list of runtime directories from highest to lowest priority
38 | ///
39 | /// The priority is:
40 | ///
41 | /// 1. sibling directory to `CARGO_MANIFEST_DIR` (if environment variable is set)
42 | /// 2. subdirectory of user config directory (always included)
43 | /// 3. `BALPAN_RUNTIME` (if environment variable is set)
44 | /// 4. subdirectory of path to balpan executable (always included)
45 | ///
46 | /// Postcondition: returns at least two paths (they might not exist).
47 | fn prioritize_runtime_dirs() -> Vec {
48 | const RT_DIR: &str = "runtime";
49 | // Adding higher priority first
50 | let mut rt_dirs = Vec::new();
51 | if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") {
52 | // this is the directory of the crate being run by cargo, we need the workspace path so we take the parent
53 | let path = PathBuf::from(dir).parent().unwrap().join(RT_DIR);
54 | log::debug!("runtime dir: {}", path.to_string_lossy());
55 | rt_dirs.push(path);
56 | }
57 |
58 | let conf_rt_dir = config_dir().join(RT_DIR);
59 | rt_dirs.push(conf_rt_dir);
60 |
61 | if let Ok(dir) = std::env::var("BALPAN_RUNTIME") {
62 | rt_dirs.push(dir.into());
63 | }
64 |
65 | // fallback to location of the executable being run
66 | // canonicalize the path in case the executable is symlinked
67 | let exe_rt_dir = std::env::current_exe()
68 | .ok()
69 | .and_then(|path| std::fs::canonicalize(path).ok())
70 | .and_then(|path| path.parent().map(|path| path.to_path_buf().join(RT_DIR)))
71 | .unwrap();
72 | rt_dirs.push(exe_rt_dir);
73 | rt_dirs
74 | }
75 |
76 | /// Runtime directories ordered from highest to lowest priority
77 | ///
78 | /// All directories should be checked when looking for files.
79 | ///
80 | /// Postcondition: returns at least one path (it might not exist).
81 | pub fn runtime_dirs() -> &'static [PathBuf] {
82 | &RUNTIME_DIRS
83 | }
84 |
85 | /// Find file with path relative to runtime directory
86 | ///
87 | /// `rel_path` should be the relative path from within the `runtime/` directory.
88 | /// The valid runtime directories are searched in priority order and the first
89 | /// file found to exist is returned, otherwise None.
90 | fn find_runtime_file(rel_path: &Path) -> Option {
91 | RUNTIME_DIRS.iter().find_map(|rt_dir| {
92 | let path = rt_dir.join(rel_path);
93 | if path.exists() {
94 | return Some(path);
95 | }
96 |
97 | None
98 | })
99 | }
100 |
101 | /// Find file with path relative to runtime directory
102 | ///
103 | /// `rel_path` should be the relative path from within the `runtime/` directory.
104 | /// The valid runtime directories are searched in priority order and the first
105 | /// file found to exist is returned, otherwise the path to the final attempt
106 | /// that failed.
107 | pub fn runtime_file(rel_path: &Path) -> PathBuf {
108 | find_runtime_file(rel_path).unwrap_or_else(|| {
109 | RUNTIME_DIRS
110 | .last()
111 | .map(|dir| dir.join(rel_path))
112 | .unwrap_or_default()
113 | })
114 | }
115 |
/// Selects which balpan base directory `get_dir` resolves.
enum StrategyType {
    Config,
    Cache,
}
120 |
121 | fn get_dir(target: StrategyType) -> PathBuf {
122 | let target_str = match target {
123 | StrategyType::Config => "config",
124 | StrategyType::Cache => "cache",
125 | };
126 |
127 | // Check if the directory override environment variable is set
128 | if let Ok(dir) = std::env::var(format!("BALPAN_{}_DIR", target_str.to_uppercase())) {
129 | return PathBuf::from(dir);
130 | }
131 |
132 | let strategy = choose_base_strategy()
133 | .unwrap_or_else(|_| panic!("Unable to find the {target_str} directory strategy!"));
134 | let mut path = match target {
135 | StrategyType::Config => strategy.config_dir(),
136 | StrategyType::Cache => strategy.cache_dir(),
137 | };
138 |
139 | path.push("balpan");
140 |
141 | path
142 | }
143 |
/// Balpan's configuration directory (see `get_dir` for resolution rules).
pub fn config_dir() -> PathBuf {
    get_dir(StrategyType::Config)
}
147 |
/// Balpan's cache directory (see `get_dir` for resolution rules).
pub fn cache_dir() -> PathBuf {
    get_dir(StrategyType::Cache)
}
151 |
152 | pub fn config_file() -> PathBuf {
153 | CONFIG_FILE
154 | .get()
155 | .map(|path| path.to_path_buf())
156 | .unwrap_or_else(|| config_dir().join("config.toml"))
157 | }
158 |
/// Path of the per-workspace config file: `<workspace>/.balpan/config.toml`.
pub fn workspace_config_file() -> PathBuf {
    find_workspace().0.join(".balpan").join("config.toml")
}
162 |
/// Path of the user-level language configuration: `<config_dir>/languages.toml`.
pub fn lang_config_file() -> PathBuf {
    config_dir().join("languages.toml")
}
166 |
/// Path of balpan's log file inside the cache directory.
pub fn log_file() -> PathBuf {
    cache_dir().join("balpan.log")
}
170 |
/// Extracts a TOML value's `name` key as a string, if present.
fn get_name(v: &Value) -> Option<&str> {
    v.get("name").and_then(Value::as_str)
}
174 |
175 | /// Merge two TOML documents, merging values from `right` onto `left`
176 | ///
177 | /// When an array exists in both `left` and `right`, `right`'s array is
178 | /// used. When a table exists in both `left` and `right`, the merged table
179 | /// consists of all keys in `left`'s table unioned with all keys in `right`
180 | /// with the values of `right` being merged recursively onto values of
181 | /// `left`.
182 | ///
183 | /// `merge_toplevel_arrays` controls whether a top-level array in the TOML
184 | /// document is merged instead of overridden. This is useful for TOML
185 | /// documents that use a top-level array of values like the `languages.toml`,
186 | /// where one usually wants to override or add to the array instead of
187 | /// replacing it altogether.
188 | pub fn merge_toml_values(left: toml::Value, right: toml::Value, merge_depth: usize) -> toml::Value {
189 | match (left, right) {
190 | (Value::Array(left_items), Value::Array(right_items)) => {
191 | toml_array_value(merge_depth, left_items, right_items)
192 | }
193 | (Value::Table(left_map), Value::Table(right_map)) => {
194 | toml_table_value(merge_depth, left_map, right_map)
195 | }
196 | // Catch everything else we didn't handle, and use the right value
197 | (_, value) => value,
198 | }
199 | }
200 |
201 | fn toml_array_value(
202 | merge_depth: usize,
203 | mut left_items: Vec,
204 | right_items: Vec,
205 | ) -> toml::Value {
206 | // The top-level arrays should be merged but nested arrays should
207 | // act as overrides. For the `languages.toml` config, this means
208 | // that you can specify a sub-set of languages in an overriding
209 | // `languages.toml` but that nested arrays like Language Server
210 | // arguments are replaced instead of merged.
211 | if merge_depth == 0 {
212 | return Value::Array(right_items);
213 | }
214 |
215 | left_items.reserve(right_items.len());
216 |
217 | for r_val in right_items {
218 | let l_val = get_name(&r_val)
219 | .and_then(|r_name| left_items.iter().position(|v| get_name(v) == Some(r_name)))
220 | .map(|l_pos| left_items.remove(l_pos));
221 |
222 | let m_val = match l_val {
223 | Some(l) => merge_toml_values(l, r_val, merge_depth - 1),
224 | None => r_val,
225 | };
226 |
227 | left_items.push(m_val);
228 | }
229 |
230 | Value::Array(left_items)
231 | }
232 |
233 | fn toml_table_value(
234 | merge_depth: usize,
235 | mut left_map: Map,
236 | right_map: Map,
237 | ) -> toml::Value {
238 | if merge_depth == 0 {
239 | return Value::Table(right_map);
240 | }
241 |
242 | for (r_name, r_val) in right_map {
243 | match left_map.remove(&r_name) {
244 | Some(l_val) => {
245 | let merged_val = merge_toml_values(l_val, r_val, merge_depth - 1);
246 | left_map.insert(r_name, merged_val);
247 | }
248 | None => {
249 | left_map.insert(r_name, r_val);
250 | }
251 | }
252 | }
253 |
254 | Value::Table(left_map)
255 | }
256 |
/// Finds the current workspace folder.
/// Used as a ceiling dir for LSP root resolution, the filepicker and potentially as a future filewatching root
///
/// This function starts searching the FS upward from the CWD
/// and returns the first directory that contains either `.git` or `.balpan`.
/// If no workspace was found returns (CWD, true).
/// Otherwise (workspace, false) is returned
pub fn find_workspace() -> (PathBuf, bool) {
    let current_dir = std::env::current_dir().expect("unable to determine current directory");

    let workspace = current_dir
        .ancestors()
        .find(|dir| dir.join(".git").exists() || dir.join(".balpan").exists())
        .map(Path::to_path_buf);

    match workspace {
        Some(root) => (root, false),
        None => (current_dir, true),
    }
}
274 |
#[cfg(test)]
mod merge_toml_tests {
    use std::str;

    use super::merge_toml_values;
    use toml::Value;

    // Overriding a built-in language entry merges nested tables: changed keys
    // win, new keys are added, untouched keys survive.
    #[test]
    fn language_toml_map_merges() {
        const USER: &str = r#"
        [[language]]
        name = "nix"
        test = "bbb"
        indent = { tab-width = 4, unit = " ", test = "aaa" }
        "#;

        let base = include_bytes!("../languages.toml");
        let base = str::from_utf8(base).expect("Couldn't parse built-in languages config");
        let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config");
        let user: Value = toml::from_str(USER).unwrap();

        let merged = merge_toml_values(base, user, 3);
        let languages = merged.get("language").unwrap().as_array().unwrap();
        let nix = languages
            .iter()
            .find(|v| v.get("name").unwrap().as_str().unwrap() == "nix")
            .unwrap();
        let nix_indent = nix.get("indent").unwrap();

        // We changed tab-width and unit in indent so check them if they are the new values
        assert_eq!(
            nix_indent.get("tab-width").unwrap().as_integer().unwrap(),
            4
        );
        assert_eq!(nix_indent.get("unit").unwrap().as_str().unwrap(), " ");
        // We added a new keys, so check them
        assert_eq!(nix.get("test").unwrap().as_str().unwrap(), "bbb");
        assert_eq!(nix_indent.get("test").unwrap().as_str().unwrap(), "aaa");
        // We didn't change comment-token so it should be same
        assert_eq!(nix.get("comment-token").unwrap().as_str().unwrap(), "#");
    }

    // Nested arrays (deeper than the merge depth) override rather than merge:
    // the user's `args` replaces the base entry's `args` wholesale.
    #[test]
    fn language_toml_nested_array_merges() {
        const USER: &str = r#"
        [[language]]
        name = "typescript"
        language-server = { command = "deno", args = ["lsp"] }
        "#;

        let base = include_bytes!("../languages.toml");
        let base = str::from_utf8(base).expect("Couldn't parse built-in languages config");
        let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config");
        let user: Value = toml::from_str(USER).unwrap();

        let merged = merge_toml_values(base, user, 3);
        let languages = merged.get("language").unwrap().as_array().unwrap();
        let ts = languages
            .iter()
            .find(|v| v.get("name").unwrap().as_str().unwrap() == "typescript")
            .unwrap();
        assert_eq!(
            ts.get("language-server")
                .unwrap()
                .get("args")
                .unwrap()
                .as_array()
                .unwrap(),
            &vec![Value::String("lsp".into())]
        )
    }

    // NOTE(review): merge_toml_values never reads BALPAN_CONFIG_DIR, so this
    // test exercises the same merge as the one above; presumably it guards
    // against env-dependent merge behavior — confirm the original intent.
    #[test]
    fn allow_env_variable_override() {
        const USER: &str = r#"
        [[language]]
        name = "typescript"
        language-server = { command = "deno", args = ["lsp"] }
        "#;

        let base = include_bytes!("../languages.toml");
        let base = str::from_utf8(base).expect("Couldn't parse built-in languages config");
        let base: Value = toml::from_str(base).expect("Couldn't parse built-in languages config");
        let user: Value = toml::from_str(USER).unwrap();

        std::env::set_var("BALPAN_CONFIG_DIR", "/tmp");
        let merged = merge_toml_values(base, user, 3);
        std::env::remove_var("BALPAN_CONFIG_DIR");

        let languages = merged.get("language").unwrap().as_array().unwrap();
        let ts = languages
            .iter()
            .find(|v| v.get("name").unwrap().as_str().unwrap() == "typescript")
            .unwrap();
        assert_eq!(
            ts.get("language-server")
                .unwrap()
                .get("args")
                .unwrap()
                .as_array()
                .unwrap(),
            &vec![Value::String("lsp".into())]
        )
    }
}
380 |
--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
1 | use std::path::Path;
2 | use std::time::Instant;
3 |
4 | use balpan::commands::pattern_search::PatternTree;
5 | use clap::{Parser, Subcommand};
6 | use glob::glob;
7 |
8 | use balpan::commands::grep::GrepReport;
9 | use balpan::scanner::Scanner;
10 | use balpan::utils::{get_current_repository, list_available_files, suggest_subcommand};
11 | use git2::Repository;
12 | use tokio::runtime::{Builder, Runtime};
13 |
/// Top-level CLI definition parsed by clap.
#[derive(Debug, Parser)]
#[command(author, about, version, long_about = None)]
struct BalpanApp {
    // The chosen subcommand; dispatched in `main`.
    #[clap(subcommand)]
    command: BalpanCommand,
}
20 |
21 | #[derive(Debug, Subcommand)]
22 | enum BalpanCommand {
23 | #[clap(about = "Setup environment for Balpan and fetch all available treesitter parsers")]
24 | Init,
25 | #[clap(about = "Reset environment for Balpan and removes all TODO comments")]
26 | Reset,
27 | #[clap(
28 | about = "Searches a particular pattern of characters, and displays all lines that contain that pattern"
29 | )]
30 | Grep {
31 | #[clap(short = 'f', long, help = "Specific file to scan")]
32 | file: Option,
33 | #[clap(short = 'p', long, help = "Specific pattern to search")]
34 | pattern: Option,
35 | #[clap(
36 | long,
37 | help = "Apply formatting to the output. Available options: json, tree, plain (default)"
38 | )]
39 | #[clap(
40 | short = 'i',
41 | long = "ignore",
42 | help = "ignores the case(upper or lower) of the pattern."
43 | )]
44 | ignore_case: Option>,
45 | #[clap(
46 | short = 'H',
47 | help = "Display the matched lines, but do not display the filenames."
48 | )]
49 | hide_path: bool,
50 | #[clap(
51 | short = 'l',
52 | help = "Display the names of files that contain matches, without displaying the matched lines."
53 | )]
54 | list_of_files: bool,
55 | #[clap(
56 | short = 'c',
57 | help = "This prints only a count of the lines that match a pattern."
58 | )]
59 | count: bool,
60 | #[clap(
61 | short = 'T',
62 | long = "time",
63 | help = "Display the elapsed time during the execution of the command."
64 | )]
65 | show_elapsed_time: bool,
66 | #[clap(short = 'o', help = "Colorize the matched pattern in the output.")]
67 | colorize: bool,
68 | #[clap(
69 | short = 'E',
70 | help = "Treats pattern as an extended regular expression (ERE)."
71 | )]
72 | extended_regex: bool,
73 | format: Option,
74 | },
75 | #[clap(about = "Generate a TODO comment for specific file")]
76 | Analyze {
77 | #[clap(short, long, help = "Specific file to scan")]
78 | pattern: Option,
79 | },
80 | }
81 |
/// Builds the single-threaded tokio runtime used to drive the async handlers.
fn create_runtime() -> Runtime {
    Builder::new_current_thread().enable_all().build().unwrap()
}
85 |
86 | fn main() {
87 | let app = BalpanApp::parse();
88 |
89 | // verify that the subcommand entered is correct.
90 | let user_input: Option = std::env::args().nth(1);
91 |
92 | if let Some(input) = user_input {
93 | if suggest_subcommand(&input).is_some() {
94 | println!("Did you mean '{}'?", suggest_subcommand(&input).unwrap());
95 | }
96 | }
97 |
98 | match app.command {
99 | BalpanCommand::Init => {
100 | let runtime = create_runtime();
101 |
102 | runtime.block_on(async { handle_init().await })
103 | }
104 | BalpanCommand::Reset => handle_reset(),
105 | BalpanCommand::Grep {
106 | file,
107 | pattern,
108 | format,
109 | ignore_case,
110 | hide_path,
111 | list_of_files,
112 | count,
113 | colorize,
114 | extended_regex,
115 | show_elapsed_time: elapsed,
116 | } => {
117 | let time = Instant::now();
118 | let runtime = create_runtime();
119 |
120 | let patterns: Option> =
121 | pattern.map(|p| p.split_whitespace().map(|s| s.to_string()).collect());
122 |
123 | runtime.block_on(async {
124 | let mut report = GrepReport::new();
125 | handle_grep(
126 | file,
127 | patterns,
128 | &mut report,
129 | format,
130 | ignore_case,
131 | hide_path,
132 | list_of_files,
133 | count,
134 | colorize,
135 | extended_regex,
136 | )
137 | .await;
138 | });
139 |
140 | if elapsed {
141 | println!("time: {:?}", time.elapsed());
142 | }
143 | }
144 | BalpanCommand::Analyze { pattern } => {
145 | match pattern {
146 | Some(ref p) => {
147 | if !p.starts_with('"') || !p.ends_with('"') {
148 | panic!("Invalid file path. Please include double quotes(`\"`) in the path.")
149 | }
150 | }
151 | None => panic!("No file specified. Please specify a file path to analyze"),
152 | }
153 |
154 | let runtime = create_runtime();
155 |
156 | runtime.block_on(async {
157 | handle_analyze(pattern).await;
158 | });
159 | }
160 | }
161 | }
162 |
/// Runs `git` with the given arguments and waits for it to finish.
///
/// Panics if the `git` binary cannot be spawned; the command's exit status
/// and output are otherwise ignored (best-effort invocation).
/// (Restored the stripped `Vec<String>` parameter type.)
fn git(args: Vec<String>) {
    std::process::Command::new("git")
        .args(args)
        .output()
        .unwrap();
}
169 |
170 | fn find_branch<'a>(repository: &Repository, target: &'a str) -> Option<&'a str> {
171 | let mut iter = repository.branches(None);
172 |
173 | while let Some(Ok((ref branch, _))) = &iter.as_mut().expect("???").next() {
174 | if let Ok(Some(branch_name)) = branch.name() {
175 | if target == branch_name {
176 | return Some(target);
177 | }
178 | }
179 | }
180 |
181 | None
182 | }
183 |
184 | fn find_main_or_master_branch<'a>(repo: &'a Repository, branches: &[&'a str]) -> String {
185 | if branches.is_empty() {
186 | panic!("No main or master branch found");
187 | }
188 |
189 | if let Some(branch) = find_branch(repo, branches[0]) {
190 | return branch.to_string();
191 | }
192 |
193 | find_main_or_master_branch(repo, &branches[1..])
194 | }
195 |
196 | fn handle_reset() {
197 | let repo = get_current_repository().unwrap();
198 | //let onboarding_branch = find_branch(&repo, "onboarding").to_string();
199 | let is_already_setup: bool;
200 |
201 | let onboarding_branch = match find_branch(&repo, "onboarding") {
202 | Some(branch) => {
203 | is_already_setup = true;
204 | branch.to_string()
205 | }
206 | None => panic!("No onboarding branch found"),
207 | };
208 |
209 | let main_branch = find_main_or_master_branch(&repo, &["main", "master"]);
210 |
211 | if is_already_setup {
212 | git(vec!["switch".to_owned(), main_branch]);
213 | git(vec![
214 | "branch".to_owned(),
215 | "-d".to_owned(),
216 | onboarding_branch,
217 | ]);
218 | }
219 | }
220 |
221 | async fn handle_init() {
222 | let repo = get_current_repository().unwrap();
223 | let mut is_already_setup: bool = false;
224 |
225 | let _onboarding_branch = match find_branch(&repo, "onboarding") {
226 | Some(branch) => {
227 | is_already_setup = true;
228 | branch.to_string()
229 | }
230 | None => String::new(),
231 | };
232 |
233 | let main_branch = find_main_or_master_branch(&repo, &["main", "master"]);
234 |
235 | if !is_already_setup {
236 | git(vec!["switch".to_owned(), main_branch.clone()]);
237 | git(vec![
238 | "switch".to_owned(),
239 | "-c".to_owned(),
240 | "onboarding".to_owned(),
241 | ]);
242 | }
243 |
244 | git(vec!["switch".to_owned(), main_branch]);
245 | git(vec!["switch".to_owned(), "onboarding".to_owned()]);
246 |
247 | Scanner::scan(&repo).await;
248 | println!("init!");
249 | }
250 |
251 | #[allow(clippy::too_many_arguments)]
252 | async fn handle_grep(
253 | file: Option,
254 | pattern: Option>,
255 | report: &mut GrepReport,
256 | format: Option,
257 | ignore_case: Option>,
258 | hide_path: bool,
259 | list_of_files: bool,
260 | count: bool,
261 | colorize: bool,
262 | extends_regex: bool,
263 | ) {
264 | let mut pattern_tree = PatternTree::new();
265 | let default_patterns = vec!["[TODO]".to_string(), "[DONE]".to_string()];
266 |
267 | let patterns_to_search: Vec;
268 |
269 | if extends_regex {
270 | pattern_tree.ignore_case = true;
271 | pattern_tree.regex_flag = true;
272 | }
273 |
274 | match ignore_case {
275 | Some(ignore_patterns) => {
276 | pattern_tree.ignore_case = true;
277 | patterns_to_search = ignore_patterns;
278 | }
279 | None => {
280 | patterns_to_search = pattern.unwrap_or(default_patterns);
281 | }
282 | }
283 |
284 | match file {
285 | Some(file_path) => {
286 | scan_specific_file(file_path, report, &mut pattern_tree, &patterns_to_search).await
287 | }
288 | None => scan_project_directory(report, pattern_tree, patterns_to_search.clone()).await,
289 | }
290 |
291 | let formatting = report.report_formatting(
292 | format,
293 | hide_path,
294 | list_of_files,
295 | count,
296 | patterns_to_search,
297 | colorize,
298 | );
299 | println!("{}", formatting);
300 | }
301 |
302 | async fn handle_analyze(pattern: Option) {
303 | if pattern.is_none() {
304 | panic!("No file specified. Please specify a file path to analyze")
305 | }
306 |
307 | let file_pattern_str = pattern.unwrap();
308 | let filter = glob(&file_pattern_str).expect("Failed to read file pattern");
309 |
310 | for entry in filter {
311 | match entry {
312 | Ok(path) => Scanner::scan_specific_file(path).await,
313 | Err(e) => println!("Error while reading file pattern: {}", e),
314 | }
315 | }
316 | }
317 |
318 | async fn scan_project_directory(
319 | report: &mut GrepReport,
320 | mut pattern_tree: PatternTree,
321 | patterns_to_search: Vec,
322 | ) {
323 | let repo = get_current_repository().expect("No repository found");
324 | let repo_path = repo.workdir().expect("No workdir found").to_str().unwrap();
325 |
326 | let available_files: Vec = list_available_files(repo_path).await;
327 |
328 | for file in available_files {
329 | let path = Path::new(&file);
330 | update_report(report, path, &mut pattern_tree, &patterns_to_search).await;
331 | }
332 | }
333 |
334 | async fn scan_specific_file(
335 | file_path: String,
336 | report: &mut GrepReport,
337 | pattern_tree: &mut PatternTree,
338 | patterns_to_search: &Vec,
339 | ) {
340 | let path = Path::new(&file_path);
341 | update_report(report, path, pattern_tree, patterns_to_search).await;
342 | }
343 |
344 | async fn update_report(
345 | report: &mut GrepReport,
346 | path: &Path,
347 | pattern_tree: &mut PatternTree,
348 | patterns_to_search: &Vec,
349 | ) {
350 | report
351 | .grep_file(path, pattern_tree, patterns_to_search)
352 | .await
353 | .unwrap();
354 | }
355 |
--------------------------------------------------------------------------------
/src/scanner.rs:
--------------------------------------------------------------------------------
1 | use std::fs::File;
2 | use std::io::{Read, Seek, Write};
3 | use std::path::{Path, PathBuf};
4 |
5 | use git2::Repository;
6 |
7 | use crate::analyzer::Analyzer;
8 | use crate::grammar::{build_grammars, fetch_grammars};
9 | use crate::language::Language;
10 | use crate::utils::list_available_files;
11 |
12 | pub struct Scanner;
13 |
14 | impl Scanner {
15 | pub async fn scan(repo: &Repository) {
16 | fetch_grammars().unwrap();
17 | build_grammars(None).unwrap();
18 |
19 | if let Some(workdir) = repo.workdir() {
20 | let repo_root = workdir.to_string_lossy();
21 | let filenames = list_available_files(&repo_root);
22 | for filename in filenames.await {
23 | if filename.contains("test") {
24 | continue;
25 | }
26 | let path = Path::new(&filename);
27 | let language = match path.extension() {
28 | Some(os_str) => Language::from_extension(os_str.to_str().unwrap()),
29 | _ => Language::Other("".to_string()),
30 | };
31 |
32 | if let Language::Other(_) = language {
33 | continue;
34 | }
35 |
36 | if let Ok(mut file) = File::options().read(true).write(true).open(path) {
37 | let mut source_code = String::new();
38 | file.read_to_string(&mut source_code).unwrap();
39 | let with_empty_line = source_code.ends_with('\n');
40 | let analyzer = Analyzer {
41 | source_code,
42 | language,
43 | };
44 |
45 | let writer_queue = &analyzer.analyze();
46 | let mut lines = vec![];
47 |
48 | for line in writer_queue {
49 | lines.push(String::from(line));
50 | }
51 |
52 | if with_empty_line {
53 | lines.push(String::new());
54 | }
55 |
56 | file.set_len(0).unwrap();
57 | file.rewind().unwrap();
58 | file.write_all(lines.join("\n").as_bytes()).unwrap();
59 | }
60 | }
61 | }
62 | }
63 |
64 | /// Scan a specific file and add TODO comments
65 | pub async fn scan_specific_file(path: PathBuf) {
66 | fetch_grammars().unwrap();
67 | build_grammars(None).unwrap();
68 |
69 | if let Ok(mut file) = File::options().read(true).write(true).open(path.clone()) {
70 | let mut source_code = String::new();
71 | file.read_to_string(&mut source_code).unwrap();
72 | let with_empty_line = source_code.ends_with('\n');
73 |
74 | let language = match path.extension() {
75 | Some(p) => Language::from_extension(p.to_str().unwrap()),
76 | _ => Language::Other(String::new()),
77 | };
78 |
79 | let analyzer = Analyzer {
80 | source_code,
81 | language,
82 | };
83 |
84 | let writer_queue = &analyzer.analyze();
85 | let mut lines: Vec = vec![];
86 |
87 | for line in writer_queue {
88 | lines.push(String::from(line));
89 | }
90 |
91 | if with_empty_line {
92 | lines.push(String::new());
93 | }
94 |
95 | file.set_len(0).unwrap();
96 | file.rewind().unwrap();
97 | file.write_all(lines.join("\n").as_bytes()).unwrap();
98 | }
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/src/tokens.rs:
--------------------------------------------------------------------------------
1 | use crate::language::Language;
2 |
3 | pub enum CommentToken {
4 | TripleSlashTODO,
5 | DoubleSlashTODO,
6 | HashTODO,
7 | Other,
8 | }
9 |
10 | impl CommentToken {
11 | pub fn from_language(language: &Language) -> Self {
12 | match language {
13 | Language::Rust | Language::Cpp => CommentToken::TripleSlashTODO,
14 | Language::Python | Language::Ruby => CommentToken::HashTODO,
15 | Language::JavaScript | Language::TypeScript => CommentToken::DoubleSlashTODO,
16 | _ => CommentToken::Other,
17 | }
18 | }
19 |
20 | pub fn to_str(&self) -> &str {
21 | match self {
22 | CommentToken::TripleSlashTODO => "/// [TODO]",
23 | CommentToken::DoubleSlashTODO => "// [TODO]",
24 | CommentToken::HashTODO => "# [TODO]",
25 | CommentToken::Other => "",
26 | }
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/src/tree_sitter_extended.rs:
--------------------------------------------------------------------------------
1 | use tree_sitter::{Node, Point, Range};
2 |
/// Position-vs-range relations for tree-sitter points.
pub trait MembershipCheck {
    /// True when the point strictly precedes the range's start point.
    fn is_before(&self, range: Range) -> bool;
    /// True when the point strictly follows the range's end point.
    fn is_after(&self, range: Range) -> bool;
    /// True when the point lies within the range, endpoints included.
    fn is_member_of(&self, range: Range) -> bool;
}
8 |
9 | impl MembershipCheck for Point {
10 | fn is_before(&self, range: Range) -> bool {
11 | let start_point = range.start_point;
12 |
13 | if self.row < start_point.row {
14 | return true;
15 | }
16 |
17 | if self.row > start_point.row {
18 | return false;
19 | }
20 |
21 | self.column < start_point.column
22 | }
23 |
24 | fn is_after(&self, range: Range) -> bool {
25 | let end_point = range.end_point;
26 |
27 | if self.row < end_point.row {
28 | return false;
29 | }
30 |
31 | if self.row > end_point.row {
32 | return true;
33 | }
34 |
35 | self.column > end_point.column
36 | }
37 |
38 | fn is_member_of(&self, range: Range) -> bool {
39 | if self.is_before(range) {
40 | return false;
41 | }
42 |
43 | if self.is_after(range) {
44 | return false;
45 | }
46 |
47 | true
48 | }
49 | }
50 |
/// Construction of a `Range` from other tree-sitter values.
pub trait RangeFactory {
    /// Build the `Range` covering exactly `node`'s span in the source.
    fn from_node(node: Node) -> Range;
}
54 |
55 | impl RangeFactory for Range {
56 | #[inline]
57 | fn from_node(node: Node) -> Range {
58 | Range {
59 | start_byte: node.start_byte(),
60 | end_byte: node.end_byte(),
61 | start_point: node.start_position(),
62 | end_point: node.end_position(),
63 | }
64 | }
65 | }
66 |
/// Locating the identifier (name token) of a syntax node.
pub trait ResolveSymbol {
    /// Returns `(row, from, to)`: the identifier's end row and its
    /// start/end columns. `(0, 0, 0)` is the sentinel for nodes that
    /// have no identifier to report.
    fn identifier_range(&self) -> (usize, usize, usize);
}
70 |
impl ResolveSymbol for Node<'_> {
    /// Find where this node's identifier sits in the source.
    ///
    /// The branches below are keyed on tree-sitter `kind()` strings coming
    /// from several grammars (Rust, C/C++, Python, JS/TS); order matters
    /// because later branches overwrite the default "name"-field lookup.
    /// Panics if a kind reaches the end without an identifier node.
    fn identifier_range(&self) -> (usize, usize, usize) {
        // Kinds that never carry a name worth resolving.
        let simple_cases = [
            "attribute_item",
            "use_declaration",
            "macro_invocation",
            "expression_statement",
            "foreign_mod_item",
        ];

        if simple_cases.contains(&self.kind()) {
            return (0, 0, 0);
        }

        // Default: most kinds expose their identifier through the "name" field.
        let mut node = self.child_by_field_name("name");

        // A namespace without a name (anonymous namespace) has no identifier.
        if self.kind() == "namespace_definition" && node.is_none() {
            return (0, 0, 0);
        }

        // Grammars whose function_definition wraps the name in declarators
        // (e.g. C/C++): descend declarator -> declarator to reach it.
        if self.kind() == "function_definition" {
            if let Some(child) = self.child_by_field_name("declarator") {
                node = child.child_by_field_name("declarator");
            }
        }

        // method_definition also uses the "name" field (same as the default).
        if self.kind() == "method_definition" {
            node = self.child_by_field_name("name");
        }

        // case of decorated_definition: the name lives on the wrapped definition.
        if self.kind() == "decorated_definition" {
            let definition_node = self.child_by_field_name("definition").unwrap();
            node = definition_node.child_by_field_name("name");
        }

        // case of impl_item: prefer the trait name, fall back to the type.
        if self.kind() == "impl_item" {
            node = self.child_by_field_name("trait"); // impl Foo for Bar
            node = match node {
                None => self.child_by_field_name("type"), // impl Foo
                result => result,
            }
        }

        // e.g. `export function foo() {}`
        if self.kind() == "export_statement" {
            // this case handles import statement especially `export * from './compiler_facade_interface';` things.
            // I think this is not a good way to handle this case, but I don't know how to handle this case.
            if self.child_by_field_name("source").is_some() {
                return (0, 0, 0);
            }

            // `export <declaration>`: take the name off the exported declaration.
            // With no "declaration" child, the earlier "name" lookup stands.
            if let Some(child) = self.child_by_field_name("declaration") {
                node = child.child_by_field_name("name");
            }
        }

        let identifier_node =
            node.unwrap_or_else(|| panic!("`{}` is an invalid identifier node type", self.kind()));

        let from = identifier_node.start_position().column;
        let row = identifier_node.end_position().row;
        let to = identifier_node.end_position().column;

        (row, from, to)
    }
}
139 |
--------------------------------------------------------------------------------
/src/utils.rs:
--------------------------------------------------------------------------------
1 | use std::collections::HashSet;
2 | use std::env;
3 | use std::fs::File;
4 |
5 | use git2::Repository;
6 | use ignore::{DirEntry, WalkBuilder};
7 | use once_cell::sync::Lazy;
8 | use strsim::levenshtein;
9 |
10 | #[rustfmt::skip]
11 | static IGNORED_EXTENSIONS: Lazy> = Lazy::new(|| {
12 | [
13 | ".tmp", ".bak", ".swp", ".old", ".new", ".orig", ".patch", ".diff", // temporary
14 | ".proj", ".sln", ".classpath", ".project", // project
15 | ".obj", ".exe", ".dll", ".class", ".o", ".e", // binary
16 | ".toml", ".lock", ".json", ".md", ".yaml", ".yml", ".xml", ".ini", // dev config
17 | ".zip", ".tar", ".gz", ".rar", ".7z", ".tgz", ".xz", ".bz2", // compressed
18 | ".png", ".jpg", ".jpeg", ".bmp", ".svg", ".gif", // image
19 | ".wav", ".mp3", ".mp4", ".avi", ".mov", ".flv", ".ogg", // audio/video
20 | ".doc", ".docx", ".pdf", ".ppt", ".pptx", ".xls", "xlsx", ".odt", // document
21 | ".yml", ".xml", ".ini", // config
22 | ".log", ".dat", // log
23 | ".yarn", ".npm", // package manager
24 | ]
25 | .iter()
26 | .map(|&s| s.into())
27 | .collect()
28 | });
29 |
30 | static IGNORED_PREFIXES: Lazy> = Lazy::new(|| {
31 | ["."].iter().map(|&s| s.into()).collect() // hidden files start with '.'
32 | });
33 |
34 | pub fn get_current_repository() -> Option {
35 | let current_dir = env::current_dir().ok()?;
36 | let repo = Repository::discover(current_dir).ok()?;
37 |
38 | Some(repo)
39 | }
40 |
41 | pub async fn list_available_files(repo_path: &str) -> Vec {
42 | let mut result = Vec::new();
43 |
44 | let is_ignored = move |entry: &DirEntry| {
45 | let extension = entry
46 | .path()
47 | .extension()
48 | .and_then(|s| s.to_str())
49 | .unwrap_or("");
50 | let file_name = entry
51 | .path()
52 | .file_name()
53 | .and_then(|s| s.to_str())
54 | .unwrap_or("");
55 |
56 | IGNORED_EXTENSIONS.contains(&format!(".{}", extension))
57 | || IGNORED_PREFIXES
58 | .iter()
59 | .any(|prefix| file_name.starts_with(prefix))
60 | };
61 |
62 | let walker = WalkBuilder::new(repo_path)
63 | .hidden(true)
64 | .git_ignore(true)
65 | .parents(false)
66 | .filter_entry(move |f| !is_ignored(f))
67 | .build();
68 |
69 | for entry in walker.flatten() {
70 | match entry.file_type() {
71 | Some(file_type) if file_type.is_file() => {
72 | if let Ok(_file) = File::open(entry.path()) {
73 | result.push(entry.path().to_string_lossy().to_string());
74 | }
75 | }
76 | // if file type is directory or other things, just skip it
77 | _ => continue,
78 | }
79 | }
80 |
81 | result
82 | }
83 |
84 | #[rustfmt::skip]
85 | static DICTIONARY: Lazy> = Lazy::new(|| {
86 | vec![
87 | "init", "reset", "grep", "help", "file", "pattern", "format", "json", "plain",
88 | ]
89 | });
90 |
91 | pub fn suggest_subcommand(input: &str) -> Option {
92 | let mut closest = None;
93 | let mut smallest_distance = 80; // default maximum line length setting for COBOL
94 | const THRESHOLD: usize = 3;
95 |
96 | for item in &*DICTIONARY {
97 | let distance = levenshtein(input, *item);
98 | match distance {
99 | 0 => return None,
100 | 1..=THRESHOLD if distance < smallest_distance => {
101 | smallest_distance = distance;
102 | closest = Some((*item).to_string());
103 | }
104 | _ => {}
105 | }
106 | }
107 |
108 | closest
109 | }
--------------------------------------------------------------------------------
/tests/analyzer_test.rs:
--------------------------------------------------------------------------------
// Test-suite root for the analyzer unit tests.
#[cfg(test)]
mod analyzer_test {
    mod analyze_test;
}
5 |
--------------------------------------------------------------------------------
/tests/analyzer_test/analyze_test.rs:
--------------------------------------------------------------------------------
1 | use balpan::analyzer::Analyzer;
2 | use balpan::grammar::{build_grammars, fetch_grammars};
3 | use balpan::language::Language;
4 | use indoc::indoc;
5 |
6 | fn assert_analyzed_source_code(source_code: &str, expected: &str, language: &str) {
7 | fetch_grammars().unwrap();
8 | build_grammars(None).unwrap();
9 |
10 | let analyzer = Analyzer {
11 | source_code: source_code.to_string(),
12 | language: Language::from(language),
13 | };
14 |
15 | let writer_queue = &analyzer.analyze();
16 | let mut string_vector = vec![];
17 |
18 | for line in writer_queue {
19 | string_vector.push(String::from(line));
20 | }
21 |
22 | let actual: String = string_vector
23 | // .iter()
24 | // .map( |str| { *str } )
25 | // .collect::>()
26 | .join("\n");
27 |
28 | assert_eq!(expected, actual);
29 | }
30 |
/// A `[TODO]` comment for an item wearing several stacked attribute macros
/// must land above the outermost attribute, not between attributes.
#[test]
fn test_stacked_macros() {
    let source_code = indoc! {r#"
        #[derive(Deserialize)]
        #[serde(bound(deserialize = "T: Deserialize<'de>"))]
        struct List {
            #[serde(deserialize_with = "deserialize_vec")]
            items: Vec,
        }"#};

    let result = indoc! {r#"
        /// [TODO] List
        #[derive(Deserialize)]
        #[serde(bound(deserialize = "T: Deserialize<'de>"))]
        struct List {
            #[serde(deserialize_with = "deserialize_vec")]
            items: Vec,
        }"#};

    assert_analyzed_source_code(source_code, result, "rust")
}
52 |
/// Analyzing source that already carries its `[TODO]` comment must be a
/// no-op: no duplicated annotations on a second pass.
#[test]
fn test_idempotency() {
    let source_code = indoc! {r#"
        /// [TODO] List
        #[derive(Deserialize)]
        #[serde(bound(deserialize = "T: Deserialize<'de>"))]
        struct List {
            #[serde(deserialize_with = "deserialize_vec")]
            items: Vec,
        }"#};

    let result = indoc! {r#"
        /// [TODO] List
        #[derive(Deserialize)]
        #[serde(bound(deserialize = "T: Deserialize<'de>"))]
        struct List {
            #[serde(deserialize_with = "deserialize_vec")]
            items: Vec,
        }"#};

    assert_analyzed_source_code(source_code, result, "rust")
}
75 |
/// Idempotency must also hold for annotations on nested scopes
/// (inner classes and methods with `Outer > inner` scope chains).
#[test]
fn test_idempotency_within_nested_scope() {
    let source_code = indoc! {"
        # [TODO] Post
        class Post(models.Model):
            user = models.ForeignKey(User)

            # [TODO] Post > Meta
            class Meta:
                table_name = 'posts'

            # [TODO] Post > count
            @staticmethod
            def count(cls):
                return cls.count

            # [TODO] Post > author
            def author(self):
                return self.user

        # [TODO] Comment
        class Comment(models.Model):
            user = models.ForeignKey(User)

            # [TODO] Comment > Meta
            class Meta:
                table_name = 'comments'

            # [TODO] Comment > count
            @staticmethod
            def count(cls):
                return cls.count

            # [TODO] Comment > author
            def author(self):
                return self.user"};

    let result = indoc! {"
        # [TODO] Post
        class Post(models.Model):
            user = models.ForeignKey(User)

            # [TODO] Post > Meta
            class Meta:
                table_name = 'posts'

            # [TODO] Post > count
            @staticmethod
            def count(cls):
                return cls.count

            # [TODO] Post > author
            def author(self):
                return self.user

        # [TODO] Comment
        class Comment(models.Model):
            user = models.ForeignKey(User)

            # [TODO] Comment > Meta
            class Meta:
                table_name = 'comments'

            # [TODO] Comment > count
            @staticmethod
            def count(cls):
                return cls.count

            # [TODO] Comment > author
            def author(self):
                return self.user"};

    assert_analyzed_source_code(source_code, result, "python")
}
150 |
/// Items carrying `#[cfg(test)]` / `#[test]` attributes still receive
/// `[TODO]` comments; the attributes are not mistaken for annotations.
#[test]
fn test_ignore_todo_test_macro() {
    let source_code = indoc! {"
        #[cfg(test)]
        mod tests {
            use super::*;

            #[test]
            fn test_foo() {
                assert_eq!(foo(), 1);
            }
        }"};

    let result = indoc! {"
        /// [TODO] tests
        #[cfg(test)]
        mod tests {
            use super::*;

            /// [TODO] tests > test_foo
            #[test]
            fn test_foo() {
                assert_eq!(foo(), 1);
            }
        }"};

    assert_analyzed_source_code(source_code, result, "rust")
}
179 |
/// A `#[doc = "..."]` attribute does not count as an existing annotation;
/// the `[TODO]` comment still goes above it.
#[test]
fn test_ignore_doc_macro() {
    let source_code = indoc! {r#"
        #[doc = "This is a doc comment"]
        fn foo() {
            println!("foo");
        }"#};

    let result = indoc! {r#"
        /// [TODO] foo
        #[doc = "This is a doc comment"]
        fn foo() {
            println!("foo");
        }"#};

    assert_analyzed_source_code(source_code, result, "rust")
}
197 |
/// A trait and its impl block are both annotated with the trait name, and
/// methods inside the impl get a `Trait > method` scope chain.
#[test]
fn test_trait_and_impl() {
    let source_code = indoc! { "
        pub trait RangeFactory {
            fn from_node(node: Node) -> Range;
        }

        impl RangeFactory for Range {
            #[inline]
            fn from_node(node: Node) -> Range {
                Range {
                    start_byte: node.start_byte(),
                    end_byte: node.end_byte(),
                    start_point: node.start_position(),
                    end_point: node.end_position(),
                }
            }
        }"};

    let result = indoc! { "
        /// [TODO] RangeFactory
        pub trait RangeFactory {
            fn from_node(node: Node) -> Range;
        }

        /// [TODO] RangeFactory
        impl RangeFactory for Range {
            /// [TODO] RangeFactory > from_node
            #[inline]
            fn from_node(node: Node) -> Range {
                Range {
                    start_byte: node.start_byte(),
                    end_byte: node.end_byte(),
                    start_point: node.start_position(),
                    end_point: node.end_position(),
                }
            }
        }"};

    assert_analyzed_source_code(source_code, result, "rust")
}
239 |
/// Same trait/impl case as above, but nested inside a `mod`: every scope
/// chain must be prefixed with the enclosing module name.
#[test]
fn test_trait_and_impl_with_mod() {
    let source_code = indoc! { "
        mod tree_sitter_extended {
            pub trait RangeFactory {
                fn from_node(node: Node) -> Range;
            }

            impl RangeFactory for Range {
                #[inline]
                fn from_node(node: Node) -> Range {
                    Range {
                        start_byte: node.start_byte(),
                        end_byte: node.end_byte(),
                        start_point: node.start_position(),
                        end_point: node.end_position(),
                    }
                }
            }
        }"};

    let result = indoc! { "
        /// [TODO] tree_sitter_extended
        mod tree_sitter_extended {
            /// [TODO] tree_sitter_extended > RangeFactory
            pub trait RangeFactory {
                fn from_node(node: Node) -> Range;
            }

            /// [TODO] tree_sitter_extended > RangeFactory
            impl RangeFactory for Range {
                /// [TODO] tree_sitter_extended > RangeFactory > from_node
                #[inline]
                fn from_node(node: Node) -> Range {
                    Range {
                        start_byte: node.start_byte(),
                        end_byte: node.end_byte(),
                        start_point: node.start_position(),
                        end_point: node.end_position(),
                    }
                }
            }
        }"};

    assert_analyzed_source_code(source_code, result, "rust")
}
286 |
--------------------------------------------------------------------------------
/tests/integration_test.rs:
--------------------------------------------------------------------------------
// Integration-test root: shared helper plus the per-language suites.
#[cfg(test)]
mod integration_test {
    use balpan::analyzer::Analyzer;
    use balpan::grammar::{build_grammars, fetch_grammars};
    use balpan::language::Language;

    mod analyze_command_test;
    // mod toggle_command_test;

    /// Shared assertion used by every case test: analyze `source_code` as
    /// `language` and require the annotated output to equal `expected`.
    pub fn assert_analyzed_source_code(source_code: &str, expected: &str, language: &str) {
        // Grammars must be fetched and built before the analyzer can parse.
        fetch_grammars().unwrap();
        build_grammars(None).unwrap();

        let analyzer = Analyzer {
            source_code: source_code.to_string(),
            language: Language::from(language),
        };

        let writer_queue = &analyzer.analyze();
        let mut string_vector = vec![];

        for line in writer_queue {
            string_vector.push(String::from(line));
        }

        let actual: String = string_vector.join("\n");

        // Print both sides before asserting so large fixture diffs are readable.
        if actual != expected {
            println!("expected: {}\n\n", expected);
            println!("actual: {}\n\n", actual);
        }

        assert_eq!(expected, actual);
    }
}
36 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test.rs:
--------------------------------------------------------------------------------
// One module per target language; each pulls in real-world case tests.
#[cfg(test)]
mod python_test;

#[cfg(test)]
mod rust_test;

#[cfg(test)]
mod ruby_test;

#[cfg(test)]
mod cpp_test;

#[cfg(test)]
mod c_test;

#[cfg(test)]
mod typescript_test;

#[cfg(test)]
mod javascript_test;
21 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/c_test.rs:
--------------------------------------------------------------------------------
// C analyze-command cases, sampled from real-world codebases.
#[cfg(test)]
mod neovim_case_test;

#[cfg(test)]
mod redis_case_test;

#[cfg(test)]
mod nginx_case_test;
9 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/c_test/neovim_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// A plain C function definition gets a single `[TODO]` comment directly
/// above its signature line.
#[test]
fn test_function_definition() {
    let source_code = indoc! { r#"
        static OptVal object_as_optval(Object o, bool *error)
        {
          switch (o.type) {
          case kObjectTypeNil:
            return NIL_OPTVAL;
          case kObjectTypeBoolean:
            return BOOLEAN_OPTVAL(o.data.boolean);
          case kObjectTypeInteger:
            return NUMBER_OPTVAL(o.data.integer);
          case kObjectTypeString:
            return STRING_OPTVAL(o.data.string);
          default:
            *error = true;
            return NIL_OPTVAL;
          }
        }"#};

    let result = indoc! { r#"
        /// [TODO] object_as_optval
        static OptVal object_as_optval(Object o, bool *error)
        {
          switch (o.type) {
          case kObjectTypeNil:
            return NIL_OPTVAL;
          case kObjectTypeBoolean:
            return BOOLEAN_OPTVAL(o.data.boolean);
          case kObjectTypeInteger:
            return NUMBER_OPTVAL(o.data.integer);
          case kObjectTypeString:
            return STRING_OPTVAL(o.data.string);
          default:
            *error = true;
            return NIL_OPTVAL;
          }
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
45 |
/// Preprocessor conditionals (`#ifdef`/`#else`/`#endif`) inside the body
/// must not disturb where the annotation is placed.
#[test]
fn test_function_definition_with_conditional_compilation() {
    let source_code = indoc! { r#"
        int path_is_absolute(const char *fname)
        {
        #ifdef MSWIN
          if (*fname == NUL) {
            return false;
          }
          // A name like "d:/foo" and "//server/share" is absolute
          return ((isalpha((uint8_t)fname[0]) && fname[1] == ':' && vim_ispathsep_nocolon(fname[2]))
                  || (vim_ispathsep_nocolon(fname[0]) && fname[0] == fname[1]));
        #else
          // UNIX: This just checks if the file name starts with '/' or '~'.
          return *fname == '/' || *fname == '~';
        #endif
        }"#};

    let result = indoc! { r#"
        /// [TODO] path_is_absolute
        int path_is_absolute(const char *fname)
        {
        #ifdef MSWIN
          if (*fname == NUL) {
            return false;
          }
          // A name like "d:/foo" and "//server/share" is absolute
          return ((isalpha((uint8_t)fname[0]) && fname[1] == ':' && vim_ispathsep_nocolon(fname[2]))
                  || (vim_ispathsep_nocolon(fname[0]) && fname[0] == fname[1]));
        #else
          // UNIX: This just checks if the file name starts with '/' or '~'.
          return *fname == '/' || *fname == '~';
        #endif
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
83 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/c_test/nginx_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// nginx style puts the return type on its own line; the `[TODO]` comment
/// must land above the type line, not the function-name line.
#[test]
fn test_function_definition_with_nginx_convention() {
    let source_code = indoc! { r#"
        static int
        ngx_stream_ssl_alpn_select(ngx_ssl_conn_t *ssl_conn, const unsigned char **out,
            unsigned char *outlen, const unsigned char *in, unsigned int inlen,
            void *arg)
        {
            ngx_str_t *alpn;
        #if (NGX_DEBUG)
            unsigned int i;
            ngx_connection_t *c;

            c = ngx_ssl_get_connection(ssl_conn);

            for (i = 0; i < inlen; i += in[i] + 1) {
                ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0,
                    "SSL ALPN supported by client: %*s",
                    (size_t) in[i], &in[i + 1]);
            }

        #endif

            alpn = arg;

            if (SSL_select_next_proto((unsigned char **) out, outlen, alpn->data,
                    alpn->len, in, inlen)
                != OPENSSL_NPN_NEGOTIATED)
            {
                return SSL_TLSEXT_ERR_ALERT_FATAL;
            }

            ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0,
                "SSL ALPN selected: %*s", (size_t) *outlen, *out);

            return SSL_TLSEXT_ERR_OK;
        }"#};

    let result = indoc! { r#"
        /// [TODO] ngx_stream_ssl_alpn_select
        static int
        ngx_stream_ssl_alpn_select(ngx_ssl_conn_t *ssl_conn, const unsigned char **out,
            unsigned char *outlen, const unsigned char *in, unsigned int inlen,
            void *arg)
        {
            ngx_str_t *alpn;
        #if (NGX_DEBUG)
            unsigned int i;
            ngx_connection_t *c;

            c = ngx_ssl_get_connection(ssl_conn);

            for (i = 0; i < inlen; i += in[i] + 1) {
                ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0,
                    "SSL ALPN supported by client: %*s",
                    (size_t) in[i], &in[i + 1]);
            }

        #endif

            alpn = arg;

            if (SSL_select_next_proto((unsigned char **) out, outlen, alpn->data,
                    alpn->len, in, inlen)
                != OPENSSL_NPN_NEGOTIATED)
            {
                return SSL_TLSEXT_ERR_ALERT_FATAL;
            }

            ngx_log_debug2(NGX_LOG_DEBUG_STREAM, c->log, 0,
                "SSL ALPN selected: %*s", (size_t) *outlen, *out);

            return SSL_TLSEXT_ERR_OK;
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
81 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/c_test/redis_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// Bare function *declarations* (prototypes, no body) are left untouched:
/// the expected output is identical to the input.
#[test]
fn test_declaration_of_function() {
    let source_code = indoc! { r#"
        list *listCreate(void);
        void listRelease(list *list);
        void listEmpty(list *list);
        list *listAddNodeHead(list *list, void *value);
        list *listAddNodeTail(list *list, void *value);
        list *listInsertNode(list *list, listNode *old_node, void *value, int after);
        void listDelNode(list *list, listNode *node);
        listIter *listGetIterator(list *list, int direction);
        listNode *listNext(listIter *iter);
        void listReleaseIterator(listIter *iter);
        list *listDup(list *orig);
        listNode *listSearchKey(list *list, void *key);
        listNode *listIndex(list *list, long index);
        void listRewind(list *list, listIter *li);
        void listRewindTail(list *list, listIter *li);
        void listRotateTailToHead(list *list);
        void listRotateHeadToTail(list *list);
        void listJoin(list *l, list *o);
        void listInitNode(listNode *node, void *value);
        void listLinkNodeHead(list *list, listNode *node);
        void listLinkNodeTail(list *list, listNode *node);
        void listUnlinkNode(list *list, listNode *node);"#};

    let result = indoc! { r#"
        list *listCreate(void);
        void listRelease(list *list);
        void listEmpty(list *list);
        list *listAddNodeHead(list *list, void *value);
        list *listAddNodeTail(list *list, void *value);
        list *listInsertNode(list *list, listNode *old_node, void *value, int after);
        void listDelNode(list *list, listNode *node);
        listIter *listGetIterator(list *list, int direction);
        listNode *listNext(listIter *iter);
        void listReleaseIterator(listIter *iter);
        list *listDup(list *orig);
        listNode *listSearchKey(list *list, void *key);
        listNode *listIndex(list *list, long index);
        void listRewind(list *list, listIter *li);
        void listRewindTail(list *list, listIter *li);
        void listRotateTailToHead(list *list);
        void listRotateHeadToTail(list *list);
        void listJoin(list *l, list *o);
        void listInitNode(listNode *node, void *value);
        void listLinkNodeHead(list *list, listNode *node);
        void listLinkNodeTail(list *list, listNode *node);
        void listUnlinkNode(list *list, listNode *node);"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
56 |
/// Function definitions preceded by a bare macro invocation (e.g.
/// `REDIS_NO_SANITIZE("bounds")`) — not handled yet, hence ignored.
// NOTE(review): `#[ignore]` alone does not register a test with the
// harness; `#[test]` is required for it to be listed (as ignored) at all.
#[test]
#[ignore]
fn test_function_definition_together_with_macro_combined() {
    let source_code = indoc! {r#"
        REDIS_NO_SANITIZE("bounds")
        clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) {

            uint32_t channel_len, message_len;

            channel = getDecodedObject(channel);
            message = getDecodedObject(message);
            channel_len = sdslen(channel->ptr);
            message_len = sdslen(message->ptr);

            size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
            msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
            clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen);

            clusterMsg *hdr = &msgblock->msg;
            hdr->data.publish.msg.channel_len = htonl(channel_len);
            hdr->data.publish.msg.message_len = htonl(message_len);
            memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr));
            memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr),
                message->ptr,sdslen(message->ptr));

            decrRefCount(channel);
            decrRefCount(message);

            return msgblock;
        }"#};

    let result = indoc! {r#"
        /// [TODO] clusterCreatePublishMsgBlock
        REDIS_NO_SANITIZE("bounds")
        clusterMsgSendBlock *clusterCreatePublishMsgBlock(robj *channel, robj *message, uint16_t type) {

            uint32_t channel_len, message_len;

            channel = getDecodedObject(channel);
            message = getDecodedObject(message);
            channel_len = sdslen(channel->ptr);
            message_len = sdslen(message->ptr);

            size_t msglen = sizeof(clusterMsg)-sizeof(union clusterMsgData);
            msglen += sizeof(clusterMsgDataPublish) - 8 + channel_len + message_len;
            clusterMsgSendBlock *msgblock = createClusterMsgSendBlock(type, msglen);

            clusterMsg *hdr = &msgblock->msg;
            hdr->data.publish.msg.channel_len = htonl(channel_len);
            hdr->data.publish.msg.message_len = htonl(message_len);
            memcpy(hdr->data.publish.msg.bulk_data,channel->ptr,sdslen(channel->ptr));
            memcpy(hdr->data.publish.msg.bulk_data+sdslen(channel->ptr),
                message->ptr,sdslen(message->ptr));

            decrRefCount(channel);
            decrRefCount(message);

            return msgblock;
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
118 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/cpp_test.rs:
--------------------------------------------------------------------------------
// C++ analyze-command cases, sampled from real-world codebases.
#[cfg(test)]
mod blazingmq_case_test;
3 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/cpp_test/blazingmq_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// Nested namespaces each get their own scope-chained `[TODO]`, an
/// unnamed namespace is labeled `anonymous`, and an existing `///` doc
/// comment stays above the inserted annotation.
/// (Note: `result` is intentionally declared before `source_code` here.)
#[test]
fn test_function_definition_with_nested_scope() {
    let result = indoc! { r#"
        /// [TODO] BloombergLP
        namespace BloombergLP {
        /// [TODO] BloombergLP > bmqimp
        namespace bmqimp {
        /// [TODO] BloombergLP > bmqimp > anonymous
        namespace {
        // CONSTANTS
        const double k_RECONNECT_INTERVAL_MS = 500;
        const int k_RECONNECT_COUNT = bsl::numeric_limits::max();
        const bsls::Types::Int64 k_CHANNEL_LOW_WATERMARK = 512 * 1024;

        /// Create the StatContextConfiguration to use, from the specified
        /// `options`, and using the specified `allocator` for memory allocations.
        /// [TODO] BloombergLP > bmqimp > anonymous > statContextConfiguration
        mwcst::StatContextConfiguration
        statContextConfiguration(const bmqt::SessionOptions& options,
                                 bslma::Allocator* allocator)
        {
            mwcst::StatContextConfiguration config("stats", allocator);
            if (options.statsDumpInterval() != bsls::TimeInterval()) {
                // Stats configuration:
                // we snapshot every second
                // first level keeps 30s of history
                // second level keeps enough for the dump interval
                // Because some stats require range computation, second level actually
                // has to be of size 1 more than the dump interval
                config.defaultHistorySize(
                    30,
                    (options.statsDumpInterval().seconds() / 30) + 1);
            }
            else {
                config.defaultHistorySize(2);
            }

            return config;
        }
        }
        }
        }"#};

    let source_code = indoc! { r#"
        namespace BloombergLP {
        namespace bmqimp {
        namespace {
        // CONSTANTS
        const double k_RECONNECT_INTERVAL_MS = 500;
        const int k_RECONNECT_COUNT = bsl::numeric_limits::max();
        const bsls::Types::Int64 k_CHANNEL_LOW_WATERMARK = 512 * 1024;

        /// Create the StatContextConfiguration to use, from the specified
        /// `options`, and using the specified `allocator` for memory allocations.
        mwcst::StatContextConfiguration
        statContextConfiguration(const bmqt::SessionOptions& options,
                                 bslma::Allocator* allocator)
        {
            mwcst::StatContextConfiguration config("stats", allocator);
            if (options.statsDumpInterval() != bsls::TimeInterval()) {
                // Stats configuration:
                // we snapshot every second
                // first level keeps 30s of history
                // second level keeps enough for the dump interval
                // Because some stats require range computation, second level actually
                // has to be of size 1 more than the dump interval
                config.defaultHistorySize(
                    30,
                    (options.statsDumpInterval().seconds() / 30) + 1);
            }
            else {
                config.defaultHistorySize(2);
            }

            return config;
        }
        }
        }
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
86 |
/// A class inside a namespace is annotated with the enclosing scope chain
/// (`namespace > Class`).
#[test]
fn test_class_declaration_with_nested_scope() {
    let source_code = indoc! { r#"
        namespace m_bmqbrkr {
        class Task_AllocatorManager {
          private:
            mqbcfg::AllocatorType::Value d_type;

            bsls::ObjectBuffer d_store;
          private:
            Task_AllocatorManager(const Task_AllocatorManager&); // = delete;
          public:
            explicit Task_AllocatorManager(mqbcfg::AllocatorType::Value type);

            ~Task_AllocatorManager();
        };
        }"#};

    let result = indoc! { r#"
        /// [TODO] m_bmqbrkr
        namespace m_bmqbrkr {
        /// [TODO] m_bmqbrkr > Task_AllocatorManager
        class Task_AllocatorManager {
          private:
            mqbcfg::AllocatorType::Value d_type;

            bsls::ObjectBuffer d_store;
          private:
            Task_AllocatorManager(const Task_AllocatorManager&); // = delete;
          public:
            explicit Task_AllocatorManager(mqbcfg::AllocatorType::Value type);

            ~Task_AllocatorManager();
        };
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
125 |
/// Templated free functions and out-of-class member definitions — not
/// handled yet, hence ignored.
// NOTE(review): `#[ignore]` alone does not register a test with the
// harness; `#[test]` is required for it to be listed (as ignored) at all.
#[test]
#[ignore]
fn test_templated_function_definition() {
    let source_code = indoc! { r#"
        template
        bool parseCommand(CMD* command, const bsl::string& jsonInput)
        {
            bsl::istringstream is(jsonInput);
            baljsn::DecoderOptions options;
            options.setSkipUnknownElements(true);
            baljsn::Decoder decoder;
            int rc = decoder.decode(is, command, options);
            if (rc != 0) {
                BALL_LOG_ERROR << "Unable to decode: " << jsonInput << bsl::endl
                               << decoder.loggedMessages();
                return false; // RETURN
            }

            return true;
        }

        template
        inline bool Value::is() const
        {
            return d_value.is();
        }

        template
        inline const TYPE& Value::the() const
        {
            return d_value.the();
        }

        template
        inline typename VISITOR::ResultType Value::apply(const VISITOR& visitor) const
        {
            return d_value.apply(visitor);
        }
        "#};

    let result = indoc! { r#"
        /// [TODO] parseCommand
        template
        bool parseCommand(CMD* command, const bsl::string& jsonInput)
        {
            bsl::istringstream is(jsonInput);
            baljsn::DecoderOptions options;
            options.setSkipUnknownElements(true);
            baljsn::Decoder decoder;
            int rc = decoder.decode(is, command, options);
            if (rc != 0) {
                BALL_LOG_ERROR << "Unable to decode: " << jsonInput << bsl::endl
                               << decoder.loggedMessages();
                return false; // RETURN
            }

            return true;
        }

        /// [TODO] Value::is
        template
        inline bool Value::is() const
        {
            return d_value.is();
        }

        /// [TODO] Value::the
        template
        inline const TYPE& Value::the() const
        {
            return d_value.the();
        }

        /// [TODO] Value::apply
        template
        inline typename VISITOR::ResultType Value::apply(const VISITOR& visitor) const
        {
            return d_value.apply(visitor);
        }"#};

    assert_analyzed_source_code(source_code, result, "cpp");
}
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/javascript_test.rs:
--------------------------------------------------------------------------------
1 | // Integration cases for the JavaScript analyzer; gated like the sibling *_test.rs module files.
2 | #[cfg(test)]
3 | mod react_native_case_test;
4 | 
5 | #[cfg(test)]
6 | mod svelt_cast_test;
7 | 
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/javascript_test/react_native_case_test.rs:
--------------------------------------------------------------------------------
1 | use indoc::indoc;
2 |
3 | use crate::integration_test::assert_analyzed_source_code;
4 |
5 | #[test]
6 | fn test_function_declaration() { // plain and async `function` declarations each gain a "// [TODO] <name>" marker
7 |     let source_code = indoc! {r#"
8 |         function getPackageName(file /*: string */) /*: string */ {
9 |           return path.relative(PACKAGES_DIR, file).split(path.sep)[0];
10 |         }
11 | 
12 |         function getBuildPath(file /*: string */) /*: string */ {
13 |           const packageDir = path.join(PACKAGES_DIR, getPackageName(file));
14 | 
15 |           return path.join(
16 |             packageDir,
17 |             file.replace(path.join(packageDir, SRC_DIR), BUILD_DIR),
18 |           );
19 |         }
20 | 
21 |         async function rewritePackageExports(packageName /*: string */) {
22 |           const packageJsonPath = path.join(PACKAGES_DIR, packageName, 'package.json');
23 |           const pkg = JSON.parse(await fs.readFile(packageJsonPath, 'utf8'));
24 | 
25 |           await fs.writeFile(
26 |             packageJsonPath,
27 |             prettier.format(JSON.stringify(pkg), {parser: 'json'}),
28 |           );
29 |         }"#};
30 |     // expected: "// [TODO] <name>" above each of the three function declarations, bodies untouched
31 |     let expected = indoc! {r#"
32 |         // [TODO] getPackageName
33 |         function getPackageName(file /*: string */) /*: string */ {
34 |           return path.relative(PACKAGES_DIR, file).split(path.sep)[0];
35 |         }
36 | 
37 |         // [TODO] getBuildPath
38 |         function getBuildPath(file /*: string */) /*: string */ {
39 |           const packageDir = path.join(PACKAGES_DIR, getPackageName(file));
40 | 
41 |           return path.join(
42 |             packageDir,
43 |             file.replace(path.join(packageDir, SRC_DIR), BUILD_DIR),
44 |           );
45 |         }
46 | 
47 |         // [TODO] rewritePackageExports
48 |         async function rewritePackageExports(packageName /*: string */) {
49 |           const packageJsonPath = path.join(PACKAGES_DIR, packageName, 'package.json');
50 |           const pkg = JSON.parse(await fs.readFile(packageJsonPath, 'utf8'));
51 | 
52 |           await fs.writeFile(
53 |             packageJsonPath,
54 |             prettier.format(JSON.stringify(pkg), {parser: 'json'}),
55 |           );
56 |         }"#};
57 | 
58 |     assert_analyzed_source_code(source_code, expected, "javascript")
59 | }
60 |
61 | #[test]
62 | fn test_class() { // only the class declaration itself gains a marker; methods inside are left unmarked
63 |     let source_code = indoc! {r#"
64 |         export class KeyPressHandler {
65 |           _isInterceptingKeyStrokes = false;
66 |           _isHandlingKeyPress = false;
67 |           _onPress: (key: string) => Promise;
68 | 
69 |           constructor(onPress: (key: string) => Promise) {
70 |             this._onPress = onPress;
71 |           }
72 | 
73 |           /** Start intercepting all key strokes and passing them to the input `onPress` method. */
74 |           startInterceptingKeyStrokes() {
75 |             if (this._isInterceptingKeyStrokes) {
76 |               return;
77 |             }
78 |             this._isInterceptingKeyStrokes = true;
79 |             const {stdin} = process;
80 |             // $FlowFixMe[prop-missing]
81 |             stdin.setRawMode(true);
82 |             stdin.resume();
83 |             stdin.setEncoding('utf8');
84 |             stdin.on('data', this._handleKeypress);
85 |           }
86 | 
87 |           /** Stop intercepting all key strokes. */
88 |           stopInterceptingKeyStrokes() {
89 |             if (!this._isInterceptingKeyStrokes) {
90 |               return;
91 |             }
92 |             this._isInterceptingKeyStrokes = false;
93 |             const {stdin} = process;
94 |             stdin.removeListener('data', this._handleKeypress);
95 |             // $FlowFixMe[prop-missing]
96 |             stdin.setRawMode(false);
97 |             stdin.resume();
98 |           }
99 |         }"#};
100 |     // expected: a single "// [TODO] KeyPressHandler" line above the class; everything else byte-identical
101 |     let expected = indoc! {r#"
102 |         // [TODO] KeyPressHandler
103 |         export class KeyPressHandler {
104 |           _isInterceptingKeyStrokes = false;
105 |           _isHandlingKeyPress = false;
106 |           _onPress: (key: string) => Promise;
107 | 
108 |           constructor(onPress: (key: string) => Promise) {
109 |             this._onPress = onPress;
110 |           }
111 | 
112 |           /** Start intercepting all key strokes and passing them to the input `onPress` method. */
113 |           startInterceptingKeyStrokes() {
114 |             if (this._isInterceptingKeyStrokes) {
115 |               return;
116 |             }
117 |             this._isInterceptingKeyStrokes = true;
118 |             const {stdin} = process;
119 |             // $FlowFixMe[prop-missing]
120 |             stdin.setRawMode(true);
121 |             stdin.resume();
122 |             stdin.setEncoding('utf8');
123 |             stdin.on('data', this._handleKeypress);
124 |           }
125 | 
126 |           /** Stop intercepting all key strokes. */
127 |           stopInterceptingKeyStrokes() {
128 |             if (!this._isInterceptingKeyStrokes) {
129 |               return;
130 |             }
131 |             this._isInterceptingKeyStrokes = false;
132 |             const {stdin} = process;
133 |             stdin.removeListener('data', this._handleKeypress);
134 |             // $FlowFixMe[prop-missing]
135 |             stdin.setRawMode(false);
136 |             stdin.resume();
137 |           }
138 |         }"#};
139 | 
140 |     assert_analyzed_source_code(source_code, expected, "javascript")
141 | }
142 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/javascript_test/svelt_cast_test.rs:
--------------------------------------------------------------------------------
1 | use indoc::indoc;
2 |
3 | use crate::integration_test::assert_analyzed_source_code;
4 |
5 | #[test]
6 | #[ignore = "TODO: arrow function"]
7 | fn test_arrow_function() { // exported arrow-function consts should gain "// [TODO] <name>" below any doc block
8 |     let source_code = indoc! {r#"
9 |         export const parse = (source) =>
10 |             code_red.parse(source, {
11 |                 sourceType: 'module',
12 |                 ecmaVersion: 13,
13 |                 locations: true
14 |             });
15 | 
16 |         /**
17 |          * @param {string} source
18 |          * @param {number} index
19 |          */
20 |         export const parse_expression_at = (source, index) =>
21 |             code_red.parseExpressionAt(source, index, {
22 |                 sourceType: 'module',
23 |                 ecmaVersion: 13,
24 |                 locations: true
25 |             });"#};
26 |     // expected: marker sits between the existing JSDoc block and the declaration it documents
27 |     let expected = indoc! {r#"
28 |         // [TODO] parse
29 |         export const parse = (source) =>
30 |             code_red.parse(source, {
31 |                 sourceType: 'module',
32 |                 ecmaVersion: 13,
33 |                 locations: true
34 |             });
35 | 
36 |         /**
37 |          * @param {string} source
38 |          * @param {number} index
39 |          */
40 |         // [TODO] parse_expression_at
41 |         export const parse_expression_at = (source, index) =>
42 |             code_red.parseExpressionAt(source, index, {
43 |                 sourceType: 'module',
44 |                 ecmaVersion: 13,
45 |                 locations: true
46 |             });"#};
47 | 
48 |     assert_analyzed_source_code(source_code, expected, "javascript")
49 | }
50 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/python_test.rs:
--------------------------------------------------------------------------------
1 | #[cfg(test)]
2 | mod rustpython_case_test;
3 |
4 | #[cfg(test)]
5 | mod python_dependency_injector_case_test;
6 |
7 | #[cfg(test)]
8 | mod django_case_test;
9 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/python_test/django_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_class_definition_within_class() { // nested class gets a scoped "Outer > Inner" marker
6 |     let source_code = indoc! {r#"
7 |         class Car(models.Model):
8 |             name = models.CharField(max_length=20)
9 |             default_parts = models.ManyToManyField(Part)
10 |             optional_parts = models.ManyToManyField(Part, related_name="cars_optional")
11 | 
12 |             class Meta:
13 |                 ordering = ("name",)"#};
14 |     // expected: "# [TODO] Car" above the outer class and "# [TODO] Car > Meta" above the nested one
15 |     let result = indoc! {r#"
16 |         # [TODO] Car
17 |         class Car(models.Model):
18 |             name = models.CharField(max_length=20)
19 |             default_parts = models.ManyToManyField(Part)
20 |             optional_parts = models.ManyToManyField(Part, related_name="cars_optional")
21 | 
22 |             # [TODO] Car > Meta
23 |             class Meta:
24 |                 ordering = ("name",)"#};
25 | 
26 |     assert_analyzed_source_code(source_code, result, "python")
27 | }
28 |
29 | #[test]
30 | fn test_decorated_definitions_within_class_definition() { // marker goes above the decorator, not the def
31 |     let source_code = indoc! {r#"
32 |         class Choices(enum.Enum, metaclass=ChoicesMeta):
33 |             """Class for creating enumerated choices."""
34 | 
35 |             @DynamicClassAttribute
36 |             def label(self):
37 |                 return self._label_
38 | 
39 |             @property
40 |             def do_not_call_in_templates(self):
41 |                 return True"#};
42 |     // expected: scoped "Choices > <method>" markers placed above each method's decorator
43 |     let result = indoc! {r#"
44 |         # [TODO] Choices
45 |         class Choices(enum.Enum, metaclass=ChoicesMeta):
46 |             """Class for creating enumerated choices."""
47 | 
48 |             # [TODO] Choices > label
49 |             @DynamicClassAttribute
50 |             def label(self):
51 |                 return self._label_
52 | 
53 |             # [TODO] Choices > do_not_call_in_templates
54 |             @property
55 |             def do_not_call_in_templates(self):
56 |                 return True"#};
57 | 
58 |     assert_analyzed_source_code(source_code, result, "python")
59 | }
60 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/python_test/python_dependency_injector_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_decorated_definition() { // stacked decorators: a single marker above the topmost decorator
6 |     let source_code = indoc! {r#"
7 |         @app.route("/")
8 |         @inject
9 |         def index(service: Service = Provide[Container.service]):
10 |             result = service.process()
11 |             return jsonify({"result": result})"#};
12 |     // expected: one "# [TODO] index" line above "@app.route", decorators and body unchanged
13 |     let result = indoc! {r#"
14 |         # [TODO] index
15 |         @app.route("/")
16 |         @inject
17 |         def index(service: Service = Provide[Container.service]):
18 |             result = service.process()
19 |             return jsonify({"result": result})"#};
20 | 
21 |     assert_analyzed_source_code(source_code, result, "python")
22 | }
23 |
24 | #[test]
25 | fn test_decorated_async_function_definition() { // decorated `async def`s are annotated like plain defs
26 |     let source_code = indoc! {r#"
27 |         @inject
28 |         async def async_injection(
29 |             resource1: object = Provide["resource1"],
30 |             resource2: object = Provide["resource2"],
31 |         ):
32 |             return resource1, resource2
33 | 
34 |         @inject
35 |         async def async_injection_with_closing(
36 |             resource1: object = Closing[Provide["resource1"]],
37 |             resource2: object = Closing[Provide["resource2"]],
38 |         ):
39 |             return resource1, resource2"#};
40 |     // expected: "# [TODO] <name>" above each function's decorator
41 |     let result = indoc! {r#"
42 |         # [TODO] async_injection
43 |         @inject
44 |         async def async_injection(
45 |             resource1: object = Provide["resource1"],
46 |             resource2: object = Provide["resource2"],
47 |         ):
48 |             return resource1, resource2
49 | 
50 |         # [TODO] async_injection_with_closing
51 |         @inject
52 |         async def async_injection_with_closing(
53 |             resource1: object = Closing[Provide["resource1"]],
54 |             resource2: object = Closing[Provide["resource2"]],
55 |         ):
56 |             return resource1, resource2"#};
57 | 
58 |     assert_analyzed_source_code(source_code, result, "python")
59 | }
60 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/python_test/rustpython_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | /// A class with several methods (stdlib-style `FeedParser`): the class and every
5 | /// `def` inside it gain "# [TODO] Class" / "# [TODO] Class > method" markers.
6 | #[test]
7 | fn test_class_definition() {
8 |     let source_code = indoc! {r#"
9 |         class FeedParser:
10 |             """A feed-style parser of email."""
11 | 
12 |             def __init__(self, _factory=None, *, policy=compat32):
13 |                 """_factory is called with no arguments to create a new message obj
14 | 
15 |                 The policy keyword specifies a policy object that controls a number of
16 |                 aspects of the parser's operation.  The default policy maintains
17 |                 backward compatibility.
18 | 
19 |                 """
20 |                 self.policy = policy
21 |                 self._old_style_factory = False
22 |                 if _factory is None:
23 |                     if policy.message_factory is None:
24 |                         from email.message import Message
25 |                         self._factory = Message
26 |                     else:
27 |                         self._factory = policy.message_factory
28 |                 else:
29 |                     self._factory = _factory
30 |                 try:
31 |                     _factory(policy=self.policy)
32 |                 except TypeError:
33 |                     # Assume this is an old-style factory
34 |                     self._old_style_factory = True
35 |                 self._input = BufferedSubFile()
36 |                 self._msgstack = []
37 |                 self._parse = self._parsegen().__next__
38 |                 self._cur = None
39 |                 self._last = None
40 |                 self._headersonly = False
41 | 
42 |             # Non-public interface for supporting Parser's headersonly flag
43 |             def _set_headersonly(self):
44 |                 self._headersonly = True
45 | 
46 |             def feed(self, data):
47 |                 """Push more data into the parser."""
48 |                 self._input.push(data)
49 |                 self._call_parse()
50 | 
51 |             def _call_parse(self):
52 |                 try:
53 |                     self._parse()
54 |                 except StopIteration:
55 |                     pass"#};
56 |     // expected: scoped markers for the class and each method; the pre-existing "# Non-public ..." comment stays above the new marker
57 |     let result = indoc! {r#"
58 |         # [TODO] FeedParser
59 |         class FeedParser:
60 |             """A feed-style parser of email."""
61 | 
62 |             # [TODO] FeedParser > __init__
63 |             def __init__(self, _factory=None, *, policy=compat32):
64 |                 """_factory is called with no arguments to create a new message obj
65 | 
66 |                 The policy keyword specifies a policy object that controls a number of
67 |                 aspects of the parser's operation.  The default policy maintains
68 |                 backward compatibility.
69 | 
70 |                 """
71 |                 self.policy = policy
72 |                 self._old_style_factory = False
73 |                 if _factory is None:
74 |                     if policy.message_factory is None:
75 |                         from email.message import Message
76 |                         self._factory = Message
77 |                     else:
78 |                         self._factory = policy.message_factory
79 |                 else:
80 |                     self._factory = _factory
81 |                 try:
82 |                     _factory(policy=self.policy)
83 |                 except TypeError:
84 |                     # Assume this is an old-style factory
85 |                     self._old_style_factory = True
86 |                 self._input = BufferedSubFile()
87 |                 self._msgstack = []
88 |                 self._parse = self._parsegen().__next__
89 |                 self._cur = None
90 |                 self._last = None
91 |                 self._headersonly = False
92 | 
93 |             # Non-public interface for supporting Parser's headersonly flag
94 |             # [TODO] FeedParser > _set_headersonly
95 |             def _set_headersonly(self):
96 |                 self._headersonly = True
97 | 
98 |             # [TODO] FeedParser > feed
99 |             def feed(self, data):
100 |                 """Push more data into the parser."""
101 |                 self._input.push(data)
102 |                 self._call_parse()
103 | 
104 |             # [TODO] FeedParser > _call_parse
105 |             def _call_parse(self):
106 |                 try:
107 |                     self._parse()
108 |                 except StopIteration:
109 |                     pass"#};
110 | 
111 |     assert_analyzed_source_code(source_code, result, "python")
112 | }
113 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/ruby_test.rs:
--------------------------------------------------------------------------------
1 | #[cfg(test)]
2 | mod mastodon_case_test;
3 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/ruby_test/mastodon_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_class_declaration_with_nested_scope() { // module > class > class > def all get fully-scoped markers
6 |     let source_code = indoc! { r#"
7 |         module Chewy
8 |           class Strategy
9 |             class Mastodon < Base
10 |               def initialize
11 |                 super
12 | 
13 |                 @stash = Hash.new { |hash, key| hash[key] = [] }
14 |               end
15 | 
16 |               def update(type, objects, _options = {})
17 |                 @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled?
18 |               end
19 | 
20 |               def leave
21 |                 RedisConfiguration.with do |redis|
22 |                   redis.pipelined do |pipeline|
23 |                     @stash.each do |type, ids|
24 |                       pipeline.sadd("chewy:queue:#{type.name}", ids)
25 |                     end
26 |                   end
27 |                 end
28 |               end
29 |             end
30 |           end
31 |         end"#};
32 |     // expected: each nesting level prefixed in the marker, e.g. "Chewy > Strategy > Mastodon > leave"
33 |     let result = indoc! { r#"
34 |         # [TODO] Chewy
35 |         module Chewy
36 |           # [TODO] Chewy > Strategy
37 |           class Strategy
38 |             # [TODO] Chewy > Strategy > Mastodon
39 |             class Mastodon < Base
40 |               # [TODO] Chewy > Strategy > Mastodon > initialize
41 |               def initialize
42 |                 super
43 | 
44 |                 @stash = Hash.new { |hash, key| hash[key] = [] }
45 |               end
46 | 
47 |               # [TODO] Chewy > Strategy > Mastodon > update
48 |               def update(type, objects, _options = {})
49 |                 @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled?
50 |               end
51 | 
52 |               # [TODO] Chewy > Strategy > Mastodon > leave
53 |               def leave
54 |                 RedisConfiguration.with do |redis|
55 |                   redis.pipelined do |pipeline|
56 |                     @stash.each do |type, ids|
57 |                       pipeline.sadd("chewy:queue:#{type.name}", ids)
58 |                     end
59 |                   end
60 |                 end
61 |               end
62 |             end
63 |           end
64 |         end"#};
65 | 
66 |     assert_analyzed_source_code(source_code, result, "ruby");
67 | }
68 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/rust_test.rs:
--------------------------------------------------------------------------------
1 | #[cfg(test)]
2 | mod anyhow_case_test;
3 |
4 | #[cfg(test)]
5 | mod rustpython_case_test;
6 |
7 | #[cfg(test)]
8 | mod serde_case_test;
9 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/rust_test/anyhow_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_declaring_error_enum_with_macro() { // marker above the enum's derive; the `use` line stays untouched
6 |     let source_code = indoc! {r#"
7 |         use thiserror::Error;
8 | 
9 |         #[derive(Error, Debug)]
10 |         pub enum FormatError {
11 |             #[error("Invalid header (expected {expected:?}, got {found:?})")]
12 |             InvalidHeader {
13 |                 expected: String,
14 |                 found: String,
15 |             },
16 |             #[error("Missing attribute: {0}")]
17 |             MissingAttribute(String),
18 |         }"#};
19 |     // expected: "/// [TODO] FormatError" inserted above "#[derive(...)]", nothing added for `use`
20 |     let result = indoc! {r#"
21 |         use thiserror::Error;
22 | 
23 |         /// [TODO] FormatError
24 |         #[derive(Error, Debug)]
25 |         pub enum FormatError {
26 |             #[error("Invalid header (expected {expected:?}, got {found:?})")]
27 |             InvalidHeader {
28 |                 expected: String,
29 |                 found: String,
30 |             },
31 |             #[error("Missing attribute: {0}")]
32 |             MissingAttribute(String),
33 |         }"#};
34 | 
35 |     assert_analyzed_source_code(source_code, result, "rust")
36 | }
37 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/rust_test/rustpython_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/jit/src/lib.rs#L10-L28
5 | #[test]
6 | fn test_declaring_enum_with_stacked_attribute() { // marker goes above the whole attribute stack, not between attributes
7 |     let source_code = indoc! {r#"
8 |         #[derive(Debug, thiserror::Error)]
9 |         #[non_exhaustive]
10 |         pub enum JitCompileError {
11 |             #[error("function can't be jitted")]
12 |             NotSupported,
13 |             #[error("bad bytecode")]
14 |             BadBytecode,
15 |             #[error("error while compiling to machine code: {0}")]
16 |             CraneliftError(#[from] ModuleError),
17 |         }
18 | 
19 |         #[derive(Debug, thiserror::Error, Eq, PartialEq)]
20 |         #[non_exhaustive]
21 |         pub enum JitArgumentError {
22 |             #[error("argument is of wrong type")]
23 |             ArgumentTypeMismatch,
24 |             #[error("wrong number of arguments")]
25 |             WrongNumberOfArguments,
26 |         }"#};
27 |     // expected: one "/// [TODO] <enum>" line above each enum's first attribute
28 |     let result = indoc! {r#"
29 |         /// [TODO] JitCompileError
30 |         #[derive(Debug, thiserror::Error)]
31 |         #[non_exhaustive]
32 |         pub enum JitCompileError {
33 |             #[error("function can't be jitted")]
34 |             NotSupported,
35 |             #[error("bad bytecode")]
36 |             BadBytecode,
37 |             #[error("error while compiling to machine code: {0}")]
38 |             CraneliftError(#[from] ModuleError),
39 |         }
40 | 
41 |         /// [TODO] JitArgumentError
42 |         #[derive(Debug, thiserror::Error, Eq, PartialEq)]
43 |         #[non_exhaustive]
44 |         pub enum JitArgumentError {
45 |             #[error("argument is of wrong type")]
46 |             ArgumentTypeMismatch,
47 |             #[error("wrong number of arguments")]
48 |             WrongNumberOfArguments,
49 |         }"#};
50 | 
51 |     assert_analyzed_source_code(source_code, result, "rust")
52 | }
53 |
54 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/vm/src/compiler.rs#L5C1-L6
55 | #[test]
56 | fn test_macro_above_use_declaration_should_be_ignored() { // cfg-gated `use` must pass through unannotated
57 |     let source_code = indoc! { r#"
58 |         #[cfg(feature = "rustpython-compiler")]
59 |         use rustpython_compiler::*;"#};
60 |     // expected: output identical to input — no TODO marker is added for a `use` item
61 |     let result = indoc! { r#"
62 |         #[cfg(feature = "rustpython-compiler")]
63 |         use rustpython_compiler::*;"#};
64 | 
65 |     assert_analyzed_source_code(source_code, result, "rust")
66 | }
67 |
68 | /// https://github.com/RustPython/RustPython/blob/bdb0c8f64557e0822f0bcfd63defbad54625c17a/wasm/lib/src/js_module.rs#L24-L55
69 | #[test]
70 | fn test_macro_above_extern_c_module() { // attribute-decorated `extern "C"` block must pass through unannotated
71 |     let source_code = indoc! { r#"
72 |         #[wasm_bindgen(inline_js = "
73 |         export function has_prop(target, prop) { return prop in Object(target); }
74 |         export function get_prop(target, prop) { return target[prop]; }
75 |         export function set_prop(target, prop, value) { target[prop] = value; }
76 |         export function type_of(a) { return typeof a; }
77 |         export function instance_of(lhs, rhs) { return lhs instanceof rhs; }
78 |         ")]
79 |         extern "C" {
80 |             #[wasm_bindgen(catch)]
81 |             fn has_prop(target: &JsValue, prop: &JsValue) -> Result;
82 |             #[wasm_bindgen(catch)]
83 |             fn get_prop(target: &JsValue, prop: &JsValue) -> Result;
84 |             #[wasm_bindgen(catch)]
85 |             fn set_prop(target: &JsValue, prop: &JsValue, value: &JsValue) -> Result<(), JsValue>;
86 |             #[wasm_bindgen]
87 |             fn type_of(a: &JsValue) -> String;
88 |             #[wasm_bindgen(catch)]
89 |             fn instance_of(lhs: &JsValue, rhs: &JsValue) -> Result;
90 |         }"#};
91 |     // expected: output identical to input — no markers inside or above the extern block
92 |     let result = indoc! { r#"
93 |         #[wasm_bindgen(inline_js = "
94 |         export function has_prop(target, prop) { return prop in Object(target); }
95 |         export function get_prop(target, prop) { return target[prop]; }
96 |         export function set_prop(target, prop, value) { target[prop] = value; }
97 |         export function type_of(a) { return typeof a; }
98 |         export function instance_of(lhs, rhs) { return lhs instanceof rhs; }
99 |         ")]
100 |         extern "C" {
101 |             #[wasm_bindgen(catch)]
102 |             fn has_prop(target: &JsValue, prop: &JsValue) -> Result;
103 |             #[wasm_bindgen(catch)]
104 |             fn get_prop(target: &JsValue, prop: &JsValue) -> Result;
105 |             #[wasm_bindgen(catch)]
106 |             fn set_prop(target: &JsValue, prop: &JsValue, value: &JsValue) -> Result<(), JsValue>;
107 |             #[wasm_bindgen]
108 |             fn type_of(a: &JsValue) -> String;
109 |             #[wasm_bindgen(catch)]
110 |             fn instance_of(lhs: &JsValue, rhs: &JsValue) -> Result;
111 |         }"#};
112 | 
113 |     assert_analyzed_source_code(source_code, result, "rust")
114 | }
115 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/rust_test/serde_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_several_impl_declaration() { // each impl and each fn inside gets a marker; scope name is the trait ("PartialEq")
6 |     let source_code = indoc! {"
7 |         impl PartialEq for Ident {
8 |             fn eq(&self, word: &Symbol) -> bool {
9 |                 self == word.0
10 |             }
11 |         }
12 | 
13 |         impl<'a> PartialEq for &'a Ident {
14 |             fn eq(&self, word: &Symbol) -> bool {
15 |                 *self == word.0
16 |             }
17 |         }
18 | 
19 |         impl PartialEq for Path {
20 |             fn eq(&self, word: &Symbol) -> bool {
21 |                 self.is_ident(word.0)
22 |             }
23 |         }"};
24 |     // expected: "/// [TODO] PartialEq" above every impl and "/// [TODO] PartialEq > eq" above every fn
25 |     let result = indoc! {"
26 |         /// [TODO] PartialEq
27 |         impl PartialEq for Ident {
28 |             /// [TODO] PartialEq > eq
29 |             fn eq(&self, word: &Symbol) -> bool {
30 |                 self == word.0
31 |             }
32 |         }
33 | 
34 |         /// [TODO] PartialEq
35 |         impl<'a> PartialEq for &'a Ident {
36 |             /// [TODO] PartialEq > eq
37 |             fn eq(&self, word: &Symbol) -> bool {
38 |                 *self == word.0
39 |             }
40 |         }
41 | 
42 |         /// [TODO] PartialEq
43 |         impl PartialEq for Path {
44 |             /// [TODO] PartialEq > eq
45 |             fn eq(&self, word: &Symbol) -> bool {
46 |                 self.is_ident(word.0)
47 |             }
48 |         }"};
49 | 
50 |     assert_analyzed_source_code(source_code, result, "rust")
51 | }
52 |
53 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/lib.rs#L155-L156
54 | #[test]
55 | fn test_macro_above_extern_crate_declaration_should_be_ignored() { // cfg-gated `extern crate` passes through unannotated
56 |     let source_code = indoc! { r#"
57 |         #[cfg(feature = "alloc")]
58 |         extern crate alloc;"#};
59 |     // expected: output identical to input
60 |     let result = indoc! { r#"
61 |         #[cfg(feature = "alloc")]
62 |         extern crate alloc;"#};
63 | 
64 |     assert_analyzed_source_code(source_code, result, "rust");
65 | }
66 |
67 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/precompiled/bin/main.rs#L11-L12
68 | #[test]
69 | fn test_macro_above_static_variable_should_be_ignored() { // attribute-decorated `static` passes through unannotated
70 |     let source_code = indoc! {r#"
71 |         #[global_allocator]
72 |         static ALLOCATOR: MonotonicAllocator = MonotonicAllocator;"#};
73 |     // expected: output identical to input
74 |     let result = indoc! {r#"
75 |         #[global_allocator]
76 |         static ALLOCATOR: MonotonicAllocator = MonotonicAllocator;"#};
77 | 
78 |     assert_analyzed_source_code(source_code, result, "rust")
79 | }
80 |
81 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/de/impls.rs#L1783-L1793
82 | #[test]
83 | fn test_macro_above_macro_invocation_should_be_ignored() { // cfg-gated macro invocations pass through unannotated
84 |     let source_code = indoc! { r#"
85 |         #[cfg(any(feature = "std", feature = "alloc"))]
86 |         forwarded_impl!((T), Box, Box::new);
87 | 
88 |         #[cfg(any(feature = "std", feature = "alloc"))]
89 |         forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice);
90 | 
91 |         #[cfg(any(feature = "std", feature = "alloc"))]
92 |         forwarded_impl!((), Box, String::into_boxed_str);
93 | 
94 |         #[cfg(all(feature = "std", any(unix, windows)))]
95 |         forwarded_impl!((), Box, OsString::into_boxed_os_str);"#};
96 |     // expected: output identical to input — none of the four invocations is annotated
97 |     let result = indoc! { r#"
98 |         #[cfg(any(feature = "std", feature = "alloc"))]
99 |         forwarded_impl!((T), Box, Box::new);
100 | 
101 |         #[cfg(any(feature = "std", feature = "alloc"))]
102 |         forwarded_impl!((T), Box<[T]>, Vec::into_boxed_slice);
103 | 
104 |         #[cfg(any(feature = "std", feature = "alloc"))]
105 |         forwarded_impl!((), Box, String::into_boxed_str);
106 | 
107 |         #[cfg(all(feature = "std", any(unix, windows)))]
108 |         forwarded_impl!((), Box, OsString::into_boxed_os_str);"#};
109 | 
110 |     assert_analyzed_source_code(source_code, result, "rust")
111 | }
112 |
113 | /// https://github.com/serde-rs/serde/blob/7b548db91ed7da81a5c0ddbd6f6f21238aacfebe/serde/src/de/mod.rs#L119-L126
114 | #[test]
115 | fn test_ignore_mod_items_in_a_row() { // consecutive `mod` declarations pass through unannotated
116 |     let source_code = indoc! { r#"
117 |         pub mod value;
118 | 
119 |         #[cfg(not(no_integer128))]
120 |         mod format;
121 |         mod ignored_any;
122 |         mod impls;
123 |         pub(crate) mod size_hint;
124 |         mod utf8;"#};
125 |     // expected: output identical to input — no markers for any mod item
126 |     let result = indoc! { r#"
127 |         pub mod value;
128 | 
129 |         #[cfg(not(no_integer128))]
130 |         mod format;
131 |         mod ignored_any;
132 |         mod impls;
133 |         pub(crate) mod size_hint;
134 |         mod utf8;"#};
135 | 
136 |     assert_analyzed_source_code(source_code, result, "rust");
137 | }
138 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/typescript_test.rs:
--------------------------------------------------------------------------------
1 | #[cfg(test)]
2 | mod typescript_case_test;
3 |
4 | #[cfg(test)]
5 | mod angular_case_test;
6 |
7 | #[cfg(test)]
8 | mod async_case_test;
9 |
10 | #[cfg(test)]
11 | mod svelt_case_test;
12 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/typescript_test/angular_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
4 | #[test]
5 | fn test_angular_code() { // const enum, interface, and exported function each gain a "// [TODO] <name>" marker
6 |     let source_code = indoc! {r#"
7 |         export const enum JitCompilerUsage {
8 |           Decorator,
9 |           PartialDeclaration,
10 |         }
11 | 
12 |         export interface JitCompilerUsageRequest {
13 |           usage: JitCompilerUsage;
14 |           kind: 'directive'|'component'|'pipe'|'injectable'|'NgModule';
15 |           type: Type;
16 |         }
17 | 
18 |         export function getCompilerFacade(request: JitCompilerUsageRequest): CompilerFacade {
19 |           const globalNg: ExportedCompilerFacade = global['ng'];
20 |           if (globalNg && globalNg.ɵcompilerFacade) {
21 |             return globalNg.ɵcompilerFacade;
22 |           }
23 | 
24 |           if (typeof ngDevMode === 'undefined' || ngDevMode) {
25 |             console.error(`JIT compilation failed for ${request.kind}`, request.type);
26 | 
27 |             let message = `The ${request.kind} '${
28 |                 request
29 |                     .type.name}' needs to be compiled using the JIT compiler, but '@angular/compiler' is not available.\n\n`;
30 |             if (request.usage === JitCompilerUsage.PartialDeclaration) {
31 |               message += `The ${request.kind} is part of a library that has been partially compiled.\n`;
32 |               message +=
33 |                   `However, the Angular Linker has not processed the library such that JIT compilation is used as fallback.\n`;
34 |               message += '\n';
35 |               message +=
36 |                   `Ideally, the library is processed using the Angular Linker to become fully AOT compiled.\n`;
37 |             } else {
38 |               message +=
39 |                   `JIT compilation is discouraged for production use-cases! Consider using AOT mode instead.\n`;
40 |             }
41 |             message +=
42 |                 `Alternatively, the JIT compiler should be loaded by bootstrapping using '@angular/platform-browser-dynamic' or '@angular/platform-server',\n`;
43 |             message +=
44 |                 `or manually provide the compiler with 'import "@angular/compiler";' before bootstrapping.`;
45 |             throw new Error(message);
46 |           } else {
47 |             throw new Error('JIT compiler unavailable');
48 |           }
49 |         }"#};
50 |     // expected: markers above the three top-level declarations; function body unchanged
51 |     let expected = indoc! {r#"
52 |         // [TODO] JitCompilerUsage
53 |         export const enum JitCompilerUsage {
54 |           Decorator,
55 |           PartialDeclaration,
56 |         }
57 | 
58 |         // [TODO] JitCompilerUsageRequest
59 |         export interface JitCompilerUsageRequest {
60 |           usage: JitCompilerUsage;
61 |           kind: 'directive'|'component'|'pipe'|'injectable'|'NgModule';
62 |           type: Type;
63 |         }
64 | 
65 |         // [TODO] getCompilerFacade
66 |         export function getCompilerFacade(request: JitCompilerUsageRequest): CompilerFacade {
67 |           const globalNg: ExportedCompilerFacade = global['ng'];
68 |           if (globalNg && globalNg.ɵcompilerFacade) {
69 |             return globalNg.ɵcompilerFacade;
70 |           }
71 | 
72 |           if (typeof ngDevMode === 'undefined' || ngDevMode) {
73 |             console.error(`JIT compilation failed for ${request.kind}`, request.type);
74 | 
75 |             let message = `The ${request.kind} '${
76 |                 request
77 |                     .type.name}' needs to be compiled using the JIT compiler, but '@angular/compiler' is not available.\n\n`;
78 |             if (request.usage === JitCompilerUsage.PartialDeclaration) {
79 |               message += `The ${request.kind} is part of a library that has been partially compiled.\n`;
80 |               message +=
81 |                   `However, the Angular Linker has not processed the library such that JIT compilation is used as fallback.\n`;
82 |               message += '\n';
83 |               message +=
84 |                   `Ideally, the library is processed using the Angular Linker to become fully AOT compiled.\n`;
85 |             } else {
86 |               message +=
87 |                   `JIT compilation is discouraged for production use-cases! Consider using AOT mode instead.\n`;
88 |             }
89 |             message +=
90 |                 `Alternatively, the JIT compiler should be loaded by bootstrapping using '@angular/platform-browser-dynamic' or '@angular/platform-server',\n`;
91 |             message +=
92 |                 `or manually provide the compiler with 'import "@angular/compiler";' before bootstrapping.`;
93 |             throw new Error(message);
94 |           } else {
95 |             throw new Error('JIT compiler unavailable');
96 |           }
97 |         }"#};
98 | 
99 |     assert_analyzed_source_code(source_code, expected, "typescript")
100 | }
101 |
102 | #[test]
103 | #[ignore = "Should not be add TODO comment to above the `export ... from` statement"]
104 | fn test_angular_import_statement() { // currently asserts input == output; `_should_fix_output` documents the desired behavior
105 |     let source_code = indoc! {r#"
106 |         import {global} from '../util/global';
107 |         import {CompilerFacade, ExportedCompilerFacade, Type} from './compiler_facade_interface';
108 |         export * from './compiler_facade_interface';
109 |     "#};
110 |     // kept (underscore-prefixed) as the target output once the re-export bug is fixed
111 |     let _should_fix_output = indoc! {r#"
112 |         import {global} from '../util/global';
113 |         import {CompilerFacade, ExportedCompilerFacade, Type} from './compiler_facade_interface';
114 |         // [TODO] anonymous
115 |         export * from './compiler_facade_interface';"#};
116 | 
117 |     assert_analyzed_source_code(source_code, source_code, "typescript")
118 | }
119 |
#[test]
fn test_abstract_class_statement() {
    // An exported abstract class should get a single TODO annotation above
    // its declaration; abstract members are left untouched.
    let source_code = indoc! {r#"
        export abstract class RendererFactory2 {
        abstract createRenderer(hostElement: any, type: RendererType2|null): Renderer2;
        abstract begin?(): void;
        abstract end?(): void;
        abstract whenRenderingDone?(): Promise;
        }"#};

    // Renamed from the misspelled `exptected`.
    let expected = indoc! {r#"
        // [TODO] RendererFactory2
        export abstract class RendererFactory2 {
        abstract createRenderer(hostElement: any, type: RendererType2|null): Renderer2;
        abstract begin?(): void;
        abstract end?(): void;
        abstract whenRenderingDone?(): Promise;
        }"#};

    assert_analyzed_source_code(source_code, expected, "typescript")
}
141 |
/// A regular exported class receives exactly one TODO annotation, placed
/// directly above the `export class` line; members and their existing
/// JSDoc comments are left untouched.
#[test]
fn test_normal_class_statement() {
    let before = indoc! {r#"
        export class TransferState {
        static ɵprov =
        ɵɵdefineInjectable({
        token: TransferState,
        providedIn: 'root',
        factory: initTransferState,
        });

        /** @internal */
        store: Record = {};

        private onSerializeCallbacks: {[k: string]: () => unknown | undefined} = {};

        /**
        * Get the value corresponding to a key. Return `defaultValue` if key is not found.
        */
        get(key: StateKey, defaultValue: T): T {
        return this.store[key] !== undefined ? this.store[key] as T : defaultValue;
        }

        /**
        * Set the value corresponding to a key.
        */
        set(key: StateKey, value: T): void {
        this.store[key] = value;
        }

        /**
        * Remove a key from the store.
        */
        remove(key: StateKey): void {
        delete this.store[key];
        }

        /**
        * Test whether a key exists in the store.
        */
        hasKey(key: StateKey): boolean {
        return this.store.hasOwnProperty(key);
        }

        /**
        * Indicates whether the state is empty.
        */
        get isEmpty(): boolean {
        return Object.keys(this.store).length === 0;
        }

        /**
        * Register a callback to provide the value for a key when `toJson` is called.
        */
        onSerialize(key: StateKey, callback: () => T): void {
        this.onSerializeCallbacks[key] = callback;
        }
        }"#};

    // Identical to `before` except for the single TODO line on top.
    let after = indoc! {r#"
        // [TODO] TransferState
        export class TransferState {
        static ɵprov =
        ɵɵdefineInjectable({
        token: TransferState,
        providedIn: 'root',
        factory: initTransferState,
        });

        /** @internal */
        store: Record = {};

        private onSerializeCallbacks: {[k: string]: () => unknown | undefined} = {};

        /**
        * Get the value corresponding to a key. Return `defaultValue` if key is not found.
        */
        get(key: StateKey, defaultValue: T): T {
        return this.store[key] !== undefined ? this.store[key] as T : defaultValue;
        }

        /**
        * Set the value corresponding to a key.
        */
        set(key: StateKey, value: T): void {
        this.store[key] = value;
        }

        /**
        * Remove a key from the store.
        */
        remove(key: StateKey): void {
        delete this.store[key];
        }

        /**
        * Test whether a key exists in the store.
        */
        hasKey(key: StateKey): boolean {
        return this.store.hasOwnProperty(key);
        }

        /**
        * Indicates whether the state is empty.
        */
        get isEmpty(): boolean {
        return Object.keys(this.store).length === 0;
        }

        /**
        * Register a callback to provide the value for a key when `toJson` is called.
        */
        onSerialize(key: StateKey, callback: () => T): void {
        this.onSerializeCallbacks[key] = callback;
        }
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
261 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/typescript_test/async_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// The analyzer annotates a plain `async function` declaration with a
/// TODO comment carrying the function name.
#[test]
fn test_async_function_expression() {
    let before = indoc! {r#"
        async function foo (){
        const dddd = await asyncBusby(22);
        console.log(dddd);
        }"#};

    let after = indoc! {r#"
        // [TODO] foo
        async function foo (){
        const dddd = await asyncBusby(22);
        console.log(dddd);
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
21 |
/// Arrow functions bound to a `const` are not annotated yet; once the
/// analyzer supports them, `foo` should get a TODO comment like any other
/// named function.
#[test]
#[ignore = "TODO: Support arrow function"]
fn test_async_arrow_function() {
    let before = indoc! {r#"
        const foo = async () => {
        const dddd = await asyncBusby(22);
        console.log(dddd);
        }"#};

    let after = indoc! {r#"
        // [TODO] foo
        const foo = async () => {
        const dddd = await asyncBusby(22);
        console.log(dddd);
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
40 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/typescript_test/svelt_case_test.rs:
--------------------------------------------------------------------------------
1 | use indoc::indoc;
2 |
3 | use crate::integration_test::assert_analyzed_source_code;
4 |
/// Plain interfaces, exported type aliases, and exported interfaces with an
/// `extends` clause should each receive their own TODO annotation.
#[test]
fn test_interface_and_type_extends_with_exports() {
    let before = indoc! {r#"
        interface BaseNode {
        start: number;
        end: number;
        type: string;
        children?: TemplateNode[];
        [prop_name: string]: any;
        }

        export type DirectiveType =
        | 'Action'
        | 'Animation'
        | 'Binding'
        | 'Class'
        | 'StyleDirective'
        | 'EventHandler'
        | 'Let'
        | 'Ref'
        | 'Transition';

        export interface BaseDirective extends BaseNode {
        type: DirectiveType;
        name: string;
        }"#};

    // Each of the three declarations gains exactly one TODO line.
    let after = indoc! {r#"
        // [TODO] BaseNode
        interface BaseNode {
        start: number;
        end: number;
        type: string;
        children?: TemplateNode[];
        [prop_name: string]: any;
        }

        // [TODO] DirectiveType
        export type DirectiveType =
        | 'Action'
        | 'Animation'
        | 'Binding'
        | 'Class'
        | 'StyleDirective'
        | 'EventHandler'
        | 'Let'
        | 'Ref'
        | 'Transition';

        // [TODO] BaseDirective
        export interface BaseDirective extends BaseNode {
        type: DirectiveType;
        name: string;
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
62 |
--------------------------------------------------------------------------------
/tests/integration_test/analyze_command_test/typescript_test/typescript_case_test.rs:
--------------------------------------------------------------------------------
1 | use crate::integration_test::assert_analyzed_source_code;
2 | use indoc::indoc;
3 |
/// Consecutive exported functions (no blank lines between them) each get a
/// TODO annotation named after the function.
#[test]
fn test_typescript_export_functions() {
    let before = indoc! {r#"
        export function parseBindingIdentifier(privateIdentifierDiagnosticMessage?: DiagnosticMessage) {
        return createIdentifier(isBindingIdentifier(), /*diagnosticMessage*/ undefined, privateIdentifierDiagnosticMessage);
        }
        export function parseIdentifier(diagnosticMessage?: DiagnosticMessage, privateIdentifierDiagnosticMessage?: DiagnosticMessage): Identifier {
        return createIdentifier(isIdentifier(), diagnosticMessage, privateIdentifierDiagnosticMessage);
        }
        export function parseIdentifierName(diagnosticMessage?: DiagnosticMessage): Identifier {
        return createIdentifier(tokenIsIdentifierOrKeyword(token()), diagnosticMessage);
        }"#};

    let after = indoc! {r#"
        // [TODO] parseBindingIdentifier
        export function parseBindingIdentifier(privateIdentifierDiagnosticMessage?: DiagnosticMessage) {
        return createIdentifier(isBindingIdentifier(), /*diagnosticMessage*/ undefined, privateIdentifierDiagnosticMessage);
        }
        // [TODO] parseIdentifier
        export function parseIdentifier(diagnosticMessage?: DiagnosticMessage, privateIdentifierDiagnosticMessage?: DiagnosticMessage): Identifier {
        return createIdentifier(isIdentifier(), diagnosticMessage, privateIdentifierDiagnosticMessage);
        }
        // [TODO] parseIdentifierName
        export function parseIdentifierName(diagnosticMessage?: DiagnosticMessage): Identifier {
        return createIdentifier(tokenIsIdentifierOrKeyword(token()), diagnosticMessage);
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
33 |
/// Non-exported functions separated by blank lines also get one TODO
/// annotation each; the blank lines between them are preserved.
#[test]
fn test_typescript_functions() {
    let before = indoc! {r#"
        function setContextFlag(val: boolean, flag: NodeFlags) {
        if (val) {
        contextFlags |= flag;
        }
        else {
        contextFlags &= ~flag;
        }
        }

        function setDisallowInContext(val: boolean) {
        setContextFlag(val, NodeFlags.DisallowInContext);
        }

        function setYieldContext(val: boolean) {
        setContextFlag(val, NodeFlags.YieldContext);
        }

        function setDecoratorContext(val: boolean) {
        setContextFlag(val, NodeFlags.DecoratorContext);
        }

        function setAwaitContext(val: boolean) {
        setContextFlag(val, NodeFlags.AwaitContext);
        }"#};

    let after = indoc! {r#"
        // [TODO] setContextFlag
        function setContextFlag(val: boolean, flag: NodeFlags) {
        if (val) {
        contextFlags |= flag;
        }
        else {
        contextFlags &= ~flag;
        }
        }

        // [TODO] setDisallowInContext
        function setDisallowInContext(val: boolean) {
        setContextFlag(val, NodeFlags.DisallowInContext);
        }

        // [TODO] setYieldContext
        function setYieldContext(val: boolean) {
        setContextFlag(val, NodeFlags.YieldContext);
        }

        // [TODO] setDecoratorContext
        function setDecoratorContext(val: boolean) {
        setContextFlag(val, NodeFlags.DecoratorContext);
        }

        // [TODO] setAwaitContext
        function setAwaitContext(val: boolean) {
        setContextFlag(val, NodeFlags.AwaitContext);
        }"#};

    assert_analyzed_source_code(before, after, "typescript")
}
95 |
--------------------------------------------------------------------------------
/tests/pattern_search_test.rs:
--------------------------------------------------------------------------------
#[cfg(test)]
mod pattern_search_test {
    // Tests for `PatternTree`, the multi-pattern searcher built on
    // Aho–Corasick. Results are `(found, match_start_offsets)` tuples.
    //
    // NOTE(review): the expected byte offsets (e.g. 11 and 154 below) only
    // line up if the multi-line raw-string literals keep their original
    // leading indentation; this dump appears to have stripped whitespace,
    // so confirm the literals against the file on disk.

    #[test]
    fn test_search_multiple_pattern() {
        use balpan::commands::pattern_search::PatternTree;

        let searcher = PatternTree::new();

        let text = "ABAAABCDABCDABABCD";
        let patterns = vec!["ABCD".to_string(), "BCD".to_string()];

        // "ABCD" occurs at byte offsets 4, 8 and 14. The overlapping "BCD"
        // hits (5, 9, 15) are not reported separately — presumably the
        // search yields one match per position; verify against the
        // implementation.
        let expected = (true, vec![4, 8, 14]);
        assert_eq!(searcher.aho_corasick_search(text, &patterns), expected);
    }

    #[test]
    fn test_search_todo_done_comments_using_aho_corasick() {
        use balpan::commands::pattern_search::PatternTree;

        // Search a comment block for both TODO and DONE markers at once.
        let searcher = PatternTree::new();
        let text = r#"
//[TODO] ABC
//some comment
//struct ABC {
// field: i32,
// field2: i32,
//}
//
//[DONE] DEF
//some comment about DEF
//fn DEF() {
// unimplemented!();
//}
//"#;

        let patterns = vec!["[TODO]".to_string(), "[DONE]".to_string()];
        // One offset per marker: "[TODO]" then "[DONE]" (see module NOTE on
        // offset/indentation sensitivity).
        let expected = (true, vec![11, 154]);

        let result = searcher.aho_corasick_search(text, &patterns);

        assert_eq!(result, expected);
    }

    #[test]
    fn test_selective_search() {
        use balpan::commands::pattern_search::PatternTree;

        // Same corpus shape as above, but both markers are "[TODO]" and the
        // search is driven through `selective_search` with a single pattern.
        let searcher = PatternTree::new();
        let text = r#"
//[TODO] ABC
//some comment
//struct ABC {
// field: i32,
// field2: i32,
//}
//
//[TODO] DEF
//some comment about DEF
//fn DEF() {
// unimplemented!();
//}
//"#;

        let pattern = vec!["[TODO]".to_string()];
        let expected = (true, vec![11, 154]);

        let result = searcher.selective_search(&pattern, text);

        assert_eq!(result, expected);
    }
}
72 |
#[cfg(test)]
mod boyer_moore_tests {
    use balpan::commands::boyer_moore::SearchIn;

    // Tests for the Boyer-Moore searcher: shift-table construction plus
    // first/all/overlapping match queries over byte slices.
    //
    // NOTE(review): the `collect::>()` calls in this module look garbled —
    // presumably `collect::<Vec<_>>()` lost its turbofish to angle-bracket
    // stripping when this file was exported. Confirm against the file on
    // disk before compiling.
    //
    // NOTE(review): the expected byte offsets in the string-based tests
    // (e.g. `Some(9)` for "MALCOM") only hold if the multi-line string
    // literals keep their original leading indentation, which this dump
    // appears to have stripped.

    #[test]
    fn test_find_pending_character_index() {
        use balpan::commands::boyer_moore::find_pending_character_index;

        // 'B' sits at indices 1 and 3, yet the expected result is 2 —
        // presumably the helper returns a shift/offset relative to `start`
        // rather than a raw index; verify against the implementation.
        let chars = vec!['A', 'B', 'C', 'B', 'D'];
        let start = 1;
        let pattern = &'B';

        let result = find_pending_character_index(&chars, start, pattern);

        assert_eq!(2, result);
    }

    #[test]
    fn test_suffix_table() {
        use balpan::commands::boyer_moore::get_suffix_table;

        // Suffix-length table for the classic Boyer-Moore example pattern.
        let text = "GCAGAGAG".as_bytes();

        // (clippy `needless_borrow`: `text` is already a `&[u8]` slice.)
        let table = get_suffix_table(&text);
        let expected = vec![1, 0, 0, 2, 0, 4, 0, 0];

        assert_eq!(table, expected);

        let text = "abcbabcabab".as_bytes();

        let table = get_suffix_table(&text);
        let expected = vec![0, 2, 0, 1, 0, 3, 0, 0, 2, 0, 0];

        assert_eq!(table, expected);
    }

    #[test]
    fn test_build_suffix_table() {
        use balpan::commands::boyer_moore::build_suffixes_table;

        // Good-suffix shift table derived from the suffix table above.
        let pattern = "GCAGAGAG".as_bytes();

        let table = build_suffixes_table(&pattern);
        let expected = vec![7, 7, 7, 2, 7, 4, 7, 1];

        assert_eq!(table, expected);

        let pattern = "abcbabcabab".as_bytes();

        let table = build_suffixes_table(&pattern);
        let expected = vec![10, 10, 10, 10, 10, 10, 10, 5, 2, 7, 1];

        assert_eq!(table, expected);
    }

    #[test]
    fn test_find_first_occurrence() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // "abc" first matches "abababc" at byte offset 4.
        let searcher = BoyerMooreSearch::new(b"abc");
        let text = "abababc";

        assert_eq!(Some(4), searcher.find_first_position(text.as_bytes()));
    }

    #[test]
    fn test_overlapping() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // Overlapping matches of "aaba" in "aabaabaaba": offsets 0, 3, 6.
        let searcher = BoyerMooreSearch::new(b"aaba");
        let text = b"aabaabaaba";
        let result = searcher.find_overlapping_in(text).collect::>();

        assert_eq!(vec![0, 3, 6], result);
    }

    #[test]
    fn test_no_pattern_found() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // "abc" never completes in "ababab"; the search must return None.
        let searcher = BoyerMooreSearch::new(b"abc");
        let text = "ababab";

        assert_eq!(None, searcher.find_first_position(text.as_bytes()));
    }

    #[test]
    fn test_find_patterns_in_source_code() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // Byte offsets of every "fn" occurrence in a small Rust snippet.
        // This is a plain byte search, so no tokenization is involved.
        // (Offsets depend on the literal's original indentation — see the
        // module NOTE.)
        let source = r#"
//[TODO] main
//comment for main
fn main() {
println!("Hello, world!");
}

pub trait Foo<'a, T> {
fn foo(&'a self) -> None;
fn foo2(&'a self) -> bool;
}

impl <'a, T> Foo<'a, T> for Foo {
fn foo(&'a self) -> None {
None
}

fn foo2(&'a self) -> bool {
true
}
}
"#
        .as_bytes();

        let searcher = BoyerMooreSearch::new(b"fn");
        let result = searcher.find_in(source).collect::>();

        assert_eq!(vec![58, 163, 201, 293, 368], result);
    }

    #[test]
    fn test_search_word() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // A long natural-language corpus (Macbeth, Act IV Scene 3) used to
        // exercise both the single-match and all-matches entry points.
        let text = "
MALCOM.
'Tis call'd the evil:
A most miraculous work in this good king;
Which often, since my here-remain in England,
I have seen him do. How he solicits heaven,
Himself best knows, but strangely-visited people,
All swoln and ulcerous, pitiful to the eye,
The mere despair of surgery, he cures;
Hanging a golden stamp about their necks,
Put on with holy prayers: and 'tis spoken,
To the succeeding royalty he leaves
The healing benediction. With this strange virtue,
He hath a heavenly gift of prophecy;
And sundry blessings hang about his throne,
That speak him full of grace.

MACDUFF.
See, who comes here?

MALCOLM.
My countryman; but yet I know him not.

MACDUFF.
My ever-gentle cousin, welcome hither.

MALCOLM.
I know him now. Good God, betimes remove
The means that makes us strangers!

ROSS.
Sir, amen.

MACDUFF.
Stands Scotland where it did?

ROSS.
Alas, poor country,
Almost afraid to know itself! It cannot
Be call'd our mother, but our grave, where nothing,
But who knows nothing, is once seen to smile;
Where sighs, and groans, and shrieks, that rent the air,
Are made, not mark'd; where violent sorrow seems
A modern ecstasy. The dead man's knell
Is there scarce ask'd for who; and good men's lives
Expire before the flowers in their caps,
Dying or ere they sicken.

MACDUFF.
O, relation
Too nice, and yet too true!

MALCOLM.
What’s the newest grief?

ROSS.
That of an hour’s age doth hiss the speaker;
Each minute teems a new one.

MACDUFF.
How does my wife?

ROSS.
Why, well.

MACDUFF.
And all my children?

ROSS.
Well too.

MACDUFF.
The tyrant has not batter’d at their peace?

ROSS.
No; they were well at peace when I did leave ’em.

MACDUFF.
Be not a niggard of your speech: how goes’t?

ROSS.
When I came hither to transport the tidings,
Which I have heavily borne, there ran a rumour
Of many worthy fellows that were out;
Which was to my belief witness’d the rather,
For that I saw the tyrant’s power afoot.
Now is the time of help. Your eye in Scotland
Would create soldiers, make our women fight,
To doff their dire distresses.

MALCOLM.
Be’t their comfort
We are coming thither. Gracious England hath
Lent us good Siward and ten thousand men;
An older and a better soldier none
That Christendom gives out.

ROSS.
Would I could answer
This comfort with the like! But I have words
That would be howl’d out in the desert air,
Where hearing should not latch them.

MACDUFF.
What concern they?
The general cause? or is it a fee-grief
Due to some single breast?

ROSS.
No mind that’s honest
But in it shares some woe, though the main part
Pertains to you alone.

MACDUFF.
If it be mine,
Keep it not from me, quickly let me have it.

ROSS.
Let not your ears despise my tongue for ever,
Which shall possess them with the heaviest sound
That ever yet they heard.

MACDUFF.
Humh! I guess at it.

ROSS.
Your castle is surpris’d; your wife and babes
Savagely slaughter’d. To relate the manner
Were, on the quarry of these murder’d deer,
To add the death of you.

MALCOLM.
Merciful heaven!—
What, man! ne’er pull your hat upon your brows.
Give sorrow words. The grief that does not speak
Whispers the o’er-fraught heart, and bids it break.

MACDUFF.
My children too?

ROSS.
Wife, children, servants, all
That could be found.

MACDUFF.
And I must be from thence!
My wife kill’d too?

ROSS.
I have said."
        .as_bytes();

        // The corpus spells "MALCOM" (sic) once near the top; the later
        // speaker headings use "MALCOLM", so only one hit is expected here.
        let searcher = BoyerMooreSearch::new(b"MALCOM");
        let first_occurrence = searcher.find_first_position(text);
        assert_eq!(Some(9), first_occurrence);

        // Every "MACDUFF" heading, as byte offsets into the corpus.
        let searcher = BoyerMooreSearch::new(b"MACDUFF");
        let find_all = searcher.find_in(text).collect::>();
        let expected = vec![
            716, 844, 1077, 1667, 1925, 2019, 2115, 2278, 3229, 3507, 3777, 4282, 4423,
        ];
        assert_eq!(expected, find_all);
    }

    #[test]
    fn test_is_work_for_non_alphabet() {
        use balpan::commands::boyer_moore::BoyerMooreSearch;

        // The search operates on raw UTF-8 bytes, so multi-byte emoji work;
        // each repetition of the 6-emoji pattern is 23 bytes apart.
        let pattern = "🦀🦀🐪🔥🐍✅".as_bytes();
        let searcher = BoyerMooreSearch::new(pattern);
        let text = "🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅🦀🦀🐪🔥🐍✅";

        let result = searcher.find_in(text.as_bytes()).collect::>();
        assert_eq!(vec![0, 23, 46, 69, 92], result);
    }
}
373 |
--------------------------------------------------------------------------------
/tests/tree_sitter_extended_test.rs:
--------------------------------------------------------------------------------
#[cfg(test)]
mod tree_sitter_extended_tests {
    use balpan::tree_sitter_extended::MembershipCheck;
    use tree_sitter::{Point, Range};

    /// Builds a `Range` from `(row, column)` pairs. These tests exercise
    /// point comparisons only, so both byte offsets stay zero.
    fn span(start: (usize, usize), end: (usize, usize)) -> Range {
        Range {
            start_byte: 0,
            end_byte: 0,
            start_point: Point {
                row: start.0,
                column: start.1,
            },
            end_point: Point {
                row: end.0,
                column: end.1,
            },
        }
    }

    /// A cursor on an earlier row is before the range and not a member.
    #[test]
    fn test_out_of_membership() {
        let cursor = Point { row: 2, column: 10 };
        let function_scope = span((4, 2), (10, 2));

        assert!(cursor.is_before(function_scope));
        assert!(!cursor.is_member_of(function_scope));
    }

    /// A cursor inside a single-row range counts as a member.
    #[test]
    fn test_membership_with_inline_code() {
        let cursor = Point { row: 2, column: 10 };
        let inlined_scope = span((2, 5), (2, 30));

        assert!(cursor.is_member_of(inlined_scope));
    }

    /// Both boundary points are inclusive: sitting exactly on the start or
    /// end of the range still counts as membership.
    #[test]
    fn test_cursor_is_pointing_the_boundary_of_range() {
        let at_start = Point { row: 2, column: 2 };
        let at_end = Point { row: 30, column: 2 };
        let function_scope = span((2, 2), (30, 2));

        assert!(!at_start.is_before(function_scope));
        assert!(at_start.is_member_of(function_scope));
        assert!(at_end.is_member_of(function_scope));
        assert!(!at_end.is_after(function_scope));
    }

    /// One column outside either boundary is no longer a member.
    #[test]
    fn test_cursor_is_pointing_outside_of_boundary() {
        let just_before = Point { row: 2, column: 1 };
        let just_after = Point { row: 30, column: 3 };
        let function_scope = span((2, 2), (30, 2));

        assert!(just_before.is_before(function_scope));
        assert!(!just_before.is_member_of(function_scope));
        assert!(!just_after.is_member_of(function_scope));
        assert!(just_after.is_after(function_scope));
    }
}
73 |
--------------------------------------------------------------------------------