├── .dockerignore ├── .gitignore ├── .travis.yml ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── docker ├── Dockerfile ├── docker_build.sh └── docker_run.sh ├── examples ├── delimiters.rs └── from_log.rs ├── fixtures ├── input │ ├── var_log_messages_line_1.txt │ ├── var_log_messages_line_2.txt │ ├── var_log_messages_line_3.txt │ ├── var_log_messages_line_4.txt │ ├── var_log_messages_line_5.txt │ └── var_log_messages_line_6.txt └── output │ └── original │ ├── original_after_line_1.txt │ ├── original_after_line_2.txt │ ├── original_after_line_3.txt │ ├── original_after_line_4.txt │ ├── original_after_line_5.txt │ └── original_after_line_6.txt ├── python ├── .gitignore ├── pyspellrs │ ├── Cargo.toml │ ├── build.rs │ └── src │ │ └── lib.rs ├── setup.py ├── spell │ └── __init__.py └── test.py └── src ├── lib.rs ├── map.rs └── object.rs /.dockerignore: -------------------------------------------------------------------------------- 1 | target 2 | .venv 3 | data 4 | build 5 | dist 6 | cache 7 | .git 8 | .vscode 9 | docker -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | data 4 | .python-version 5 | .venv 6 | *.egg-info 7 | spellrs 8 | build 9 | *.so 10 | cache 11 | dist 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | cache: 4 | - cargo 5 | 6 | rust: 7 | - 1.31.0 8 | - stable 9 | - beta 10 | - nightly 11 | 12 | matrix: 13 | allow_failures: 14 | - rust: nightly 15 | fast_finish: true 16 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "spell" 3 | version = "0.1.6" 4 | authors = ["Nicolas 
Bigaouette "] 5 | edition = "2018" 6 | license = "MIT/Apache-2.0" 7 | readme = "README.md" 8 | keywords = ["spell", "lcs", "log", "parsing"] 9 | categories = ["parsing", "science", "text-processing", "value-formatting"] 10 | repository = "https://github.com/nbigaouette/spell-rs" 11 | homepage = "https://github.com/nbigaouette/spell-rs" 12 | documentation = "https://docs.rs/spell" 13 | description = "Spell is a Streaming Parser for Event Logs using an LCS (Longest Common Subsequence)" 14 | 15 | [badges] 16 | travis-ci = { repository = "nbigaouette/spell-rs" } 17 | 18 | [dependencies] 19 | serde = "1.0" 20 | serde_derive = "1.0" 21 | 22 | [dev-dependencies] 23 | csv = "1.0.5" 24 | 25 | [workspace] 26 | members = [ 27 | ".", 28 | "python/pyspellrs", 29 | ] 30 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Nicolas Bigaouette 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Nicolas Bigaouette 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spell-rs 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/spell.svg)](https://crates.io/crates/spell) 4 | [![Build Status](https://travis-ci.org/nbigaouette/spell-rs.svg?branch=master)](https://travis-ci.org/nbigaouette/spell-rs) 5 | [![Documentation](https://docs.rs/spell/badge.svg)](https://docs.rs/spell) 6 | [![License](https://img.shields.io/crates/l/spell.svg)](#license) 7 | 8 | _spell-rs_ is a _Spell_ implementation in Rust. 9 | 10 | Spell is a _**S**treaming **P**arser for **E**vent **L**ogs using an **L**CS_ (Longest Common Subsequence). 11 | 12 | The minimum Rust version is 1.31 due to the `use crate::` syntax. 13 | 14 | ## Reference 15 | 16 | Min Du and Feifei Li. Spell: Streaming Parsing of System Event Logs. 2016 IEEE 16th International 17 | Conference on Data Mining (ICDM) pp. 859-864, Barcelona, Spain, December, 2016. 18 | [DOI: 10.1109/ICDM.2016.0103](https://www.doi.org/10.1109/ICDM.2016.0103). 19 | https://ieeexplore.ieee.org/document/7837916 20 | 21 | * Original paper: https://www.cs.utah.edu/~lifeifei/papers/spell.pdf 22 | * Presentation slides: https://www.cs.utah.edu/~mind/papers/spell_slides.pdf 23 | 24 | ## Details 25 | 26 | Spell ingests log lines one by one and builds up a list of formatting strings that can be used to 27 | analyze the logs.
For example (see [presentation slides](https://www.cs.utah.edu/~mind/papers/spell_slides.pdf), 28 | page 19), given the two log entries: 29 | 30 | ```text 31 | Temperature (41C) exceeds warning threshold 32 | Temperature (42C, 43C) exceeds warning threshold 33 | ``` 34 | 35 | the LCS is: 36 | 37 | ```text 38 | Temperature * exceeds warning threshold 39 | ``` 40 | 41 | which can then be used to analyze the logs. 42 | 43 | As of cedf57a7a73b1052de937d13150c2b9d8a03237c, the Rust implementation gives the exact same results 44 | as the [Java one](https://github.com/EddiePi/Spell). 45 | 46 | ## Python Wrapper 47 | 48 | Universal Python package (wheel) can be built on all platforms using: 49 | 50 | ```sh 51 | cd python 52 | 53 | # Compile Rust code 54 | pip install --editable . 55 | 56 | # Test Python package 57 | python ./test.py 58 | 59 | # Build a wheel 60 | python setup.py --verbose bdist_wheel 61 | ``` 62 | 63 | ### Docker 64 | 65 | This only requires Docker. 66 | 67 | ```sh 68 | # Build a Docker image containing everything required 69 | ./docker/docker_build.sh 70 | 71 | # Run the commands to build wheel inside a docker container 72 | ./docker/docker_run.sh 73 | ``` 74 | 75 | ## Other Implementations 76 | 77 | * [Spell](https://github.com/EddiePi/Spell), Java version 78 | * [pyspell](https://github.com/bave/pyspell), a Python version 79 | * [spell](https://github.com/logforensicator/spell), another Python version 80 | 81 | ## License 82 | 83 | This project is licensed under either of 84 | 85 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or 86 | http://www.apache.org/licenses/LICENSE-2.0) 87 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or 88 | http://opensource.org/licenses/MIT) 89 | 90 | at your option. 
91 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rust:1.32.0-slim 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | curl \ 5 | python3 \ 6 | && rm -rf /var/lib/apt/lists/* 7 | 8 | RUN curl https://bootstrap.pypa.io/get-pip.py | python3 9 | 10 | # Using fork until this is merged: 11 | # https://github.com/getsentry/milksnake/pull/25 12 | # Without this, building inside Docker fails. 13 | RUN pip3 install https://github.com/nbigaouette/milksnake/archive/24-find_files-in-paths.zip 14 | -------------------------------------------------------------------------------- /docker/docker_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -o nounset 5 | 6 | export script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) 7 | cd ${script_dir}/.. 8 | 9 | docker_image="spellrs_builder" 10 | git_sha1="`git rev-parse HEAD`" 11 | git_describe="`git describe --tags --always`" 12 | git_branch="`git rev-parse --abbrev-ref HEAD`" 13 | 14 | # *********************************************************************** 15 | run() { 16 | args="${@}" 17 | printf "\033[32m${args}\n\033[0m" 18 | eval ${@} 19 | echo "" 20 | } 21 | # *********************************************************************** 22 | 23 | # Build docker image 24 | run "docker build 25 | --file docker/Dockerfile 26 | --tag ${docker_image}:${git_sha1} 27 | ." 28 | -------------------------------------------------------------------------------- /docker/docker_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | set -o nounset 5 | 6 | export script_dir=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd) 7 | cd ${script_dir}/..
8 | 9 | docker_image="spellrs_builder" 10 | git_sha1="`git rev-parse HEAD`" 11 | git_describe="`git describe --tags --always`" 12 | git_branch="`git rev-parse --abbrev-ref HEAD`" 13 | 14 | # *********************************************************************** 15 | run() { 16 | args="${@}" 17 | printf "\033[32m${args}\n\033[0m" 18 | eval ${@} 19 | echo "" 20 | } 21 | # *********************************************************************** 22 | 23 | # Cleanup 24 | run "rm -fr python/{build,dist,spellrs.egg-info,spell/_native*}" 25 | 26 | docker_home="/home/rust/home" 27 | docker_src="/home/rust/src" 28 | 29 | docker_cmd="docker run 30 | -it --rm 31 | --env HOME=${docker_home} 32 | --env CARGO_HOME=${docker_src}/docker/cache/cargo 33 | --env CARGO_TARGET_DIR=${docker_src}/docker/cache/target 34 | --workdir ${docker_src}/python 35 | --user $(id -u):$(id -g) 36 | --volume "$PWD":${docker_src} 37 | --volume "$PWD"/docker/cache/home:${docker_home} 38 | ${docker_image}:${git_sha1}" 39 | 40 | run "${docker_cmd} pip3 install --user --verbose --editable ." 
41 | run "${docker_cmd} python3 setup.py --verbose bdist_wheel" 42 | -------------------------------------------------------------------------------- /examples/delimiters.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fs::File; 3 | use std::io::{BufRead, BufReader}; 4 | 5 | type Result = std::result::Result>; 6 | 7 | use spell::map::LcsMap; 8 | 9 | fn main() -> Result<()> { 10 | let path: String = std::env::args().nth(1).unwrap(); 11 | 12 | let max_lines: usize = std::env::args() 13 | .nth(2) 14 | .map(|i| i.parse().unwrap()) 15 | .unwrap_or_else(|| usize::max_value()); 16 | 17 | let input = File::open(path)?; 18 | let buffered = BufReader::new(input); 19 | 20 | let mut map = LcsMap::with_delimiters(vec![' ', ',']); 21 | 22 | buffered 23 | .lines() 24 | .filter_map(|line| match line { 25 | Ok(line) => { 26 | map.insert(&line); 27 | Some(()) 28 | } 29 | Err(err) => { 30 | eprintln!("Error processing line: {:?}", err); 31 | None 32 | } 33 | }) 34 | .enumerate() 35 | .take_while(|(i, _line)| *i + 1 < max_lines) 36 | .for_each(|(_i, _line)| {}); 37 | 38 | println!("----------------------------"); 39 | println!("{}", map.to_string()); 40 | 41 | Ok(()) 42 | } 43 | -------------------------------------------------------------------------------- /examples/from_log.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::fs::File; 3 | use std::io::{BufRead, BufReader}; 4 | 5 | type Result = std::result::Result>; 6 | 7 | use spell::map::LcsMap; 8 | 9 | fn main() -> Result<()> { 10 | let path: String = std::env::args().nth(1).unwrap(); 11 | 12 | let max_lines: usize = std::env::args() 13 | .nth(2) 14 | .map(|i| i.parse().unwrap()) 15 | .unwrap_or_else(|| usize::max_value()); 16 | 17 | let input = File::open(path)?; 18 | let buffered = BufReader::new(input); 19 | 20 | let mut map = LcsMap::new(); 21 | 22 | buffered 23 | .lines() 24 | 
.filter_map(|line| match line { 25 | Ok(line) => { 26 | map.insert(&line); 27 | Some(()) 28 | } 29 | Err(err) => { 30 | eprintln!("Error processing line: {:?}", err); 31 | None 32 | } 33 | }) 34 | .enumerate() 35 | .take_while(|(i, _line)| *i + 1 < max_lines) 36 | .for_each(|(_i, _line)| {}); 37 | 38 | println!("----------------------------"); 39 | println!("{}", map.to_string()); 40 | 41 | Ok(()) 42 | } 43 | -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_1.txt: -------------------------------------------------------------------------------- 1 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_2.txt: -------------------------------------------------------------------------------- 1 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_3.txt: -------------------------------------------------------------------------------- 1 | Jan 22 04:16:40 combo su(pam_unix)[21719]: session opened for user news by (uid=0) -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_4.txt: -------------------------------------------------------------------------------- 1 | Jan 22 04:16:41 combo su(pam_unix)[21719]: session closed for user news -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_5.txt: -------------------------------------------------------------------------------- 1 | Jan 22 05:23:19 combo sshd(pam_unix)[24892]: check pass; user unknown -------------------------------------------------------------------------------- /fixtures/input/var_log_messages_line_6.txt: 
-------------------------------------------------------------------------------- 1 | Jan 22 05:23:19 combo sshd(pam_unix)[24892]: authentication failure; logname= uid=0 euid=0 tty=NODEVssh ruser= rhost=server3.sugolan.hu -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_1.txt: -------------------------------------------------------------------------------- 1 | 1 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 5 | {0} 6 | 7 | 1 total entries found, 1 expected. -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_2.txt: -------------------------------------------------------------------------------- 1 | 2 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 5 | {0} 6 | Object 1: 7 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] 8 | {1} 9 | 10 | 2 total entries found, 2 expected. -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_3.txt: -------------------------------------------------------------------------------- 1 | 3 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 5 | {0} 6 | Object 1: 7 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] 8 | {1} 9 | Object 2: 10 | Jan 22 04:16:40 combo su(pam_unix)[21719]: session opened for user news by (uid=0) 11 | {2} 12 | 13 | 3 total entries found, 3 expected. -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_4.txt: -------------------------------------------------------------------------------- 1 | 3 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 
5 | {0} 6 | Object 1: 7 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] 8 | {1} 9 | Object 2: 10 | Jan 22 * combo su(pam_unix)[21719]: session * for user news 11 | {2, 3} 12 | 13 | 4 total entries found, 4 expected. -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_5.txt: -------------------------------------------------------------------------------- 1 | 3 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 5 | {0} 6 | Object 1: 7 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] 8 | {1} 9 | Object 2: 10 | Jan 22 * combo * user * 11 | {2, 3, 4} 12 | 13 | 5 total entries found, 5 expected. -------------------------------------------------------------------------------- /fixtures/output/original/original_after_line_6.txt: -------------------------------------------------------------------------------- 1 | 4 Objects in the LCSMap 2 | 3 | Object 0: 4 | Jan 22 04:11:04 combo syslogd 1.4.1: restart. 5 | {0} 6 | Object 1: 7 | Jan 22 04:11:04 combo logrotate: ALERT exited abnormally with [1] 8 | {1} 9 | Object 2: 10 | Jan 22 * combo * user * 11 | {2, 3, 4} 12 | Object 3: 13 | Jan 22 05:23:19 combo sshd(pam_unix)[24892]: authentication failure; logname= uid=0 euid=0 tty=NODEVssh ruser= rhost=server3.sugolan.hu 14 | {5} 15 | 16 | 6 total entries found, 6 expected. 
-------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | spell 2 | *.egg-info -------------------------------------------------------------------------------- /python/pyspellrs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pyspellrs" 3 | version = "0.1.5" 4 | authors = ["Nicolas Bigaouette "] 5 | edition = "2018" 6 | build = "build.rs" 7 | 8 | [lib] 9 | name = "pyspellrs" 10 | crate-type = ["cdylib"] 11 | 12 | [dependencies] 13 | spell = { path = "../.." } 14 | log = "0.4" 15 | env_logger = "0.6" 16 | 17 | [build-dependencies] 18 | cbindgen = "0.8" 19 | 20 | -------------------------------------------------------------------------------- /python/pyspellrs/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cbindgen; 2 | 3 | use std::{env, path::Path}; 4 | 5 | fn main() { 6 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | let mut config: cbindgen::Config = Default::default(); 8 | config.language = cbindgen::Language::C; 9 | 10 | let target_dir = env::var("CARGO_TARGET_DIR").unwrap_or(String::from("../../target")); 11 | let header_file = Path::new(&target_dir).join("spell.h"); 12 | 13 | cbindgen::generate_with_config(&crate_dir, config) 14 | .unwrap() 15 | .write_to_file(header_file); 16 | } 17 | -------------------------------------------------------------------------------- /python/pyspellrs/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::{ffi::{CStr, CString}, os::raw::c_char}; 2 | 3 | use spell::{map::LcsMap, object::LcsObject, tokenize}; 4 | 5 | #[derive(Debug)] 6 | pub struct _Map { 7 | map: LcsMap, 8 | } 9 | 10 | #[derive(Debug)] 11 | pub struct _Object { 12 | object: LcsObject, 13 | } 14 | 15 | fn _init_env_logger() { 16 | 
std::env::var("RUST_LOG") 17 | .or_else(|_| -> Result { 18 | let rust_log = "pyspellrs=debug,spell=debug".to_string(); 19 | println!("Environment variable 'RUST_LOG' not set."); 20 | println!("Setting to: {}", rust_log); 21 | std::env::set_var("RUST_LOG", &rust_log); 22 | Ok(rust_log) 23 | }) 24 | .unwrap(); 25 | let _ = env_logger::try_init(); 26 | } 27 | 28 | #[no_mangle] 29 | pub unsafe extern "C" fn init_env_logger() { 30 | _init_env_logger(); 31 | } 32 | 33 | #[no_mangle] 34 | pub unsafe extern "C" fn new_map() -> *const _Map { 35 | log::debug!("Creating new map..."); 36 | let map = LcsMap::new(); 37 | 38 | let map_ptr = Box::into_raw(Box::new(_Map { map })) as *const _Map; 39 | log::debug!("Created map: {:?}", map_ptr); 40 | 41 | map_ptr 42 | } 43 | 44 | #[no_mangle] 45 | pub unsafe extern "C" fn insert_in_map(map_ptr: *const _Map, line: *const c_char) { 46 | log::debug!("Inserting line in map..."); 47 | 48 | if map_ptr.is_null() { 49 | log::error!("ERROR: Passed a null pointer for the map"); 50 | } else if line.is_null() { 51 | log::error!("ERROR: Passed a null pointer for the line"); 52 | } else { 53 | let c_str = CStr::from_ptr(line); 54 | 55 | match c_str.to_str() { 56 | Ok(line) => { 57 | log::debug!("Inserting line into map: {:?}", line); 58 | 59 | // We convert back to an `LcsMap` (from an `_Map` pointer) but we need to prevent 60 | // it from being dropped when going out of scope. 61 | let mut map = Box::from_raw(map_ptr as *mut _Map); 62 | 63 | map.map.insert(line); 64 | 65 | // Don't drop the map! 
66 | std::mem::forget(map); 67 | } 68 | Err(e) => { 69 | log::error!("Given line is not a valid UTF-8 string {:?}", e); 70 | } 71 | } 72 | } 73 | } 74 | 75 | #[no_mangle] 76 | pub unsafe extern "C" fn get_match(map_ptr: *const _Map, line: *const c_char) -> *const _Object { 77 | log::debug!("Matching line in map..."); 78 | 79 | if map_ptr.is_null() { 80 | log::error!("ERROR: Passed a null pointer for the map"); 81 | std::ptr::null() 82 | } else if line.is_null() { 83 | log::error!("ERROR: Passed a null pointer for the line"); 84 | std::ptr::null() 85 | } else { 86 | let c_str = CStr::from_ptr(line); 87 | 88 | match c_str.to_str() { 89 | Ok(line) => { 90 | log::debug!("Matching line into map: {:?}", line); 91 | 92 | // We convert back to an `LcsMap` (from an `_Map` pointer) but we need to prevent 93 | // it from being dropped when going out of scope. 94 | let map = Box::from_raw(map_ptr as *mut _Map); 95 | 96 | let tokens = tokenize(&line, map.map.delimiters.as_slice()) 97 | .map(|token| token.to_string()) 98 | .collect(); 99 | 100 | let object_ptr = match map.map.get_match(&tokens) { 101 | Some(lcs_objects) => { 102 | log::info!("Line found in map: {:?}", lcs_objects); 103 | 104 | Box::into_raw(Box::new(_Object { 105 | object: LcsObject { 106 | tokens: lcs_objects.tokens.clone(), 107 | lines_ids: lcs_objects.lines_ids.clone(), 108 | }, 109 | })) as *const _Object 110 | } 111 | None => { 112 | log::warn!("Line not found in map"); 113 | std::ptr::null() 114 | } 115 | }; 116 | 117 | // Don't drop the map! 118 | std::mem::forget(map); 119 | 120 | object_ptr 121 | } 122 | Err(e) => { 123 | log::error!("Given line is not a valid UTF-8 string {:?}", e); 124 | std::ptr::null() 125 | } 126 | } 127 | } 128 | } 129 | 130 | #[no_mangle] 131 | pub unsafe extern "C" fn free_map(map_ptr: *const _Map) { 132 | log::debug!("Attempting to drop map {:?}", map_ptr); 133 | if map_ptr.is_null() { 134 | log::warn!("Attempted to drop null pointer. 
Skipping."); 135 | } else { 136 | log::debug!("Dropping map..."); 137 | let map = Box::from_raw(map_ptr as *mut _Map); 138 | std::mem::drop(map); 139 | } 140 | } 141 | 142 | #[no_mangle] 143 | pub unsafe extern "C" fn object_tokens_len(object_ptr: *const _Object) -> u64 { 144 | if object_ptr.is_null() { 145 | 0 146 | } else { 147 | let object = Box::from_raw(object_ptr as *mut _Object); 148 | 149 | let length = object.object.tokens.len(); 150 | 151 | std::mem::forget(object); 152 | 153 | length as u64 154 | } 155 | } 156 | 157 | #[no_mangle] 158 | pub unsafe extern "C" fn object_lines_ids_len(object_ptr: *const _Object) -> u64 { 159 | if object_ptr.is_null() { 160 | 0 161 | } else { 162 | let object = Box::from_raw(object_ptr as *mut _Object); 163 | 164 | let length = object.object.lines_ids.len(); 165 | 166 | std::mem::forget(object); 167 | 168 | length as u64 169 | } 170 | } 171 | 172 | #[no_mangle] 173 | pub unsafe extern "C" fn object_ith_token(object_ptr: *const _Object, i: u64) -> *const c_char { 174 | if object_ptr.is_null() { 175 | std::ptr::null() 176 | } else { 177 | let object = Box::from_raw(object_ptr as *mut _Object); 178 | 179 | let i = i as usize; 180 | let token_str_ptr = if i < object.object.tokens.len() { 181 | let c_str = CString::new(object.object.tokens[i].clone()).unwrap(); 182 | c_str.into_raw() 183 | } else { 184 | std::ptr::null() 185 | }; 186 | 187 | std::mem::forget(object); 188 | 189 | token_str_ptr 190 | } 191 | } 192 | 193 | #[no_mangle] 194 | pub unsafe extern "C" fn object_ith_line_id(object_ptr: *const _Object, i: u64) -> usize { 195 | if object_ptr.is_null() { 196 | log::error!("Null pointer passed to object_ith_line_id(), returning 0 instead of crashing."); 197 | 0 198 | } else { 199 | let object = Box::from_raw(object_ptr as *mut _Object); 200 | 201 | let i = i as usize; 202 | let line_id_str_ptr = if i < object.object.lines_ids.len() { 203 | object.object.lines_ids[i] 204 | } else { 205 | log::error!("Index passed to 
def version():
    """Return the package version declared in ``pyspellrs/Cargo.toml``.

    The file is looked up relative to the current working directory. The first
    line that starts with ``version = "..."`` is used.

    Raises:
        RuntimeError: if no such line is found.
    """
    with open('pyspellrs/Cargo.toml', encoding='utf-8') as f:
        content = f.read()

    # Raw string so the pattern holds no invalid escape sequences. Anchoring at
    # the start of a line skips inline `version = "..."` keys inside dependency
    # tables, and `+` refuses an empty version.
    m = re.search(r'^version\s*=\s*"([0-9.]+)"', content, re.MULTILINE)
    if m is None:
        raise RuntimeError('No version = "..." line found in pyspellrs/Cargo.toml')
    return m.group(1)
# Package metadata. The native library is built by milksnake through the
# `build_native` task.
setup(
    name='spellrs',
    version=version(),
    # NOTE(review): this points at the Travis CI page, not the source repository -- confirm.
    url="https://travis-ci.org/nbigaouette/spell-rs",
    author="Nicolas Bigaouette",
    author_email="nbigaouette@gmail.com",
    long_description=readme(),
    # readme() returns the contents of README.md, so declare it as Markdown;
    # without this the text is treated as reStructuredText.
    long_description_content_type='text/markdown',
    python_requires='>=3',
    packages=find_packages(),
    include_package_data=True,
    zip_safe=False,
    platforms='any',
    # milksnake must be available while setup() itself runs, otherwise the
    # `milksnake_tasks` keyword below is not recognised.
    setup_requires=[
        'milksnake',
    ],
    install_requires=[
        'milksnake',
        'wheel',
    ],
    milksnake_tasks=[
        build_native,
    ]
)
/// Splits `input` into its non-empty tokens.
///
/// Leading and trailing whitespace is trimmed from `input` first. The remainder is split on
/// every character found in `delimiters`, and empty pieces (produced by consecutive
/// delimiters) are dropped. With an empty `delimiters` slice the whole trimmed input is
/// yielded as a single token.
///
/// The returned iterator borrows from both `input` and `delimiters`.
///
/// # Examples
///
/// ```
/// let tokens : Vec<_> = spell::tokenize("a,b c", &[' ', ',']).collect();
///
/// assert_eq!(vec!["a", "b", "c"], tokens);
/// ```
pub fn tokenize<'a>(input: &'a str, delimiters: &'a [char]) -> impl Iterator<Item = &'a str> {
    input
        .trim()
        .split(delimiters)
        .filter(|token| !token.is_empty())
}
| #[cfg(test)] 25 | mod tests { 26 | use super::*; 27 | 28 | #[test] 29 | fn tokenization() { 30 | let input = "Command Failed on: node-127,node-234"; 31 | let tokenized: Vec<_> = tokenize(input, &[' ']).collect(); 32 | let expected = vec!["Command", "Failed", "on:", "node-127,node-234"]; 33 | assert_eq!(tokenized, expected); 34 | } 35 | 36 | #[test] 37 | fn tokenization_with_multiple_delimiters() { 38 | let input = "Command Failed on: node-127,node-234"; 39 | let tokenized: Vec<_> = tokenize(input, &[' ', ',', ':']).collect(); 40 | let expected = vec!["Command", "Failed", "on", "node-127", "node-234"]; 41 | assert_eq!(tokenized, expected); 42 | } 43 | 44 | #[test] 45 | fn tokenization_with_no_delimiters() { 46 | let input = "Command Failed on: node-127,node-234"; 47 | let tokenized: Vec<_> = tokenize(input, &[]).collect(); 48 | assert_eq!(tokenized, [input]); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/map.rs: -------------------------------------------------------------------------------- 1 | use serde_derive::*; 2 | 3 | use crate::{object::LcsObject, tokenize, LcsDelimiters, LcsSeq, LineId}; 4 | 5 | #[derive(Default, Debug, PartialEq, Serialize, Deserialize)] 6 | pub struct LcsMap { 7 | pub seq: Vec, 8 | pub line_id: LineId, 9 | pub delimiters: LcsDelimiters, 10 | } 11 | 12 | macro_rules! 
fold_get_match { 13 | ($iter:expr, $tokenized:expr) => { 14 | $iter.fold((None, 0), |(best_match, best_match_length), obj| { 15 | // Pruning as described in paper 16 | if obj.length() < $tokenized.len() / 2 || obj.length() > $tokenized.len() * 2 { 17 | (best_match, best_match_length) 18 | } else { 19 | let l = obj.get_lcs($tokenized); 20 | if l >= $tokenized.len() / 2 && l > best_match_length { 21 | (Some(obj), l) 22 | } else { 23 | (best_match, best_match_length) 24 | } 25 | } 26 | }) 27 | }; 28 | } 29 | 30 | impl LcsMap { 31 | pub fn new() -> LcsMap { 32 | LcsMap { 33 | delimiters: vec![' '], 34 | ..Default::default() 35 | } 36 | } 37 | 38 | /// Constructor to create an LcsMap with different set of delimiters. 39 | pub fn with_delimiters(delimiters: Vec) -> LcsMap { 40 | LcsMap { 41 | delimiters, 42 | ..LcsMap::new() 43 | } 44 | } 45 | 46 | pub fn insert(&mut self, entry: &str) { 47 | let tokenized: LcsSeq = tokenize(entry, self.delimiters.as_slice()) 48 | .map(|token| token.to_string()) 49 | .collect(); 50 | 51 | let line_id = self.line_id; 52 | 53 | match self.get_match_mut(&tokenized) { 54 | None => { 55 | let obj = LcsObject::new(tokenized, line_id); 56 | self.seq.push(obj); 57 | } 58 | Some(obj) => { 59 | obj.insert(tokenized, line_id); 60 | } 61 | } 62 | self.line_id += 1; 63 | } 64 | 65 | pub fn get_match(&self, tokenized: &LcsSeq) -> Option<&LcsObject> { 66 | let (best_match, _best_match_length) = fold_get_match!(self.seq.iter(), tokenized); 67 | best_match 68 | } 69 | 70 | pub fn get_match_mut(&mut self, tokenized: &LcsSeq) -> Option<&mut LcsObject> { 71 | let (best_match, _best_match_length) = fold_get_match!(self.seq.iter_mut(), tokenized); 72 | best_match 73 | } 74 | 75 | // fn object_at(&self, index: usize) -> Option<&LcsObject> { 76 | // self.seq.get(index) 77 | // } 78 | 79 | fn size(&self) -> usize { 80 | self.seq.len() 81 | } 82 | } 83 | 84 | impl std::fmt::Display for LcsMap { 85 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 
/// Inserts the fixture lines one at a time and, after each insertion, compares the map's
/// `Display` output with the matching expected snapshot.
#[test]
fn compare_java() {
    let inputs = fixtures_input_var_log_messages_lines();
    let expected = fixtures_output_original_impl();

    let mut map = LcsMap::new();

    for (line, snapshot) in inputs.iter().zip(expected.iter()) {
        map.insert(line);
        let to_check = map.to_string();
        assert_eq!(to_check, *snapshot);
    }
}
s.to_string()) 233 | .collect(), 234 | lines_ids: vec![0], 235 | }, 236 | LcsObject { 237 | tokens: [ 238 | "Jan", 239 | "22", 240 | "04:11:04", 241 | "combo", 242 | "logrotate:", 243 | "ALERT", 244 | "exited", 245 | "abnormally", 246 | "with", 247 | "[1]", 248 | ] 249 | .iter() 250 | .map(|s| s.to_string()) 251 | .collect(), 252 | lines_ids: vec![1], 253 | }, 254 | LcsObject { 255 | tokens: [ 256 | "Jan", 257 | "22", 258 | "04:16:40", 259 | "combo", 260 | "su(pam_unix)[21719]:", 261 | "session", 262 | "opened", 263 | "for", 264 | "user", 265 | "news", 266 | "by", 267 | "(uid=0)", 268 | ] 269 | .iter() 270 | .map(|s| s.to_string()) 271 | .collect(), 272 | lines_ids: vec![2], 273 | }, 274 | ], 275 | line_id: 3, 276 | delimiters: vec![' '], 277 | }; 278 | assert_eq!(map, expected); 279 | 280 | map.insert(inputs[3]); 281 | let expected = LcsMap { 282 | seq: vec![ 283 | LcsObject { 284 | tokens: [ 285 | "Jan", "22", "04:11:04", "combo", "syslogd", "1.4.1:", "restart.", 286 | ] 287 | .iter() 288 | .map(|s| s.to_string()) 289 | .collect(), 290 | lines_ids: vec![0], 291 | }, 292 | LcsObject { 293 | tokens: [ 294 | "Jan", 295 | "22", 296 | "04:11:04", 297 | "combo", 298 | "logrotate:", 299 | "ALERT", 300 | "exited", 301 | "abnormally", 302 | "with", 303 | "[1]", 304 | ] 305 | .iter() 306 | .map(|s| s.to_string()) 307 | .collect(), 308 | lines_ids: vec![1], 309 | }, 310 | LcsObject { 311 | tokens: [ 312 | "Jan", 313 | "22", 314 | "*", 315 | "combo", 316 | "su(pam_unix)[21719]:", 317 | "session", 318 | "*", 319 | "for", 320 | "user", 321 | "news", 322 | ] 323 | .iter() 324 | .map(|s| s.to_string()) 325 | .collect(), 326 | lines_ids: vec![2, 3], 327 | }, 328 | ], 329 | line_id: 4, 330 | delimiters: vec![' '], 331 | }; 332 | assert_eq!(map, expected); 333 | 334 | map.insert(inputs[4]); 335 | let expected = LcsMap { 336 | seq: vec![ 337 | LcsObject { 338 | tokens: [ 339 | "Jan", "22", "04:11:04", "combo", "syslogd", "1.4.1:", "restart.", 340 | ] 341 | .iter() 342 | .map(|s| 
s.to_string()) 343 | .collect(), 344 | lines_ids: vec![0], 345 | }, 346 | LcsObject { 347 | tokens: [ 348 | "Jan", 349 | "22", 350 | "04:11:04", 351 | "combo", 352 | "logrotate:", 353 | "ALERT", 354 | "exited", 355 | "abnormally", 356 | "with", 357 | "[1]", 358 | ] 359 | .iter() 360 | .map(|s| s.to_string()) 361 | .collect(), 362 | lines_ids: vec![1], 363 | }, 364 | LcsObject { 365 | tokens: ["Jan", "22", "*", "combo", "*", "user", "*"] 366 | .iter() 367 | .map(|s| s.to_string()) 368 | .collect(), 369 | lines_ids: vec![2, 3, 4], 370 | }, 371 | ], 372 | line_id: 5, 373 | delimiters: vec![' '], 374 | }; 375 | assert_eq!(map, expected); 376 | 377 | map.insert(inputs[5]); 378 | let expected = LcsMap { 379 | seq: vec![ 380 | LcsObject { 381 | tokens: [ 382 | "Jan", "22", "04:11:04", "combo", "syslogd", "1.4.1:", "restart.", 383 | ] 384 | .iter() 385 | .map(|s| s.to_string()) 386 | .collect(), 387 | lines_ids: vec![0], 388 | }, 389 | LcsObject { 390 | tokens: [ 391 | "Jan", 392 | "22", 393 | "04:11:04", 394 | "combo", 395 | "logrotate:", 396 | "ALERT", 397 | "exited", 398 | "abnormally", 399 | "with", 400 | "[1]", 401 | ] 402 | .iter() 403 | .map(|s| s.to_string()) 404 | .collect(), 405 | lines_ids: vec![1], 406 | }, 407 | LcsObject { 408 | tokens: ["Jan", "22", "*", "combo", "*", "user", "*"] 409 | .iter() 410 | .map(|s| s.to_string()) 411 | .collect(), 412 | lines_ids: vec![2, 3, 4], 413 | }, 414 | LcsObject { 415 | tokens: [ 416 | "Jan", 417 | "22", 418 | "05:23:19", 419 | "combo", 420 | "sshd(pam_unix)[24892]:", 421 | "authentication", 422 | "failure;", 423 | "logname=", 424 | "uid=0", 425 | "euid=0", 426 | "tty=NODEVssh", 427 | "ruser=", 428 | "rhost=server3.sugolan.hu", 429 | ] 430 | .iter() 431 | .map(|s| s.to_string()) 432 | .collect(), 433 | lines_ids: vec![5], 434 | }, 435 | ], 436 | line_id: 6, 437 | delimiters: vec![' '], 438 | }; 439 | assert_eq!(map, expected); 440 | } 441 | 442 | #[test] 443 | fn parse_log_with_delimiters() { 444 | let inputs = 
fixtures_input_var_log_messages_lines(); 445 | 446 | let mut map = LcsMap::with_delimiters(vec![' ', ':']); 447 | let expected = LcsMap { 448 | seq: Vec::new(), 449 | line_id: 0, 450 | delimiters: vec![' ', ':'], 451 | }; 452 | assert_eq!(map, expected); 453 | 454 | map.insert(inputs[0]); 455 | let expected = LcsMap { 456 | seq: vec![LcsObject { 457 | tokens: [ 458 | "Jan", "22", "04", "11", "04", "combo", "syslogd", "1.4.1", "restart.", 459 | ] 460 | .iter() 461 | .map(|s| s.to_string()) 462 | .collect(), 463 | lines_ids: vec![0], 464 | }], 465 | line_id: 1, 466 | delimiters: vec![' ', ':'], 467 | }; 468 | assert_eq!(map, expected); 469 | } 470 | } 471 | -------------------------------------------------------------------------------- /src/object.rs: -------------------------------------------------------------------------------- 1 | use serde_derive::*; 2 | 3 | use crate::{tokenize, LcsSeq, LineId, LcsDelimiters}; 4 | 5 | #[derive(Default, Debug, PartialEq, Serialize, Deserialize)] 6 | pub struct LcsObject { 7 | pub tokens: LcsSeq, 8 | pub lines_ids: Vec, 9 | } 10 | 11 | impl LcsObject { 12 | pub fn from_str(s: &str, line_id: LineId, delimiters: LcsDelimiters) -> LcsObject { 13 | let tokens = tokenize(s, delimiters.as_slice()).map(|s| s.to_string()).collect(); 14 | LcsObject::new(tokens, line_id) 15 | } 16 | 17 | pub fn from_str_slice(tokens: &[&str], line_id: LineId) -> LcsObject { 18 | LcsObject { 19 | tokens: tokens.iter().map(|s| s.to_string()).collect(), 20 | lines_ids: vec![line_id], 21 | } 22 | } 23 | 24 | pub fn new(tokens: LcsSeq, line_id: LineId) -> LcsObject { 25 | LcsObject { 26 | tokens, 27 | lines_ids: vec![line_id], 28 | } 29 | } 30 | 31 | pub fn get_lcs(&self, other: &LcsSeq) -> usize { 32 | let mut count = 0; 33 | 34 | // Simple loop implementation from the paper 35 | let mut last_match = 0; 36 | for s_left in self.tokens.iter().filter(|s| *s != "*") { 37 | for (j, s_right) in other.iter().enumerate().skip(last_match) { 38 | if s_left == s_right { 
39 | last_match = j + 1; 40 | count += 1; 41 | break; 42 | } 43 | } 44 | } 45 | 46 | count 47 | } 48 | 49 | pub fn insert(&mut self, tokens: LcsSeq, line_id: LineId) { 50 | self.lines_ids.push(line_id); 51 | let mut tmp = String::with_capacity(256); 52 | let mut last_match = 0; 53 | let mut placeholder = false; 54 | for s_left in self.tokens.iter() { 55 | if s_left == "*" { 56 | if !placeholder { 57 | tmp.push_str("* "); 58 | } 59 | placeholder = true; 60 | continue; 61 | } 62 | 63 | for (j, s_right) in tokens.iter().enumerate().skip(last_match) { 64 | if s_left == s_right { 65 | placeholder = false; 66 | tmp.push_str(&s_left); 67 | tmp.push(' '); 68 | last_match = j + 1; 69 | break; 70 | } else if !placeholder { 71 | tmp.push_str("* "); 72 | placeholder = true; 73 | } 74 | } 75 | } 76 | 77 | self.tokens = tmp.trim().split(' ').map(|s| s.to_string()).collect(); 78 | } 79 | 80 | pub fn length(&self) -> usize { 81 | self.tokens.len() 82 | } 83 | 84 | pub fn count(&self) -> usize { 85 | self.lines_ids.len() 86 | } 87 | } 88 | 89 | impl std::fmt::Display for LcsObject { 90 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 91 | write!( 92 | f, 93 | "{}\n\t\t{{{}}}", 94 | self.tokens 95 | .iter() 96 | .map(|s| &**s) 97 | .collect::>() 98 | .join(" "), 99 | self.lines_ids 100 | .iter() 101 | .map(|i| i.to_string()) 102 | .collect::>() 103 | .join(", ") 104 | ) 105 | } 106 | } 107 | 108 | #[cfg(test)] 109 | mod tests { 110 | use super::*; 111 | 112 | #[test] 113 | fn constructor() { 114 | let input = "Command Failed on: node-127"; 115 | let _lcs_object = LcsObject::from_str(input, 0, vec![' ']); 116 | } 117 | } 118 | --------------------------------------------------------------------------------