├── .gitignore ├── .github ├── dependabot.yml └── workflows │ ├── release.yml │ └── ci.yml ├── .pre-commit-config.yaml ├── LICENSE.txt ├── Cargo.toml ├── src ├── lib.rs ├── severity.rs ├── facility.rs ├── message.rs └── parser.rs ├── README.md ├── CHANGES.md └── examples └── bench.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | 3 | Cargo\.lock 4 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | day: "wednesday" 8 | reviewers: 9 | - Roguelazer 10 | - package-ecosystem: "cargo" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | day: "wednesday" 15 | reviewers: 16 | - Roguelazer 17 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish to crates.io 2 | on: 3 | push: 4 | tags: ['v*'] # Triggers when pushing tags starting with 'v' 5 | jobs: 6 | publish: 7 | runs-on: ubuntu-latest 8 | environment: release 9 | permissions: 10 | id-token: write 11 | steps: 12 | - uses: actions/checkout@v6 13 | - uses: rust-lang/crates-io-auth-action@v1 14 | id: auth 15 | - run: cargo publish 16 | env: 17 | CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }} 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v6.0.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - id: check-case-conflict 8 | - id: check-illegal-windows-names 9 | - id: 
check-merge-conflict 10 | - id: check-symlinks 11 | - id: check-toml 12 | - id: check-yaml 13 | - id: check-json 14 | - repo: https://github.com/google/yamlfmt 15 | rev: v0.17.2 16 | hooks: 17 | - id: yamlfmt 18 | - repo: https://github.com/rhysd/actionlint 19 | rev: v1.7.8 20 | hooks: 21 | - id: actionlint 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2016-2019 James Brown 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. 4 | 5 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "syslog_rfc5424" 3 | version = "0.10.0" 4 | authors = ["James Brown "] 5 | description = "Parser for RFC5424 (IETF-format) syslog messages" 6 | documentation = "https://docs.rs/syslog_rfc5424/" 7 | homepage = "https://github.com/Roguelazer/rust-syslog-rfc5424" 8 | repository = "https://github.com/Roguelazer/rust-syslog-rfc5424" 9 | license = "ISC" 10 | readme = "README.md" 11 | edition = "2024" 12 | rust-version = "1.85" 13 | 14 | [dependencies] 15 | time = "0.3" 16 | serde = { version = "1.0", optional = true, features=["derive"] } 17 | serde_json = { version = "1.0", optional = true } 18 | thiserror = "2.0" 19 | 20 | [dev-dependencies] 21 | timeit = { version = "0.1", git = "https://github.com/Roguelazer/timeit", rev = "9e9f2e1b9ab9537a72fc4e59ccfc1e89b5b51239" } 22 | 23 | [features] 24 | serde-serialize = ["serde", "serde_json"] 25 | 26 | [package.metadata.docs.rs] 27 | all-features = true 28 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: [master, main] 5 | pull_request: 6 | env: 7 | CARGO_TERM_COLOR: always 8 | jobs: 9 | pre-commit: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v6 13 | - uses: actions/setup-python@v6 14 | with: 15 | python-version: "3.13" 16 | - uses: pre-commit/action@v3.0.1 17 | style: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v6 22 | - uses: dtolnay/rust-toolchain@stable 23 | with: 24 | components: rustfmt,clippy 25 | toolchain: 1.91.1 26 | - uses: Swatinem/rust-cache@v2 27 | - name: cargo fmt 28 | run: cargo fmt --all -- --check 29 | - name: cargo clippy 30 | run: cargo clippy -- -D 
warnings 31 | test: 32 | runs-on: ubuntu-latest 33 | strategy: 34 | matrix: 35 | rust: ["1.85.0", stable, beta] 36 | steps: 37 | - uses: actions/checkout@v6 38 | - uses: dtolnay/rust-toolchain@stable 39 | with: 40 | toolchain: ${{ matrix.rust }} 41 | - uses: Swatinem/rust-cache@v2 42 | - name: Test 43 | run: cargo test --all-features 44 | audit: 45 | runs-on: ubuntu-latest 46 | steps: 47 | - uses: actions/checkout@v6 48 | - uses: dtolnay/rust-toolchain@stable 49 | with: 50 | toolchain: stable 51 | - uses: taiki-e/install-action@v2 52 | with: 53 | tool: cargo-audit 54 | - run: cargo audit 55 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Parser for [RFC 5424](https://tools.ietf.org/html/rfc5424) Syslog messages. Not to be confused 2 | //! with the older [RFC 3164](https://tools.ietf.org/html/rfc3164) BSD Syslog protocol, which many 3 | //! systems still emit. 4 | //! 5 | //! In particular, supports the Structured Data fields. 6 | //! 7 | //! Usually, you'll just call the (re-exported) `parse_message` function with a stringy object. 8 | //! 9 | //! # Example 10 | //! 11 | //! A simple syslog server 12 | //! 13 | //! ```no_run 14 | //! use syslog_rfc5424::SyslogMessage; 15 | //! use std::net::UdpSocket; 16 | //! use std::str; 17 | //! 18 | //! let s = UdpSocket::bind("127.0.0.1:10514").unwrap(); 19 | //! let mut buf = [0u8; 2048]; 20 | //! loop { 21 | //! let (data_read, _) = s.recv_from(&mut buf).unwrap(); 22 | //! let msg = str::from_utf8(&buf[0..data_read]).unwrap().parse::<SyslogMessage>().unwrap(); 23 | //! println!("{:?} {:?} {:?} {:?}", msg.facility, msg.severity, msg.hostname, msg.msg); 24 | //! } 25 | //! ``` 26 | //! 27 | //! # Unimplemented Features 28 | //! 29 | //! * Theoretically, you can send arbitrary (non-unicode) bytes for the message part of a syslog 30 | //! message. 
Rust doesn't have a convenient way to only treat *some* of a buffer as utf-8, 31 | //! so I'm just not supporting that. Most "real" syslog servers barf on it anyway. 32 | //! 33 | mod facility; 34 | pub mod message; 35 | pub mod parser; 36 | mod severity; 37 | 38 | pub use facility::SyslogFacility; 39 | pub use severity::SyslogSeverity; 40 | 41 | pub use message::SyslogMessage; 42 | pub use parser::parse_message; 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This module implements an [RFC 5424](https://tools.ietf.org/html/rfc5424) IETF Syslog Protocol parser in Rust. 2 | 3 | [![CI](https://github.com/Roguelazer/rust-syslog-rfc5424/workflows/CI/badge.svg?branch=main)](https://github.com/Roguelazer/rust-syslog-rfc5424/actions/workflows/ci.yml) 4 | [![Documentation](https://docs.rs/syslog_rfc5424/badge.svg)](https://docs.rs/syslog_rfc5424) 5 | [![crates.io](https://img.shields.io/crates/v/syslog_rfc5424.svg)](https://crates.io/crates/syslog_rfc5424) 6 | 7 | This tool supports serializing the parsed messages using serde if it's built with the `serde-serialize` feature. 8 | 9 | This library is licensed under the ISC license, a copy of which can be found in [LICENSE.txt](LICENSE.txt) 10 | 11 | The minimum supported Rust version for this library is 1.85. 12 | 13 | ## Performance 14 | 15 | On a recent system[1](#sysfootnote), a release build takes approximately 8µs to parse an average message and approximately 300ns to parse the smallest legal message. Debug timings are a bit worse -- about 60µs for an average message and about 8µs for the minimal message. A single-threaded Syslog server should be able to parse at least 100,000 messages/s, as long as you run a separate thread for the parser. 
16 | 17 | This compares *very* favorably to [python syslog-rfc5424-parser](https://github.com/EasyPost/syslog-rfc5424-parser)[2](#fn2), which takes about 300µs for a minimal message, and more than 700µs for an average message. 18 | 19 | ## Footnotes 20 | 21 | * 1: An Intel i7-4850HQ in a 2013 rMBP 22 | * 2: Which I also wrote 23 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | 0.10.0 (2025-11-11) 2 | ------------------- 3 | - Bump to Edition 2024 and MSRV 1.85 4 | - Rename primary branch from `master` to `main` 5 | 6 | 0.9.1 (2025-11-11) 7 | ------------------ 8 | - Allow input timestamps to have 9 digits of fractional precision, even though RFC 5424 says 6 (@cbeck88, #26) 9 | - Fix warning in `parse_param_value` on newer rustc versions 10 | - Add dependabot 11 | 12 | 0.9.0 (2022-07-15) 13 | ------------------ 14 | - Allow inserting empty structured data through the `.entry()` method on StructuredData (@thijsc, #22) 15 | 16 | 0.8.0 (2022-01-28) 17 | ------------------ 18 | - Upgrade `time` dependency to 0.3 to resolve cargo audits 19 | - Switch from Travis-CI to Github Actions for CI 20 | 21 | 0.7.0 (2020-09-24) 22 | ------------------ 23 | - Bump to Rust 2018 edition 24 | - Bump MSRV to 1.34 25 | - Add public `TryFrom` implementations for severity and facility (requested in #16) 26 | - rustfmt/clippyize/etc 27 | 28 | 0.6.1 (2019-01-19) 29 | ------------------ 30 | - Fix sign error in numeric timezone offsets (thanks to @main-- for reporting this on GitHub) 31 | 32 | 0.6.0 (2018-07-14) 33 | ------------------ 34 | - Parse subsecond part of timestamps and include it as the `timestamp_nanos` field (thanks @bwtril-justin) 35 | 36 | 0.5.1 (2018-05-15) 37 | ------------------ 38 | - Allow terms (hostnames, appnames) to start with a hyphen 39 | 40 | 0.5.0 (2018-05-15) 41 | ------------------ 42 | - Remove `Severity::from_int` 43 | - Rename 
`ProcIdType` to `ProcId` 44 | - Remove rustc-serialize 45 | - Implement `FromStr` for `SyslogMessage`, allowing more idiomatic parsing 46 | - Implement Ord/PartialOrd/Eq/PartialEq in more places 47 | - Make clippy and rustfmt happy 48 | 49 | 0.4.2 (2018-05-15) 50 | ------------------ 51 | - Make `docs.rs` build with all features 52 | 53 | 0.4.1 (2018-05-15) 54 | ------------------ 55 | - Fix bug parsing message with non-empty SD fields but empty message body 56 | 57 | 0.4.0 (2017-10-24) 58 | ---------- 59 | - Make `rustc-serialize` support optional behind the self-named feature flag 60 | - Add optional `serde` support behind the `serde-serialize` feature flag 61 | 62 | 0.3.1 (2017-10-24) 63 | ----------- 64 | - Use AsRef in the message parser instead of Into, since we do not *need* ownership 65 | - Support sub-second timestamp resolution (Fixes #5 / #6) 66 | - Add more tests 67 | - Fix various clippy concerns 68 | 69 | 0.3.0 (2016-05-30) 70 | ------------------ 71 | - add Deref to StructuredMessage (#4, via @pzol) 72 | - return more references instead of strings in the parser (#3) 73 | 74 | 0.2.0 (2016-02-22) 75 | ------------------ 76 | - add rustc_serialize integration 77 | - store structured data in a map instead in nested structs 78 | -------------------------------------------------------------------------------- /src/severity.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryFrom; 2 | 3 | #[cfg(feature = "serde-serialize")] 4 | use serde::{Serialize, Serializer}; 5 | 6 | use thiserror::Error; 7 | 8 | #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 9 | #[allow(non_camel_case_types)] 10 | /// Syslog Severities from RFC 5424. 
11 | pub enum SyslogSeverity { 12 | SEV_EMERG = 0, 13 | SEV_ALERT = 1, 14 | SEV_CRIT = 2, 15 | SEV_ERR = 3, 16 | SEV_WARNING = 4, 17 | SEV_NOTICE = 5, 18 | SEV_INFO = 6, 19 | SEV_DEBUG = 7, 20 | } 21 | 22 | #[derive(Debug, Error)] 23 | pub enum SyslogSeverityError { 24 | #[error("integer does not correspond to a known severity")] 25 | InvalidInteger, 26 | } 27 | 28 | impl TryFrom for SyslogSeverity { 29 | type Error = SyslogSeverityError; 30 | 31 | #[inline(always)] 32 | fn try_from(i: i32) -> Result { 33 | Ok(match i { 34 | 0 => SyslogSeverity::SEV_EMERG, 35 | 1 => SyslogSeverity::SEV_ALERT, 36 | 2 => SyslogSeverity::SEV_CRIT, 37 | 3 => SyslogSeverity::SEV_ERR, 38 | 4 => SyslogSeverity::SEV_WARNING, 39 | 5 => SyslogSeverity::SEV_NOTICE, 40 | 6 => SyslogSeverity::SEV_INFO, 41 | 7 => SyslogSeverity::SEV_DEBUG, 42 | _ => return Err(SyslogSeverityError::InvalidInteger), 43 | }) 44 | } 45 | } 46 | 47 | impl SyslogSeverity { 48 | /// Convert an int (as used in the wire serialization) into a `SyslogSeverity` 49 | /// 50 | /// Returns an Option, but the wire protocol will only include 0..7, so should 51 | /// never return None in practical usage. 
#[cfg(feature = "serde-serialize")]
impl Serialize for SyslogSeverity {
    /// Serialize the severity as the string returned by `as_str`
    /// (for example `"emerg"` or `"info"`), not as its numeric code.
    fn serialize<S: Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> {
        ser.serialize_str(self.as_str())
    }
}
macro because the official benchmarking tools are 7 | // **still* nightly-Rust-only, even though they're, like, a year old 8 | 9 | #[cfg(feature = "serde-serialize")] 10 | fn bench_serde() { 11 | println!("Parsing an average message and encoding it to json with serde"); 12 | let average_message = r#"<29>1 2016-02-21T04:32:57+00:00 web1 someservice - - [origin x-service="someservice"][meta sequenceId="14125553"] 127.0.0.1 - - 1456029177 "GET /v1/ok HTTP/1.1" 200 145 "-" "hacheck 0.9.0" 24306 127.0.0.1:40124 575"#; 13 | timeit!({ 14 | let m = parse_message(average_message).unwrap(); 15 | serde_json::to_string(&m).unwrap(); 16 | }); 17 | 18 | let average_message = r#"<14>1 2017-07-26T14:47:35.869952+05:30 my_hostname custom_appname 5678 some_unique_msgid - \u{feff}Some other message"#; 19 | timeit!({ 20 | let m = parse_message(average_message).unwrap(); 21 | serde_json::to_string(&m).unwrap(); 22 | }); 23 | } 24 | 25 | fn main() { 26 | println!("Parsing the smallest possible message:"); 27 | let simple_message = "<1>1 - - - - - -"; 28 | timeit!({ 29 | parse_message(simple_message).unwrap(); 30 | }); 31 | println!("Parsing a complicated message:"); 32 | let complicated_message = "<78>1 2016-01-15T00:04:01Z host1 CROND 10391 - [meta sequenceId=\"29\" sequenceBlah=\"foo\"][my key=\"value\"] some_message"; 33 | timeit!({ 34 | parse_message(complicated_message).unwrap(); 35 | }); 36 | println!("Parsing a very long message:"); 37 | let large_message = r#"<190>1 2016-02-21T01:19:11+00:00 batch6sj - - - [meta sequenceId="21881798" x-group="37051387"][origin x-service="tracking"] metascutellar conversationalist nephralgic exogenetic graphy streng outtaken acouasm amateurism prenotice Lyonese bedull antigrammatical diosphenol gastriloquial bayoneteer sweetener naggy roughhouser dighter addend sulphacid uneffectless ferroprussiate reveal Mazdaist plaudite Australasian distributival wiseman rumness Seidel topazine shahdom sinsion mesmerically pinguedinous ophthalmotonometer 
scuppler wound eciliate expectedly carriwitchet dictatorialism bindweb pyelitic idic atule kokoon poultryproof rusticial seedlip nitrosate splenadenoma holobenthic uneternal Phocaean epigenic doubtlessly indirection torticollar robomb adoptedly outspeak wappenschawing talalgia Goop domitic savola unstrafed carded unmagnified mythologically orchester obliteration imperialine undisobeyed galvanoplastical cycloplegia quinquennia foremean umbonal marcgraviaceous happenstance theoretical necropoles wayworn Igbira pseudoangelic raising unfrounced lamasary centaurial Japanolatry microlepidoptera"#; 38 | timeit!({ 39 | parse_message(large_message).unwrap(); 40 | }); 41 | println!("Parsing an average message:"); 42 | let average_message = r#"<29>1 2016-02-21T04:32:57+00:00 web1 someservice - - [origin x-service="someservice"][meta sequenceId="14125553"] 127.0.0.1 - - 1456029177 "GET /v1/ok HTTP/1.1" 200 145 "-" "hacheck 0.9.0" 24306 127.0.0.1:40124 575"#; 43 | timeit!({ 44 | parse_message(average_message).unwrap(); 45 | }); 46 | #[cfg(feature = "serde-serialize")] 47 | bench_serde(); 48 | } 49 | -------------------------------------------------------------------------------- /src/facility.rs: -------------------------------------------------------------------------------- 1 | #[cfg(feature = "serde-serialize")] 2 | use serde::{Serialize, Serializer}; 3 | 4 | use std::convert::TryFrom; 5 | 6 | use thiserror::Error; 7 | 8 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd)] 9 | #[allow(non_camel_case_types)] 10 | /// Syslog facilities. Taken From RFC 5424, but I've heard that some platforms mix these around. 11 | /// Names are from Linux. 
12 | pub enum SyslogFacility { 13 | LOG_KERN = 0, 14 | LOG_USER = 1, 15 | LOG_MAIL = 2, 16 | LOG_DAEMON = 3, 17 | LOG_AUTH = 4, 18 | LOG_SYSLOG = 5, 19 | LOG_LPR = 6, 20 | LOG_NEWS = 7, 21 | LOG_UUCP = 8, 22 | LOG_CRON = 9, 23 | LOG_AUTHPRIV = 10, 24 | LOG_FTP = 11, 25 | LOG_NTP = 12, 26 | LOG_AUDIT = 13, 27 | LOG_ALERT = 14, 28 | LOG_CLOCKD = 15, 29 | LOG_LOCAL0 = 16, 30 | LOG_LOCAL1 = 17, 31 | LOG_LOCAL2 = 18, 32 | LOG_LOCAL3 = 19, 33 | LOG_LOCAL4 = 20, 34 | LOG_LOCAL5 = 21, 35 | LOG_LOCAL6 = 22, 36 | LOG_LOCAL7 = 23, 37 | } 38 | 39 | #[derive(Debug, Error)] 40 | pub enum SyslogFacilityError { 41 | #[error("integer does not correspond to a known facility")] 42 | InvalidInteger, 43 | } 44 | 45 | impl TryFrom for SyslogFacility { 46 | type Error = SyslogFacilityError; 47 | 48 | #[inline(always)] 49 | fn try_from(i: i32) -> Result { 50 | Ok(match i { 51 | 0 => SyslogFacility::LOG_KERN, 52 | 1 => SyslogFacility::LOG_USER, 53 | 2 => SyslogFacility::LOG_MAIL, 54 | 3 => SyslogFacility::LOG_DAEMON, 55 | 4 => SyslogFacility::LOG_AUTH, 56 | 5 => SyslogFacility::LOG_SYSLOG, 57 | 6 => SyslogFacility::LOG_LPR, 58 | 7 => SyslogFacility::LOG_NEWS, 59 | 8 => SyslogFacility::LOG_UUCP, 60 | 9 => SyslogFacility::LOG_CRON, 61 | 10 => SyslogFacility::LOG_AUTHPRIV, 62 | 11 => SyslogFacility::LOG_FTP, 63 | 12 => SyslogFacility::LOG_NTP, 64 | 13 => SyslogFacility::LOG_AUDIT, 65 | 14 => SyslogFacility::LOG_ALERT, 66 | 15 => SyslogFacility::LOG_CLOCKD, 67 | 16 => SyslogFacility::LOG_LOCAL0, 68 | 17 => SyslogFacility::LOG_LOCAL1, 69 | 18 => SyslogFacility::LOG_LOCAL2, 70 | 19 => SyslogFacility::LOG_LOCAL3, 71 | 20 => SyslogFacility::LOG_LOCAL4, 72 | 21 => SyslogFacility::LOG_LOCAL5, 73 | 22 => SyslogFacility::LOG_LOCAL6, 74 | 23 => SyslogFacility::LOG_LOCAL7, 75 | _ => return Err(SyslogFacilityError::InvalidInteger), 76 | }) 77 | } 78 | } 79 | 80 | impl SyslogFacility { 81 | /// Convert an int (as used in the wire serialization) into a `SyslogFacility` 82 | pub(crate) fn from_int(i: i32) 
#[cfg(feature = "serde-serialize")]
impl Serialize for SyslogFacility {
    /// Serialize the facility as the string returned by `as_str`
    /// (for example `"kern"` or `"local0"`), not as its numeric code.
    fn serialize<S: Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> {
        ser.serialize_str(self.as_str())
    }
}
2 | 3 | use std::cmp::Ordering; 4 | use std::collections::BTreeMap; 5 | use std::convert::Into; 6 | use std::ops; 7 | use std::str::FromStr; 8 | use std::string::String; 9 | 10 | #[cfg(feature = "serde-serialize")] 11 | use serde::{Serialize, Serializer}; 12 | 13 | #[allow(non_camel_case_types)] 14 | pub type time_t = i64; 15 | #[allow(non_camel_case_types)] 16 | pub type pid_t = i32; 17 | #[allow(non_camel_case_types)] 18 | pub type msgid_t = String; 19 | 20 | use crate::facility; 21 | use crate::parser; 22 | use crate::severity; 23 | 24 | #[derive(Clone, Debug, PartialEq, Eq)] 25 | /// `ProcID`s are usually numeric PIDs; however, on some systems, they may be something else 26 | pub enum ProcId { 27 | PID(pid_t), 28 | Name(String), 29 | } 30 | 31 | impl PartialOrd for ProcId { 32 | fn partial_cmp(&self, other: &ProcId) -> Option { 33 | match (self, other) { 34 | (ProcId::PID(s_p), ProcId::PID(o_p)) => Some(s_p.cmp(o_p)), 35 | (ProcId::Name(s_n), ProcId::Name(o_n)) => Some(s_n.cmp(o_n)), 36 | _ => None, 37 | } 38 | } 39 | } 40 | 41 | #[cfg(feature = "serde-serialize")] 42 | impl Serialize for ProcId { 43 | fn serialize(&self, ser: S) -> Result { 44 | match *self { 45 | ProcId::PID(ref p) => ser.serialize_i32(*p), 46 | ProcId::Name(ref n) => ser.serialize_str(n), 47 | } 48 | } 49 | } 50 | 51 | pub type SDIDType = String; 52 | pub type SDParamIDType = String; 53 | pub type SDParamValueType = String; 54 | 55 | pub type StructuredDataElement = BTreeMap; 56 | 57 | #[derive(Clone, Debug, PartialEq, Eq)] 58 | /// Container for the `StructuredData` component of a syslog message. 59 | /// 60 | /// This is a map from `SD_ID` to pairs of `SD_ParamID`, `SD_ParamValue` 61 | /// 62 | /// The spec does not forbid repeated keys. However, for convenience, we *do* forbid repeated keys. 63 | /// That is to say, if you have a message like 64 | /// 65 | /// [foo bar="baz" bar="bing"] 66 | /// 67 | /// There's no way to retrieve the original "baz" mapping. 
68 | pub struct StructuredData { 69 | elements: BTreeMap, 70 | } 71 | 72 | impl ops::Deref for StructuredData { 73 | type Target = BTreeMap; 74 | fn deref(&self) -> &Self::Target { 75 | &self.elements 76 | } 77 | } 78 | 79 | #[cfg(feature = "serde-serialize")] 80 | impl Serialize for StructuredData { 81 | fn serialize(&self, ser: S) -> Result { 82 | self.elements.serialize(ser) 83 | } 84 | } 85 | 86 | impl StructuredData { 87 | pub fn new_empty() -> Self { 88 | StructuredData { 89 | elements: BTreeMap::new(), 90 | } 91 | } 92 | 93 | /// Fetch or insert a new sd_id entry into the StructuredData 94 | pub fn entry(&mut self, sd_id: SI) -> &mut BTreeMap 95 | where 96 | SI: Into, 97 | { 98 | self.elements.entry(sd_id.into()).or_default() 99 | } 100 | 101 | /// Insert a new (sd_id, sd_param_id) -> sd_value mapping into the StructuredData 102 | pub fn insert_tuple(&mut self, sd_id: SI, sd_param_id: SPI, sd_param_value: SPV) 103 | where 104 | SI: Into, 105 | SPI: Into, 106 | SPV: Into, 107 | { 108 | self.entry(sd_id) 109 | .insert(sd_param_id.into(), sd_param_value.into()); 110 | } 111 | 112 | /// Lookup by SDID, SDParamID pair 113 | pub fn find_tuple<'b>( 114 | &'b self, 115 | sd_id: &str, 116 | sd_param_id: &str, 117 | ) -> Option<&'b SDParamValueType> { 118 | // TODO: use traits to make these based on the public types instead of &str 119 | if let Some(sub_map) = self.elements.get(sd_id) { 120 | if let Some(value) = sub_map.get(sd_param_id) { 121 | Some(value) 122 | } else { 123 | None 124 | } 125 | } else { 126 | None 127 | } 128 | } 129 | 130 | /// Find all param/value mappings for a given SDID 131 | pub fn find_sdid<'b>(&'b self, sd_id: &str) -> Option<&'b StructuredDataElement> { 132 | self.elements.get(sd_id) 133 | } 134 | 135 | /// The number of distinct SD_IDs 136 | pub fn len(&self) -> usize { 137 | self.elements.len() 138 | } 139 | 140 | /// Whether or not this is empty 141 | pub fn is_empty(&self) -> bool { 142 | self.elements.is_empty() 143 | } 144 | } 145 | 
146 | #[cfg_attr(feature = "serde-serialize", derive(Serialize))] 147 | #[derive(Clone, Debug, PartialEq, Eq)] 148 | /// A RFC5424-protocol syslog message 149 | pub struct SyslogMessage { 150 | pub severity: severity::SyslogSeverity, 151 | pub facility: facility::SyslogFacility, 152 | pub version: i32, 153 | pub timestamp: Option, 154 | pub timestamp_nanos: Option, 155 | pub hostname: Option, 156 | pub appname: Option, 157 | pub procid: Option, 158 | pub msgid: Option, 159 | pub sd: StructuredData, 160 | pub msg: String, 161 | } 162 | 163 | impl FromStr for SyslogMessage { 164 | type Err = parser::ParseErr; 165 | 166 | /// Parse a string into a `SyslogMessage` 167 | /// 168 | /// Just calls `parser::parse_message` 169 | fn from_str(s: &str) -> Result { 170 | parser::parse_message(s) 171 | } 172 | } 173 | 174 | #[cfg(test)] 175 | mod tests { 176 | use super::StructuredData; 177 | use super::SyslogMessage; 178 | #[cfg(feature = "serde-serialize")] 179 | use crate::facility::SyslogFacility::*; 180 | #[cfg(feature = "serde-serialize")] 181 | use crate::severity::SyslogSeverity::*; 182 | #[cfg(feature = "serde-serialize")] 183 | use serde_json; 184 | 185 | #[test] 186 | fn test_structured_data_basic() { 187 | let mut s = StructuredData::new_empty(); 188 | s.insert_tuple("foo", "bar", "baz"); 189 | let v = s.find_tuple("foo", "bar").expect("should find foo/bar"); 190 | assert_eq!(v, "baz"); 191 | assert!(s.find_tuple("foo", "baz").is_none()); 192 | } 193 | 194 | #[cfg(feature = "serde-serialize")] 195 | #[test] 196 | fn test_structured_data_serialization_serde() { 197 | let mut s = StructuredData::new_empty(); 198 | s.insert_tuple("foo", "bar", "baz"); 199 | s.insert_tuple("foo", "baz", "bar"); 200 | s.insert_tuple("faa", "bar", "baz"); 201 | let encoded = serde_json::to_string(&s).expect("Should encode to JSON"); 202 | assert_eq!( 203 | encoded, 204 | r#"{"faa":{"bar":"baz"},"foo":{"bar":"baz","baz":"bar"}}"# 205 | ); 206 | } 207 | 208 | #[cfg(feature = 
"serde-serialize")] 209 | #[test] 210 | fn test_serialization_serde() { 211 | let m = SyslogMessage { 212 | severity: SEV_INFO, 213 | facility: LOG_KERN, 214 | version: 1, 215 | timestamp: None, 216 | timestamp_nanos: None, 217 | hostname: None, 218 | appname: None, 219 | procid: None, 220 | msgid: None, 221 | sd: StructuredData::new_empty(), 222 | msg: String::from(""), 223 | }; 224 | 225 | let encoded = serde_json::to_string(&m).expect("Should encode to JSON"); 226 | // XXX: we don't have a guaranteed order, I don't think, so this might break with minor 227 | // version changes. *shrug* 228 | assert_eq!( 229 | encoded, 230 | "{\"severity\":\"info\",\"facility\":\"kern\",\"version\":1,\"timestamp\":null,\"timestamp_nanos\":null,\"hostname\":null,\"appname\":null,\"procid\":null,\"msgid\":null,\"sd\":{},\"msg\":\"\"}" 231 | ); 232 | } 233 | 234 | #[test] 235 | fn test_deref_structureddata() { 236 | let mut s = StructuredData::new_empty(); 237 | s.insert_tuple("foo", "bar", "baz"); 238 | s.insert_tuple("foo", "baz", "bar"); 239 | s.insert_tuple("faa", "bar", "baz"); 240 | assert_eq!("baz", s.get("foo").and_then(|foo| foo.get("bar")).unwrap()); 241 | assert_eq!("bar", s.get("foo").and_then(|foo| foo.get("baz")).unwrap()); 242 | assert_eq!("baz", s.get("faa").and_then(|foo| foo.get("bar")).unwrap()); 243 | } 244 | 245 | #[test] 246 | fn test_fromstr() { 247 | let msg = "<1>1 1985-04-12T23:20:50.52Z host - - - -" 248 | .parse::() 249 | .expect("Should parse empty message"); 250 | assert_eq!(msg.timestamp, Some(482196050)); 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | use std::convert::TryFrom; 3 | use std::num; 4 | use std::str; 5 | use std::str::FromStr; 6 | 7 | use thiserror::Error; 8 | 9 | use crate::facility; 10 | use crate::message::{ProcId, StructuredData, SyslogMessage}; 11 | use 
/// Errors reported by the syslog message parser.
///
/// The `Display` text of each variant is the string given in its `#[error]`
/// attribute. Variants carrying `#[from]` can be produced directly from the
/// wrapped standard-library error via `?`.
#[derive(Debug, Error)]
pub enum ParseErr {
    /// NOTE(review): the parser in this module is described as hand-written rather than
    /// regex-based, so this variant may be a leftover -- confirm whether it is still constructed.
    #[error("regular expression does not parse")]
    RegexDoesNotMatchErr,
    /// The severity in the message was not valid (presumably the severity part of PRI -- confirm
    /// against the call site).
    #[error("bad severity in message")]
    BadSeverityInPri,
    /// The facility in the message was not valid (presumably the facility part of PRI -- confirm
    /// against the call site).
    #[error("bad facility in message")]
    BadFacilityInPri,
    /// The input ended while the parser still expected more characters.
    #[error("unexpected eof")]
    UnexpectedEndOfInput,
    /// A numeric field had fewer digits than required.
    #[error("too few digits in numeric field")]
    TooFewDigits,
    /// A numeric field had more digits than allowed.
    #[error("too many digits in numeric field")]
    TooManyDigits,
    /// A UTC offset was invalid. NOTE(review): overlaps in meaning with `InvalidOffset`; the
    /// distinction is not visible from this enum alone.
    #[error("invalid UTC offset")]
    InvalidUTCOffset,
    /// Bytes were not valid UTF-8 (wraps `str::Utf8Error`).
    #[error("unicode error: {0}")]
    BaseUnicodeError(#[from] str::Utf8Error),
    /// Bytes were not valid UTF-8 when building an owned string (wraps `FromUtf8Error`).
    #[error("unicode error: {0}")]
    UnicodeError(#[from] std::string::FromUtf8Error),
    /// A specific character was required but something else was found. The `take_char!` macro
    /// stores the character that was *expected*, not the one actually encountered.
    #[error("unexpected input at character {0}")]
    ExpectedTokenErr(char),
    /// A numeric field could not be converted to an integer (wraps `num::ParseIntError`).
    #[error("integer conversion error: {0}")]
    IntConversionErr(#[from] num::ParseIntError),
    /// A required field was missing; the payload is the field's name.
    #[error("missing field {0}")]
    MissingField(&'static str),
    /// The month number in a date was not valid; the payload is the offending number.
    #[error("invalid month number {0}")]
    InvalidMonth(u8),
    /// A date component was not valid; the payload describes the offending field.
    #[error("date had invalid field {0}")]
    InvalidDate(String),
    /// A date carried an invalid UTC offset. NOTE(review): see `InvalidUTCOffset`.
    #[error("date had invalid UTC offset")]
    InvalidOffset,
}
/// Split `input` at the first character rejected by `f`, or after `max_chars` accepted
/// characters, whichever comes first.
///
/// Returns `(taken, Some(remainder))` when a split point is found inside `input`. Returns
/// `("", None)` when `input` ends before either stopping condition is met; callers use the
/// `None` to report an unexpected end of input.
///
/// The limit is counted in characters rather than bytes, so multi-byte UTF-8 input can neither
/// step over the limit nor hit it early. For pure-ASCII input the two counts coincide.
fn take_while<F>(input: &str, f: F, max_chars: usize) -> (&str, Option<&str>)
where
    F: Fn(char) -> bool,
{
    for (count, (idx, chr)) in input.char_indices().enumerate() {
        // `idx` is always a char boundary, so both slices below are panic-free.
        if !f(chr) || count == max_chars {
            return (&input[..idx], Some(&input[idx..]));
        }
    }
    ("", None)
}
:-( 125 | let mut result = String::new(); 126 | 127 | let mut saw_any_escapes = false; 128 | let mut escaped = false; 129 | 130 | for (idx, chr) in rest.char_indices() { 131 | if escaped { 132 | escaped = false 133 | } else { 134 | if chr == '\\' { 135 | escaped = true; 136 | if !saw_any_escapes { 137 | result.push_str(&rest[..idx]); 138 | } 139 | saw_any_escapes = true; 140 | continue; 141 | } 142 | if chr == '"' { 143 | let res_cow = if saw_any_escapes { 144 | Cow::Owned(result) 145 | } else { 146 | Cow::Borrowed(&rest[..idx]) 147 | }; 148 | return Ok((res_cow, &rest[(idx + 1)..])); 149 | } 150 | } 151 | if saw_any_escapes { 152 | result.push(chr); 153 | } 154 | } 155 | 156 | Err(ParseErr::UnexpectedEndOfInput) 157 | } 158 | 159 | type ParsedSDParams = Vec<(String, String)>; 160 | 161 | fn parse_sd_params(input: &str) -> ParseResult<(ParsedSDParams, &str)> { 162 | let mut params = Vec::new(); 163 | let mut top = input; 164 | loop { 165 | let Some(rest2) = maybe_expect_char!(top, ' ') else { 166 | return Ok((params, top)); 167 | }; 168 | let mut rest = rest2; 169 | let param_name = take_item!(parse_sd_id(rest), rest); 170 | take_char!(rest, '='); 171 | let param_value = take_item!(parse_param_value(rest), rest); 172 | // is there an uglier modifier than &* 173 | params.push((param_name, String::from(&*param_value))); 174 | top = rest; 175 | } 176 | } 177 | 178 | fn parse_sde(sde: &str) -> ParseResult<((String, ParsedSDParams), &str)> { 179 | let mut rest = sde; 180 | take_char!(rest, '['); 181 | let id = take_item!(parse_sd_id(rest), rest); 182 | let params = take_item!(parse_sd_params(rest), rest); 183 | take_char!(rest, ']'); 184 | Ok(((id, params), rest)) 185 | } 186 | 187 | fn parse_sd(structured_data_raw: &str) -> ParseResult<(StructuredData, &str)> { 188 | let mut sd = StructuredData::new_empty(); 189 | if let Some(rest) = structured_data_raw.strip_prefix('-') { 190 | return Ok((sd, rest)); 191 | } 192 | let mut rest = structured_data_raw; 193 | while 
!rest.is_empty() { 194 | let (sd_id, params) = take_item!(parse_sde(rest), rest); 195 | let sub_map = sd.entry(sd_id.clone()); 196 | for (sd_param_id, sd_param_value) in params { 197 | sub_map.insert(sd_param_id, sd_param_value); 198 | } 199 | if rest.starts_with(' ') { 200 | break; 201 | } 202 | } 203 | Ok((sd, rest)) 204 | } 205 | 206 | fn parse_pri_val(pri: i32) -> ParseResult<(severity::SyslogSeverity, facility::SyslogFacility)> { 207 | let sev = severity::SyslogSeverity::from_int(pri & 0x7).ok_or(ParseErr::BadSeverityInPri)?; 208 | let fac = facility::SyslogFacility::from_int(pri >> 3).ok_or(ParseErr::BadFacilityInPri)?; 209 | Ok((sev, fac)) 210 | } 211 | 212 | /// Parse an i32 213 | fn parse_num(s: &str, min_digits: usize, max_digits: usize) -> ParseResult<(i32, &str)> { 214 | let (res, rest1) = take_while(s, |c| c.is_ascii_digit(), max_digits); 215 | let rest = rest1.ok_or(ParseErr::UnexpectedEndOfInput)?; 216 | if res.len() < min_digits { 217 | Err(ParseErr::TooFewDigits) 218 | } else if res.len() > max_digits { 219 | Err(ParseErr::TooManyDigits) 220 | } else { 221 | Ok(( 222 | i32::from_str(res).map_err(ParseErr::IntConversionErr)?, 223 | rest, 224 | )) 225 | } 226 | } 227 | 228 | /// Parse an i32 229 | fn parse_num_generic(s: &str, min_digits: usize, max_digits: usize) -> ParseResult<(NT, &str)> 230 | where 231 | NT: FromStr, 232 | { 233 | let (res, rest1) = take_while(s, |c| c.is_ascii_digit(), max_digits); 234 | let rest = rest1.ok_or(ParseErr::UnexpectedEndOfInput)?; 235 | if res.len() < min_digits { 236 | Err(ParseErr::TooFewDigits) 237 | } else if res.len() > max_digits { 238 | Err(ParseErr::TooManyDigits) 239 | } else { 240 | Ok((NT::from_str(res).map_err(ParseErr::IntConversionErr)?, rest)) 241 | } 242 | } 243 | 244 | fn parse_decimal(d: &str, min_digits: usize, max_digits: usize) -> ParseResult<(i32, &str)> { 245 | parse_num(d, min_digits, max_digits).map(|(val, s)| { 246 | let mut multiplicand = 1; 247 | let z = 10 - (d.len() - s.len()); 248 | 
249 | for _i in 1..(z) { 250 | multiplicand *= 10; 251 | } 252 | (val * multiplicand, s) 253 | }) 254 | } 255 | 256 | fn parse_timestamp(m: &str) -> ParseResult<(Option, &str)> { 257 | let mut rest = m; 258 | if let Some(rest) = rest.strip_prefix('-') { 259 | return Ok((None, rest)); 260 | } 261 | let year = take_item!(parse_num(rest, 4, 4), rest); 262 | take_char!(rest, '-'); 263 | let month_num = take_item!(parse_num_generic(rest, 2, 2), rest); 264 | let month = time::Month::try_from(month_num).map_err(|_| ParseErr::InvalidMonth(month_num))?; 265 | take_char!(rest, '-'); 266 | let mday = take_item!(parse_num_generic(rest, 2, 2), rest); 267 | let date = time::Date::from_calendar_date(year, month, mday) 268 | .map_err(|e| ParseErr::InvalidDate(e.name().to_string()))?; 269 | take_char!(rest, 'T'); 270 | let hour = take_item!(parse_num_generic(rest, 2, 2), rest); 271 | take_char!(rest, ':'); 272 | let minute = take_item!(parse_num_generic(rest, 2, 2), rest); 273 | take_char!(rest, ':'); 274 | let second = take_item!(parse_num_generic(rest, 2, 2), rest); 275 | let nano = if rest.starts_with('.') { 276 | take_char!(rest, '.'); 277 | // Note: RFC states 6 decimals, but here we allow nanosecond precision. 278 | take_item!(parse_decimal(rest, 1, 9), rest) as u32 279 | } else { 280 | 0 281 | }; 282 | let time = time::Time::from_hms_nano(hour, minute, second, nano) 283 | .map_err(|e| ParseErr::InvalidDate(e.name().to_string()))?; 284 | // Tm::utcoff is totally broken, don't use it. 
285 | let utc_offset = match rest.chars().next() { 286 | None => None, 287 | Some('Z') => { 288 | rest = &rest[1..]; 289 | None 290 | } 291 | Some(c) => { 292 | let (sign, irest) = match c { 293 | // Note: signs are backwards as per RFC3339 294 | '-' => (-1, &rest[1..]), 295 | '+' => (1, &rest[1..]), 296 | _ => { 297 | return Err(ParseErr::InvalidUTCOffset); 298 | } 299 | }; 300 | let hours = i8::from_str(&irest[0..2]).map_err(ParseErr::IntConversionErr)?; 301 | let minutes = i8::from_str(&irest[3..5]).map_err(ParseErr::IntConversionErr)?; 302 | rest = &irest[5..]; 303 | Some( 304 | time::UtcOffset::from_hms(hours * sign, minutes * sign, 0) 305 | .map_err(|_| ParseErr::InvalidOffset)?, 306 | ) 307 | } 308 | }; 309 | let naive_dt = time::PrimitiveDateTime::new(date, time); 310 | let dt = if let Some(utc_offset) = utc_offset { 311 | naive_dt.assume_offset(utc_offset) 312 | } else { 313 | naive_dt.assume_utc() 314 | }; 315 | Ok((Some(dt), rest)) 316 | } 317 | 318 | fn parse_term( 319 | m: &str, 320 | min_length: usize, 321 | max_length: usize, 322 | ) -> ParseResult<(Option, &str)> { 323 | if m.starts_with('-') && (m.len() <= 1 || m.as_bytes()[1] == 0x20) { 324 | return Ok((None, &m[1..])); 325 | } 326 | let byte_ary = m.as_bytes(); 327 | for (idx, chr) in byte_ary.iter().enumerate() { 328 | if *chr < 33 || *chr > 126 { 329 | if idx < min_length { 330 | return Err(ParseErr::TooFewDigits); 331 | } 332 | let utf8_ary = str::from_utf8(&byte_ary[..idx]).map_err(ParseErr::BaseUnicodeError)?; 333 | return Ok((Some(String::from(utf8_ary)), &m[idx..])); 334 | } 335 | if idx >= max_length { 336 | let utf8_ary = str::from_utf8(&byte_ary[..idx]).map_err(ParseErr::BaseUnicodeError)?; 337 | return Ok((Some(String::from(utf8_ary)), &m[idx..])); 338 | } 339 | } 340 | Err(ParseErr::UnexpectedEndOfInput) 341 | } 342 | 343 | fn parse_message_s(m: &str) -> ParseResult { 344 | let mut rest = m; 345 | take_char!(rest, '<'); 346 | let prival = take_item!(parse_num(rest, 1, 3), rest); 347 | 
/// Parse a string into a `SyslogMessage` object
///
/// This is the public entry point of the parser; it borrows the input as a `&str` and hands
/// it to the internal parsing routine.
///
/// # Arguments
///
///  * `s`: Anything that can be borrowed as a string slice (`&str`, `String`, ...)
///
/// # Returns
///
///  * The parsed `SyslogMessage` on success
///
/// # Errors
///
///  * `ParseErr` if the string is not parseable as an RFC5424 message
///
/// # Example
///
/// ```
/// use syslog_rfc5424::parse_message;
///
/// let message = parse_message("<78>1 2016-01-15T00:04:01+00:00 host1 CROND 10391 - [meta sequenceId=\"29\"] some_message").unwrap();
///
/// assert!(message.hostname.unwrap() == "host1");
/// ```
pub fn parse_message<S: AsRef<str>>(s: S) -> ParseResult<SyslogMessage> {
    parse_message_s(s.as_ref())
}
/// A message whose header fields are all the nil value `-` parses with every optional field
/// unset and no structured data.
#[test]
fn test_simple() {
    let parsed = parse_message("<1>1 - - - - - -").expect("Should parse empty message");
    // PRI 1 splits into facility 1 >> 3 = 0 and severity 1 & 7 = 1.
    assert_eq!(parsed.facility, SyslogFacility::LOG_KERN);
    assert_eq!(parsed.severity, SyslogSeverity::SEV_ALERT);
    assert_eq!(parsed.timestamp, None);
    assert_eq!(parsed.hostname, None);
    assert_eq!(parsed.appname, None);
    assert_eq!(parsed.procid, None);
    assert_eq!(parsed.msgid, None);
    assert!(parsed.sd.is_empty());
}
/// A fully populated message: every header field, one structured-data element and a body.
#[test]
fn test_complex() {
    let raw = "<78>1 2016-01-15T00:04:01+00:00 host1 CROND 10391 - [meta sequenceId=\"29\"] some_message";
    let parsed = parse_message(raw).expect("Should parse complex message");

    assert_eq!(parsed.facility, SyslogFacility::LOG_CRON);
    assert_eq!(parsed.severity, SyslogSeverity::SEV_INFO);
    assert_eq!(parsed.hostname.as_deref(), Some("host1"));
    assert_eq!(parsed.appname.as_deref(), Some("CROND"));
    assert_eq!(parsed.procid, Some(message::ProcId::PID(10391)));
    assert_eq!(parsed.msg, "some_message");
    assert_eq!(parsed.timestamp, Some(1452816241));

    assert_eq!(parsed.sd.len(), 1);
    let sequence_id = parsed
        .sd
        .find_tuple("meta", "sequenceId")
        .expect("Should contain meta sequenceId");
    assert_eq!(sequence_id, "29");
}
/// A backslash-escaped quote inside a structured-data value is un-escaped in the result.
#[test]
fn test_sd_with_escaped_quote() {
    let parsed =
        parse_message(r#"<1>1 - - - - - [meta key="val\"ue"] message"#).expect("should parse");
    let value = parsed
        .sd
        .find_tuple("meta", "key")
        .expect("Should contain meta key");
    assert_eq!(value, r#"val"ue"#);
}
/// The PRI field is parsed with at most three digits, so a four-digit value must be rejected.
#[test]
fn test_bad_pri() {
    assert!(parse_message("<4096>1 - - - - - -").is_err());
}
/// Structured data may contain an empty quoted value (`core-dump-status=""`), and a message
/// that ends right after the structured data has an empty body.
#[test]
fn test_empty_sd_value() {
    let raw = r#"<29>1 2018-05-14T08:23:01.520Z leyal_test4 mgd 13894 UI_CHILD_EXITED [junos@2636.1.1.1.2.57 pid="14374" return-value="5" core-dump-status="" command="/usr/sbin/mustd"]"#;
    let parsed = parse_message(raw).expect("must parse");

    assert_eq!(parsed.facility, SyslogFacility::LOG_DAEMON);
    assert_eq!(parsed.severity, SyslogSeverity::SEV_NOTICE);
    assert_eq!(parsed.hostname.as_deref(), Some("leyal_test4"));
    assert_eq!(parsed.appname.as_deref(), Some("mgd"));
    assert_eq!(parsed.procid, Some(message::ProcId::PID(13894)));
    assert_eq!(parsed.msg, "");
    assert_eq!(parsed.timestamp, Some(1526286181));
    assert_eq!(parsed.timestamp_nanos, Some(520000000));
    assert_eq!(parsed.sd.len(), 1);

    let sd = parsed
        .sd
        .find_sdid("junos@2636.1.1.1.2.57")
        .expect("should contain root SD");
    let expected: BTreeMap<String, String> = [
        ("pid", "14374"),
        ("return-value", "5"),
        ("core-dump-status", ""),
        ("command", "/usr/sbin/mustd"),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value.to_string()))
    .collect();
    assert_eq!(sd, &expected);
}
/// A message cut off in the middle of the header fails with `UnexpectedEndOfInput`.
#[test]
fn test_truncated() {
    let result = parse_message("<39>1 2018-05-15T20:56:58+00:00 -web1west -");
    let err = result.expect_err("should fail");
    // Compare variants only, via their discriminants.
    let expected = mem::discriminant(&ParseErr::UnexpectedEndOfInput);
    assert_eq!(mem::discriminant(&err), expected);
}