├── .vscode ├── settings.json └── launch.json ├── bors.toml ├── .gitignore ├── Cargo.toml ├── .github └── workflows │ └── ci.yml ├── CHANGELOG.md ├── src ├── lib.rs ├── separators.rs ├── segments.rs ├── fields.rs ├── message.rs └── escape_sequence.rs ├── examples ├── demo.rs └── typed_segment.rs ├── readme.md └── benches ├── simple_parse.rs └── decoder.rs /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "cSpell.words": [ 3 | "rusthl", 4 | "subcomponent" 5 | ] 6 | } -------------------------------------------------------------------------------- /bors.toml: -------------------------------------------------------------------------------- 1 | status = [ 2 | "build (ubuntu-latest)", 3 | "build (windows-latest)", 4 | ] 5 | delete_merged_branches = true 6 | timeout_sec = 600 # 10 min 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | dotnet/BenchmarkDotNet.Artifacts/ 5 | dotnet/bin/ 6 | dotnet/obj/ 7 | dotnet/.vs/ 8 | BenchmarkDotNet.Artifacts/ 9 | cargo-timing*.html -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-hl7" 3 | version = "0.6.0" 4 | authors = ["wokket "] 5 | edition = "2018" 6 | description = "HL7 Parser and object builder? query'er? 
- experimental only at any rate" 7 | license = "MIT OR Apache-2.0" 8 | repository = "https://github.com/wokket/rust-hl7/" 9 | 10 | [features] 11 | string_index = [] 12 | 13 | [lib] 14 | name="rusthl7" 15 | path="src/lib.rs" 16 | 17 | [dependencies] 18 | hex = "0.4" 19 | log = "0.4" 20 | regex = "1.5" 21 | thiserror = "1.0" 22 | 23 | [dev-dependencies] 24 | criterion = "0.5" 25 | 26 | [[bench]] 27 | name = "simple_parse" 28 | harness = false 29 | 30 | [[bench]] 31 | name = "decoder" 32 | harness = false 33 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "(Windows) Launch", 9 | "type": "cppvsdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/target/debug/nom-hl7.exe", 12 | "args": [], 13 | "stopAtEntry": false, 14 | "cwd": "${workspaceFolder}", 15 | "environment": [], 16 | "externalConsole": true 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI Ubunutu 2 | 3 | on: 4 | push: 5 | branches: [ master, staging, trying ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest, windows-latest] 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - name: Cache Dependencies 25 | uses: Swatinem/rust-cache@v1.3.0 26 | 27 | - name: Compile 28 | run: cargo test --no-run 29 | 30 | - name: Test 31 | run: cargo test 
--all-features -- --nocapture --quiet 32 | 33 | - name: Run Benchmarks 34 | run: cargo bench --all-features 35 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.6.0 4 | - Breaking Change ([#25](https://github.com/wokket/rust-hl7/issues/25)): Moved the core structs to the top-level module to avoid the noisy using statements. 5 | 6 | ## 0.5.0 7 | - Add `query` functions to replace the string based `Index` impls in the previous version. These are functionally identical to the string `Index` implementations, but avoid some lifetime issues (returning `&&str`) and have visible documentation. 8 | - Add `EscapeSequence` struct to support decoding [escape sequences](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) back to their original values. 9 | 10 | ## 0.4.0 11 | - Large change (thanks @sempervictus) to allow querying of message content by both numerical indexer and dot-notation string indexers 12 | - Note that the string indexers will be replaced with a normal function call in a future release. 13 | 14 | ## 0.3.0 15 | - Extensive work by @sempervictus to expose the segments/fields as collections (which I hadn't got back to after the re-write to slices.) 16 | 17 | ## 0.2.0 18 | - Re-Write to not expose cloned/copied vecs of vecs everywhere. We have all the data in a single string slice to begin with so let's return slices from that. 19 | 20 | ## 0.1.0 21 | - Initial string.clone() heavy library, nothing to see here... 22 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | 3 | # RustHl7 - A HL7 V2 message parser and library 4 | 5 | This crate is attempting to provide the tooling for a fully spec-compliant HL7 V2 message parser.
Note that _interpreting_ the parsed message elements into a strongly 6 | typed segment/message format is specifically **out of scope** as there's simply too many variants over too many versions for me to go there (maybe 7 | someone else could code-gen a crate using this crate to provide the source information?). 8 | 9 | This crate tries to provide the tools to build HL7 systems without dictating _how_ to build your system, there's no such thing as one-size-fits-all in healthcare! 10 | 11 | */ 12 | 13 | mod escape_sequence; 14 | mod fields; 15 | mod message; 16 | mod segments; 17 | mod separators; 18 | 19 | // re-exports to simplify namespacing (#25) 20 | pub use fields::Field; 21 | pub use message::Message; 22 | pub use segments::Segment; 23 | 24 | pub use escape_sequence::EscapeSequence; 25 | pub use separators::Separators; 26 | 27 | #[derive(Debug, thiserror::Error)] 28 | pub enum Hl7ParseError { 29 | #[error("Unexpected error: {0}")] 30 | Generic(String), 31 | 32 | #[error("Failure parsing MSH1/MSH2 while discovering separator chars: {0}")] 33 | Msh1Msh2(String), 34 | 35 | #[error("Required value missing")] 36 | MissingRequiredValue(), 37 | } 38 | -------------------------------------------------------------------------------- /examples/demo.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | A short example demonstrating one way to use this library for HL7 processing. 3 | */ 4 | 5 | use rusthl7::{EscapeSequence, Message}; 6 | use std::{convert::TryFrom, error::Error}; 7 | 8 | fn main() -> Result<(), Box> { 9 | // Normally message would come over the wire from a remote service etc. 10 | // Consider using the hl7-mllp-codec crate or similar to make building those network services easier.
11 | let hl7_string = get_sample_message(); 12 | 13 | // Parse the string into a structured entity 14 | let message = Message::try_from(hl7_string)?; 15 | 16 | // We can deep query message fields using the `query` functionality 17 | let postcode = message.query("PID.F11.C5"); // Field 11, Component 5 18 | assert_eq!(postcode, "35292"); 19 | 20 | // If you have the potential for escape sequences in your data you can process those using `EscapeSequence` 21 | let charge_to_practice = message.query("OBR.F23"); 22 | assert_eq!(charge_to_practice, r#"Joes Obs \T\ Gynae"#); 23 | 24 | let decoder = EscapeSequence::new(message.get_separators()); 25 | let charge_to_practice = decoder.decode(charge_to_practice); // Handle the escape sequences 26 | assert_eq!(charge_to_practice, "Joes Obs & Gynae"); // converted the \T\ sequence to an ampersand 27 | 28 | Ok(()) 29 | } 30 | 31 | fn get_sample_message() -> &'static str { 32 | "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||Joes Obs \\T\\ Gynae||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" 33 | } 34 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## An experimental HL7 library ## 2 | 3 | [![CI Ubunutu](https://github.com/wokket/rust-hl7/actions/workflows/ci.yml/badge.svg)](https://github.com/wokket/rust-hl7/actions/workflows/ci.yml) 4 | [![Crates IO](https://img.shields.io/crates/v/rust-hl7.svg)](https://crates.io/crates/rust-hl7) 5 | 6 | Totally kind of like production ready! 
7 | 8 | This second cut provides consistent structure down to the sub-sub-field, efficient accessors to shared string reference data, with standardized implementations of common functionality. 9 | 10 | Interpreting these facets (type conversion, determining which fields they represent etc) is a future problem... there is **no plan whatsoever** for message conformance checks or anything of that nature. 11 | 12 | This library is trying to provide the _tooling_ you need to build robust HL7 based systems, without dictating _how_ you go about it. There's no one-size-fits-all here, so we try to provide a box of separate tools rather than a full framework. 13 | 14 | ### Intended Features and Design Notes: 15 | - [x] Initially use hl7 default separator chars 16 | - [x] Use separator chars from the message 17 | - [X] Add support for sub-field (component/subcomponent) items 18 | - [x] Field repeats (via `~`) 19 | - [x] Initially, avoid any per-segment knowledge, requirement to read the spec too much etc. 20 | - Implementing all the segments, across all the hl7 versions, version-specific parsing etc is tooooo much while we're getting started. 21 | - [ ] Add support for [HL7 escape sequences](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) ([#22](https://github.com/wokket/rust-hl7/issues/22)) 22 | - [x] Decoding of the most common escape sequences including `\E\`, `\R\`, `\S\` & `\T\` 23 | - [x] Correctly passes through `\H\`, `\N\` and custom `\Z..\` sequences unchanged 24 | - [X] Decodes `\X..\` sequences for hex-encoded chars 25 | - [ ] Support for various unicode sequences (`\C..\`, `\M..\`). 
These are lower priority as [HL7 Australia considers them deprecated](https://confluence.hl7australia.com/display/OO/3+Datatypes#id-3Datatypes-3.1.1.6EscapesequencessupportingmultiplecharactersetsforFT,ST,andTXdatatypes) 26 | - [ ] Add tighter MSH as an exception to the above 27 | - [ ] The above allows us to parse everything as strings, and provide helper methods for type conversions as required. 28 | - [x] Parse a message using a `TryFrom<&str>` impl rather than a dedicated parser 29 | - [x] Index into messages using HL7 string index notation and binary methods 30 | - [x] Index into sub-fields using HL7 string index notation and binary methods 31 | - [x] Index into the segment enum using HL7 string index notation and binary methods 32 | - [x] Implement buffer-copy-free generic indexing into MSH 33 | -------------------------------------------------------------------------------- /benches/simple_parse.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use rusthl7::Message; 3 | use std::convert::TryFrom; 4 | 5 | fn get_sample_message() -> &'static str { 6 | "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" 7 | } 8 | 9 | fn message_parse(c: &mut Criterion) { 10 | c.bench_function("ORU parse", |b| { 11 | b.iter(|| { 12 | let _ = Message::try_from(get_sample_message()).unwrap(); 13 | }) 14 | }); 15 | } 16 | 17 | fn get_segments_by_name(c: &mut Criterion) { 18 | c.bench_function("Get Segment By Name", |b| { 19 | let m = 
Message::try_from(get_sample_message()).unwrap(); 20 | 21 | b.iter(|| { 22 | let _segs = m.segments_by_identifier("OBR").unwrap(); 23 | //assert!(segs.len() == 1); 24 | }) 25 | }); 26 | } 27 | 28 | fn get_pid_and_read_field_via_vec(c: &mut Criterion) { 29 | c.bench_function("Read Field from PID (lookup)", |b| { 30 | let m = Message::try_from(get_sample_message()).unwrap(); 31 | 32 | b.iter(|| { 33 | let pid = &m.segments[1]; 34 | let _field = pid[3]; 35 | assert_eq!(_field, "555-44-4444"); // lookup from vec 36 | }) 37 | }); 38 | } 39 | 40 | fn get_pid_and_read_field_via_query(c: &mut Criterion) { 41 | c.bench_function("Read Field from PID (query)", |b| { 42 | let m = Message::try_from(get_sample_message()).unwrap(); 43 | 44 | b.iter(|| { 45 | let _val = m.query("PID.F3"); // query via Message 46 | assert_eq!(_val, "555-44-4444"); // lookup from vec 47 | }) 48 | }); 49 | } 50 | 51 | #[cfg(feature = "string_index")] 52 | fn get_pid_and_read_field_via_index(c: &mut Criterion) { 53 | c.bench_function("Read Field from PID (index)", |b| { 54 | let m = Message::try_from(get_sample_message()).unwrap(); 55 | 56 | b.iter(|| { 57 | let _val = m["PID.F3"]; // query via Message 58 | assert_eq!(_val, "555-44-4444"); // lookup from vec 59 | }) 60 | }); 61 | } 62 | 63 | #[cfg(feature = "string_index")] 64 | criterion_group!( 65 | benches, 66 | message_parse, 67 | get_segments_by_name, 68 | get_pid_and_read_field_via_vec, 69 | get_pid_and_read_field_via_query, 70 | get_pid_and_read_field_via_index 71 | ); 72 | 73 | #[cfg(not(feature = "string_index"))] 74 | criterion_group!( 75 | benches, 76 | message_parse, 77 | get_segments_by_name, 78 | get_pid_and_read_field_via_vec, 79 | get_pid_and_read_field_via_query 80 | ); 81 | criterion_main!(benches); 82 | -------------------------------------------------------------------------------- /benches/decoder.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, 
criterion_main, Criterion}; 2 | use rusthl7::{EscapeSequence, Separators}; 3 | 4 | // Note that we're calling decode on a whole message here, although it would normally be on an individual field... 5 | // this is just to make it work a bit harder on a larger dataset, not because it makes sense in a HL7 sense 6 | 7 | fn no_sequences(c: &mut Criterion) { 8 | c.bench_function("No Escape Sequences", |b| { 9 | let delims = Separators::default(); 10 | let decoder = EscapeSequence::new(delims); 11 | 12 | b.iter(|| { 13 | let _ = decoder.decode(get_sample_message_no_sequence()); 14 | }) 15 | }); 16 | } 17 | 18 | // We expect creation to be a little slower, as we init the regexes to make decode() calls faster 19 | // Amortizing this cost across multiple calls makes sense if we're caching the struct 20 | fn create_struct(c: &mut Criterion) { 21 | c.bench_function("Create struct", |b| { 22 | let delims = Separators::default(); 23 | 24 | b.iter(|| { 25 | let _ = EscapeSequence::new(delims); 26 | }) 27 | }); 28 | } 29 | 30 | fn no_sequences_but_backslash(c: &mut Criterion) { 31 | c.bench_function("No Escape Sequences But Backslash", |b| { 32 | let delims = Separators::default(); 33 | let decoder = EscapeSequence::new(delims); 34 | 35 | b.iter(|| { 36 | let _ = decoder.decode(get_sample_message_with_backslash()); 37 | }) 38 | }); 39 | } 40 | 41 | fn has_escape_sequences(c: &mut Criterion) { 42 | c.bench_function("Has Escape Sequences", |b| { 43 | let delims = Separators::default(); 44 | let decoder = EscapeSequence::new(delims); 45 | 46 | b.iter(|| { 47 | let _ = decoder.decode(get_sample_message_with_escape_sequences()); 48 | }) 49 | }); 50 | } 51 | 52 | fn get_sample_message_no_sequence() -> &'static str { 53 | // note we've stripped the backslash from the MSH 54 | "MSH|^~*&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD
DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" 55 | } 56 | 57 | fn get_sample_message_with_backslash() -> &'static str { 58 | //there's a backslash down at char 487! 59 | "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F" 60 | } 61 | 62 | fn get_sample_message_with_escape_sequences() -> &'static str { 63 | //there's a backslash down at char 487! 
64 | "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||\\F\\555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F" 65 | } 66 | 67 | criterion_group!( 68 | decoder, 69 | create_struct, 70 | no_sequences, 71 | no_sequences_but_backslash, 72 | has_escape_sequences 73 | ); 74 | criterion_main!(decoder); 75 | -------------------------------------------------------------------------------- /src/separators.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use std::fmt::Display; 3 | use std::str::FromStr; 4 | 5 | /// A helper struct to store the separator (delimiter) characters used to parse this message. 
6 | /// Note that HL7 allows each _message_ to define it's own separators, although most messages 7 | /// use a default set (available from [`Separators::default()`]) 8 | #[derive(Debug, PartialEq, Clone, Copy)] 9 | pub struct Separators { 10 | /// constant value, spec fixed to '\r' (ASCII 13, 0x0D) 11 | pub segment: char, 12 | /// Field separator char, defaults to `|` 13 | pub field: char, 14 | /// Field repeat separator char, defaults to `~` 15 | pub repeat: char, 16 | /// Component separator char, defaults to `^` 17 | pub component: char, 18 | /// Sub-Component separator char, defaults to `&` 19 | pub subcomponent: char, 20 | /// Character used to wrap an [`EscapeSequence`], defaults to `\` (a single back slash) 21 | pub escape_char: char, 22 | } 23 | 24 | impl Default for Separators { 25 | /// Create a Separator with the default (most common) HL7 values 26 | fn default() -> Separators { 27 | Separators { 28 | segment: '\r', 29 | field: '|', 30 | repeat: '~', 31 | component: '^', 32 | subcomponent: '&', 33 | escape_char: '\\', 34 | } 35 | } 36 | } 37 | 38 | impl Separators { 39 | 40 | // Create a Separators with the values provided in the message. 
41 | // This assumes the message starts with `MSH|^~\&|` or equiv for custom Separators 42 | fn new(message: &str) -> Result { 43 | //assuming we have a valid message 44 | let mut chars = message.char_indices(); 45 | 46 | if Some((0, 'M')) != chars.next() 47 | || Some((1, 'S')) != chars.next() 48 | || Some((2, 'H')) != chars.next() 49 | { 50 | return Err(Hl7ParseError::Msh1Msh2( 51 | "Message doesn't start with 'MSH'".to_string(), 52 | )); 53 | } 54 | 55 | Ok(Separators { 56 | segment: '\r', 57 | field: chars.next().unwrap().1, 58 | component: chars.next().unwrap().1, 59 | repeat: chars.next().unwrap().1, 60 | escape_char: chars.next().unwrap().1, 61 | subcomponent: chars.next().unwrap().1, 62 | }) 63 | } 64 | } 65 | 66 | impl Display for Separators { 67 | /// Required for to_string() and other formatter consumers 68 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 69 | write!( 70 | f, 71 | "{}{}{}{}", 72 | self.component, self.repeat, self.escape_char, self.subcomponent 73 | ) 74 | } 75 | } 76 | 77 | /// Expects to receive a full message (or at least a MSH segment) in order to parse 78 | /// out the separator chars. 
79 | impl FromStr for Separators { 80 | type Err = Hl7ParseError; 81 | 82 | fn from_str(input: &str) -> Result { 83 | Separators::new(input) 84 | } 85 | } 86 | 87 | #[cfg(test)] 88 | mod tests { 89 | use super::separators::Separators; 90 | use super::*; 91 | 92 | #[test] 93 | fn ensure_separators_load_correctly() -> Result<(), Hl7ParseError> { 94 | let expected = Separators::default(); 95 | let actual = Separators::new("MSH|^~\\&|CATH|StJohn|AcmeHIS|StJohn|20061019172719||ACK^O01|MSGID12349876|P|2.3\rMSA|AA|MSGID12349876")?; 96 | 97 | assert_eq!(expected.component, actual.component); 98 | assert_eq!(expected.escape_char, actual.escape_char); 99 | assert_eq!(expected.field, actual.field); 100 | assert_eq!(expected.repeat, actual.repeat); 101 | assert_eq!(expected.segment, actual.segment); 102 | assert_eq!(expected.subcomponent, actual.subcomponent); 103 | 104 | Ok(()) 105 | } 106 | 107 | #[test] 108 | fn ensure_separators_load_from_string() -> Result<(), Hl7ParseError> { 109 | let expected = Separators::default(); 110 | let actual = str::parse::("MSH|^~\\&|CATH|StJohn|AcmeHIS|StJohn|20061019172719||ACK^O01|MSGID12349876|P|2.3\rMSA|AA|MSGID12349876")?; 111 | 112 | assert_eq!(expected.component, actual.component); 113 | assert_eq!(expected.escape_char, actual.escape_char); 114 | assert_eq!(expected.field, actual.field); 115 | assert_eq!(expected.repeat, actual.repeat); 116 | assert_eq!(expected.segment, actual.segment); 117 | assert_eq!(expected.subcomponent, actual.subcomponent); 118 | 119 | Ok(()) 120 | } 121 | 122 | #[test] 123 | fn ensure_missing_msh_causes_error() { 124 | //note the missing M 125 | let result = Separators::new("SH|^~\\&|CATH|StJohn|AcmeHIS|StJohn|20061019172719||ACK^O01|MSGID12349876|P|2.3\rMSA|AA|MSGID12349876"); 126 | assert!(result.is_err()); 127 | } 128 | 129 | #[test] 130 | fn ensure_separators_to_string() { 131 | assert_eq!("^~\\&", Separators::default().to_string()); 132 | } 133 | } 134 | 
-------------------------------------------------------------------------------- /examples/typed_segment.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | A short example demonstrating one way to use this library for HL7 processing. 3 | */ 4 | 5 | use rusthl7::{Field, Hl7ParseError, Message, Separators}; 6 | use std::{convert::TryFrom, error::Error, fmt::Display}; 7 | 8 | /// The most important Segment, almost all HL7 messages have an MSH (MLLP simple ack I'm looking at you). 9 | /// Given the importance of this segment for driving application behaviour, it gets the special treatment 10 | /// of a fully typed segment, not just a bag of fields.... 11 | #[derive(Debug, PartialEq)] 12 | pub struct MshSegment<'a> { 13 | pub source: &'a str, 14 | //this initial layout largely stolen from the _other_ hl7 crate: https://github.com/njaremko/hl7 15 | pub msh_1_field_separator: char, 16 | pub msh_2_encoding_characters: Separators, 17 | pub msh_3_sending_application: Option>, 18 | pub msh_4_sending_facility: Option>, 19 | pub msh_5_receiving_application: Option>, 20 | pub msh_6_receiving_facility: Option>, 21 | pub msh_7_date_time_of_message: Field<'a>, 22 | pub msh_8_security: Option>, 23 | pub msh_9_message_type: Field<'a>, 24 | pub msh_10_message_control_id: Field<'a>, 25 | pub msh_11_processing_id: Field<'a>, 26 | pub msh_12_version_id: Field<'a>, 27 | pub msh_13_sequence_number: Option>, 28 | pub msh_14_continuation_pointer: Option>, 29 | pub msh_15_accept_acknowledgment_type: Option>, 30 | pub msh_16_application_acknowledgment_type: Option>, 31 | pub msh_17_country_code: Option>, 32 | pub msh_18_character_set: Option>, //TODO: repeating field 33 | pub msh_19_principal_language_of_message: Option>, 34 | // pub msh_20_alternate_character_set_handling_scheme: Option>, 35 | // pub msh_21_message_profile_identifier: Option>>, 36 | // pub msh_22_sending_responsible_organization: Option>, 37 | // pub 
msh_23_receiving_responsible_organization: Option>, 38 | // pub msh_24_sending_network_address: Option>, 39 | // pub msh_25_receiving_network_address: Option>, 40 | } 41 | 42 | impl<'a> MshSegment<'a> { 43 | pub fn parse>( 44 | input: S, 45 | delims: &Separators, 46 | ) -> Result, Hl7ParseError> { 47 | let input = input.into(); 48 | 49 | let mut fields = input.split(delims.field); 50 | 51 | assert!(fields.next().unwrap() == "MSH"); 52 | 53 | let _ = fields.next(); //consume the delimiter chars 54 | 55 | let msh = MshSegment { 56 | source: input, 57 | msh_1_field_separator: delims.field, 58 | msh_2_encoding_characters: delims.to_owned(), 59 | msh_3_sending_application: Field::parse_optional(fields.next(), delims)?, 60 | msh_4_sending_facility: Field::parse_optional(fields.next(), delims)?, 61 | msh_5_receiving_application: Field::parse_optional(fields.next(), delims)?, 62 | msh_6_receiving_facility: Field::parse_optional(fields.next(), delims)?, 63 | msh_7_date_time_of_message: Field::parse_mandatory(fields.next(), delims)?, 64 | msh_8_security: Field::parse_optional(fields.next(), delims)?, 65 | msh_9_message_type: Field::parse_mandatory(fields.next(), delims)?, 66 | msh_10_message_control_id: Field::parse_mandatory(fields.next(), delims)?, 67 | msh_11_processing_id: Field::parse_mandatory(fields.next(), delims)?, 68 | msh_12_version_id: Field::parse_mandatory(fields.next(), delims)?, 69 | msh_13_sequence_number: Field::parse_optional(fields.next(), delims)?, 70 | msh_14_continuation_pointer: Field::parse_optional(fields.next(), delims)?, 71 | msh_15_accept_acknowledgment_type: Field::parse_optional(fields.next(), delims)?, 72 | msh_16_application_acknowledgment_type: Field::parse_optional(fields.next(), delims)?, 73 | msh_17_country_code: Field::parse_optional(fields.next(), delims)?, 74 | msh_18_character_set: Field::parse_optional(fields.next(), delims)?, 75 | msh_19_principal_language_of_message: Field::parse_optional(fields.next(), delims)?, 76 | }; 77 | 78 | 
Ok(msh) 79 | } 80 | } 81 | /// Common formatter trait implementation for the strongly-typed segment 82 | impl<'a> Display for MshSegment<'a> { 83 | /// Required for to_string() and other formatter consumers 84 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 85 | write!(f, "{}", self.source) 86 | } 87 | } 88 | /// Common clone trait implementation for the strongly-typed segment 89 | impl<'a> Clone for MshSegment<'a> { 90 | /// Creates a new Message object using _the same source_ slice as the original. 91 | fn clone(&self) -> Self { 92 | let delims = self.msh_2_encoding_characters; 93 | MshSegment::parse(self.source, &delims).unwrap() 94 | } 95 | } 96 | 97 | /// Extracts header element for external use 98 | pub fn msh<'a>(msg: &Message<'a>) -> Result, Hl7ParseError> { 99 | let seg = msg.segments_by_identifier("MSH").unwrap()[0]; 100 | let segment = 101 | MshSegment::parse(seg.source, &msg.get_separators()).expect("Failed to parse MSH segment"); 102 | Ok(segment) 103 | } 104 | 105 | fn main() -> Result<(), Box> { 106 | // Normally message would come over the wire from a remote service etc. 107 | // Consider using the hl7-mllp-codec crate or similar to make building those network services easier. 
108 | let hl7_string = get_sample_message(); 109 | 110 | // Parse the string into a structured entity 111 | let message = Message::try_from(hl7_string)?; 112 | 113 | // Get a strongly-typed segment from generic data 114 | let header = msh(&message).expect("Failed to extract MSH"); 115 | let send_fac = header.msh_4_sending_facility.unwrap().source; 116 | assert_eq!(send_fac, message.segments[0].fields[3].source); 117 | 118 | Ok(()) 119 | } 120 | 121 | fn get_sample_message() -> &'static str { 122 | "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||Joes Obs \\T\\ Gynae||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" 123 | } 124 | -------------------------------------------------------------------------------- /src/segments.rs: -------------------------------------------------------------------------------- 1 | use crate::{Field, Hl7ParseError, Separators}; 2 | use std::fmt::Display; 3 | use std::ops::Index; 4 | 5 | /// A generic bag o' fields, representing an arbitrary segment. 6 | #[derive(Debug, PartialEq, Clone)] 7 | pub struct Segment<'a> { 8 | pub source: &'a str, 9 | delim: char, 10 | pub fields: Vec>, 11 | } 12 | 13 | impl<'a> Segment<'a> { 14 | /// Convert the given line of text into a Segment. NOTE: This is not normally needed to be called directly by 15 | /// consumers but is used indirectly via [`crate::Message::new()`]. 
16 | pub fn parse>( 17 | input: S, 18 | delims: &Separators, 19 | ) -> Result, Hl7ParseError> { 20 | // non-generic inner to reduce compile times/code bloat 21 | fn inner<'a>(input: &'a str, delims: &Separators) -> Result, Hl7ParseError> { 22 | let fields: Result>, Hl7ParseError> = input 23 | .split(delims.field) 24 | .map(|line| Field::parse(line, delims)) 25 | .collect(); 26 | 27 | let fields = fields?; 28 | let seg = Segment { 29 | source: input, 30 | delim: delims.segment, 31 | fields, 32 | }; 33 | Ok(seg) 34 | } 35 | 36 | let input = input.into(); 37 | inner(input, delims) 38 | } 39 | 40 | /// Get the identifier (ie type, or name) for this segment. 41 | /// ## Example: 42 | /// ``` 43 | /// # use rusthl7::Hl7ParseError; 44 | /// # use rusthl7::{Segment, Separators}; 45 | /// # fn main() -> Result<(), Hl7ParseError> { 46 | /// let segment = Segment::parse("OBR|field1|field2", &Separators::default())?; 47 | /// assert_eq!("OBR", segment.identifier()); 48 | /// # Ok(()) 49 | /// # } 50 | /// ``` 51 | /// eg a segment `EVN||200708181123||` has an identifier of `EVN`. 52 | pub fn identifier(&self) -> &'a str { 53 | self.fields[0].source 54 | } 55 | 56 | /// Returns the original `&str` used to initialise this Segment. This method does not allocate.
57 | /// ## Example: 58 | /// ``` 59 | /// # use rusthl7::Hl7ParseError; 60 | /// # use rusthl7::Message; 61 | /// # use std::convert::TryFrom; 62 | /// # fn main() -> Result<(), Hl7ParseError> { 63 | /// let source = "MSH|^~\\&|GHH LAB|ELAB-3\rOBR|field1|field2"; 64 | /// let m = Message::try_from(source)?; 65 | /// let obr = &m.segments[1]; 66 | /// assert_eq!("OBR|field1|field2", obr.as_str()); 67 | /// # Ok(()) 68 | /// # } 69 | /// ``` 70 | #[inline] 71 | pub fn as_str(&'a self) -> &'a str { 72 | self.source 73 | } 74 | 75 | /// Access Field as string reference 76 | pub fn query<'b, S>(&self, fidx: S) -> &'a str 77 | where 78 | S: Into<&'b str>, 79 | { 80 | let fidx = fidx.into(); 81 | let sections = fidx.split('.').collect::>(); 82 | 83 | match sections.len() { 84 | 1 => { 85 | let stringnum = sections[0] 86 | .chars() 87 | .filter(|c| c.is_ascii_digit()) 88 | .collect::(); 89 | let idx: usize = stringnum.parse().unwrap(); 90 | self[idx] 91 | } 92 | _ => { 93 | let stringnum = sections[0] 94 | .chars() 95 | .filter(|c| c.is_ascii_digit()) 96 | .collect::(); 97 | let idx: usize = stringnum.parse().unwrap(); 98 | if idx > self.fields.len() - 1 { 99 | return ""; 100 | } 101 | let field = &self.fields[idx]; 102 | let query = sections[1..].join("."); 103 | 104 | field.query(&*query) 105 | } 106 | } 107 | } 108 | } 109 | 110 | impl<'a> Display for Segment<'a> { 111 | /// Required for to_string() and other formatter consumers. This returns the source string that represents the segment. 
112 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 113 | write!(f, "{}", self.source) 114 | } 115 | } 116 | 117 | impl<'a> Index for Segment<'a> { 118 | type Output = &'a str; 119 | /// Access Field as string reference 120 | fn index(&self, fidx: usize) -> &Self::Output { 121 | if fidx > self.fields.len() - 1 { 122 | return &""; 123 | }; 124 | &self.fields[fidx].source 125 | } 126 | } 127 | 128 | impl<'a> Index<(usize, usize)> for Segment<'a> { 129 | type Output = &'a str; 130 | /// Access Field component as string reference 131 | fn index(&self, fidx: (usize, usize)) -> &Self::Output { 132 | if fidx.0 > self.fields.len() - 1 || fidx.1 > self.fields[fidx.0].components.len() - 1 { 133 | return &""; 134 | } 135 | &self.fields[fidx.0][fidx.1] 136 | } 137 | } 138 | 139 | impl<'a> Index<(usize, usize, usize)> for Segment<'a> { 140 | type Output = &'a str; 141 | /// Access Field subcomponent as string reference 142 | fn index(&self, fidx: (usize, usize, usize)) -> &Self::Output { 143 | if fidx.0 > self.fields.len() - 1 144 | || fidx.1 > self.fields[fidx.0].components.len() - 1 145 | || fidx.2 > self.fields[fidx.0].subcomponents[fidx.1].len() - 1 146 | { 147 | return &""; 148 | } 149 | &self.fields[fidx.0][(fidx.1, fidx.2)] 150 | } 151 | } 152 | 153 | #[cfg(feature = "string_index")] 154 | impl<'a> Index<&str> for Segment<'a> { 155 | type Output = &'a str; 156 | /// Access Field as string reference 157 | fn index(&self, fidx: &str) -> &Self::Output { 158 | let sections = fidx.split('.').collect::>(); 159 | let stringnum = sections[0] 160 | .chars() 161 | .filter(|c| c.is_digit(10)) 162 | .collect::(); 163 | let mut idx: usize = stringnum.parse().unwrap(); 164 | // MSH segment has an off-by-one problem in that the first 165 | // field separator is considered to be a field in the spec 166 | // https://hl7-definition.caristix.com/v2/HL7v2.8/Segments/MSH 167 | if self.fields[0].source == "MSH" { 168 | if idx == 1 { 169 | // return &&self.source[3..3]; 
//TODO figure out how to return a string ref safely 170 | return &"|"; 171 | } else { 172 | idx = idx - 1 173 | } 174 | } 175 | match sections.len() { 176 | 1 => &self[idx], 177 | _ => { 178 | if idx < self.fields.len() { 179 | &self.fields[idx][sections[1..].join(".")] 180 | } else { 181 | &"" 182 | } 183 | } 184 | } 185 | } 186 | } 187 | 188 | #[cfg(feature = "string_index")] 189 | impl<'a> Index for Segment<'a> { 190 | type Output = &'a str; 191 | 192 | /// Access Segment, Field, or sub-field string references by string index 193 | fn index(&self, idx: String) -> &Self::Output { 194 | &self[idx.as_str()] 195 | } 196 | } 197 | 198 | #[cfg(test)] 199 | mod tests { 200 | use crate::Message; 201 | use std::convert::TryFrom; 202 | 203 | #[test] 204 | fn ensure_numeric_index() { 205 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment^sub&segment"; 206 | let msg = Message::try_from(hl7).unwrap(); 207 | let x = &msg.segments[1]; 208 | let (f, c, s) = (x[1], x[(1, 0)], x[(1, 0, 1)]); 209 | assert_eq!(f, "segment^sub&segment"); 210 | assert_eq!(c, f); 211 | assert_eq!(s, "sub&segment"); 212 | } 213 | 214 | #[test] 215 | fn ensure_string_query() { 216 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment^sub&segment"; 217 | let msg = Message::try_from(hl7).unwrap(); 218 | let x = &msg.segments[1]; 219 | let (f, c, s, oob) = ( 220 | x.query("F1"), //&str 221 | x.query("F1.R1"), // &str 222 | x.query(&*String::from("F1.R1.C1")), //String 223 | String::from(x.query("F10")) + x.query("F1.R10") + x.query("F1.R2.C10"), 224 | ); 225 | assert_eq!(f, "segment^sub&segment"); 226 | assert_eq!(c, f); 227 | assert_eq!(s, "segment"); 228 | assert_eq!(oob, ""); 229 | } 230 | 231 | #[cfg(feature = "string_index")] 232 | mod string_index_tests { 233 | use super::*; 234 | #[test] 235 | fn ensure_string_index() { 236 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH 
OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment^sub&segment"; 237 | let msg = Message::try_from(hl7).unwrap(); 238 | let x = &msg.segments[1]; 239 | let (f, c, s, oob) = ( 240 | x["F1"], // &str 241 | x["F1.R1"], // &str 242 | x["F1.R1.C1".to_owned()], // String 243 | x["F1.R2.C2"], 244 | ); 245 | assert_eq!(f, "segment^sub&segment"); 246 | assert_eq!(c, "segment^sub&segment"); 247 | assert_eq!(s, "segment"); 248 | assert_eq!(oob, ""); 249 | } 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /src/fields.rs: -------------------------------------------------------------------------------- 1 | use super::separators::Separators; 2 | use super::*; 3 | use std::fmt::Display; 4 | use std::ops::Index; 5 | 6 | /// Represents a single field inside the HL7. Note that fields can include repeats, components and sub-components. 7 | /// See [the spec](http://www.hl7.eu/HL7v2x/v251/std251/ch02.html#Heading13) for more info 8 | #[derive(Debug, PartialEq)] 9 | pub struct Field<'a> { 10 | pub source: &'a str, 11 | delims: Separators, 12 | pub repeats: Vec<&'a str>, 13 | pub components: Vec>, 14 | pub subcomponents: Vec>>, 15 | } 16 | 17 | impl<'a> Field<'a> { 18 | /// Convert the given line of text into a field. 19 | pub fn parse>( 20 | input: S, 21 | delims: &Separators, 22 | ) -> Result, Hl7ParseError> { 23 | let input = input.into(); 24 | let repeats: Vec<&'a str> = input.split(delims.repeat).collect(); 25 | let components: Vec> = repeats 26 | .iter() 27 | .map(|r| r.split(delims.component).collect::>()) 28 | .collect(); 29 | let subcomponents: Vec>> = components 30 | .iter() 31 | .map(|r| { 32 | r.iter() 33 | .map(|c| c.split(delims.subcomponent).collect::>()) 34 | .collect::>>() 35 | }) 36 | .collect(); 37 | let field = Field { 38 | source: input, 39 | delims: *delims, 40 | repeats, 41 | components, 42 | subcomponents, 43 | }; 44 | Ok(field) 45 | } 46 | 47 | /// Used to hide the removal of NoneError for #2... 
48 | /// If passed `Some()` value it returns a field with that value. 49 | /// If passed `None` it returns an `Err(Hl7ParseError::MissingRequiredValue{})` 50 | pub fn parse_mandatory( 51 | input: Option<&'a str>, 52 | delims: &Separators, 53 | ) -> Result, Hl7ParseError> { 54 | match input { 55 | Some(string_value) => Field::parse(string_value, delims), 56 | None => Err(Hl7ParseError::MissingRequiredValue {}), 57 | } 58 | } 59 | 60 | /// Converts a possibly blank string into a possibly blank field! 61 | /// Note this handles optional fields, not the nul (`""`) value. 62 | /// Specfically: 63 | /// - If passed `None` it returns `Ok(None)` 64 | /// - If passed `Some("")` it returns `Ok(None)` 65 | /// - If Passed `Some(real_value)` it returns `Ok(Some(Field))` 66 | pub fn parse_optional( 67 | input: Option<&'a str>, 68 | delims: &Separators, 69 | ) -> Result>, Hl7ParseError> { 70 | match input { 71 | None => Ok(None), 72 | Some("") => Ok(None), 73 | Some(x) => Ok(Some(Field::parse(x, delims)?)), 74 | } 75 | } 76 | 77 | /// Compatibility method to get the underlying value of this field. 78 | /// NOTE that this is deprecated as a duplicate of [`Field::as_str()`]. 79 | /// 80 | /// This function was chosen as the deprecation victim as a future version of the library may include strongly typed Field's (eg DateTime) 81 | /// at which point a generically typed 'value()' function will need to be implemented. 82 | #[inline] 83 | #[deprecated( 84 | since = "0.6.0", 85 | note = "This function is a duplicate of the `as_str()` function which should be used instead." 86 | )] 87 | pub fn value(&self) -> &'a str { 88 | self.source 89 | } 90 | 91 | /// Gets the raw string value that was used to create this field. This method does not allocate. 
92 | #[inline] 93 | pub fn as_str(&'a self) -> &'a str { 94 | self.source 95 | } 96 | 97 | /// Access string reference of a Field component by String index 98 | /// Adjust the index by one as medical people do not count from zero 99 | pub fn query<'b, S>(&self, sidx: S) -> &'a str 100 | where 101 | S: Into<&'b str>, 102 | { 103 | let sidx = sidx.into(); 104 | let parts = sidx.split('.').collect::>(); 105 | 106 | if parts.len() == 1 { 107 | let stringnums = parts[0] 108 | .chars() 109 | .filter(|c| c.is_ascii_digit()) 110 | .collect::(); 111 | let idx: usize = stringnums.parse().unwrap(); 112 | 113 | self[idx - 1] 114 | } else if parts.len() == 2 { 115 | let stringnums = parts[0] 116 | .chars() 117 | .filter(|c| c.is_ascii_digit()) 118 | .collect::(); 119 | 120 | let idx0: usize = stringnums.parse().unwrap(); 121 | 122 | let stringnums = parts[1] 123 | .chars() 124 | .filter(|c| c.is_ascii_digit()) 125 | .collect::(); 126 | 127 | let idx1: usize = stringnums.parse().unwrap(); 128 | 129 | self[(idx0 - 1, idx1 - 1)] 130 | } else { 131 | "" 132 | } 133 | } 134 | } 135 | 136 | impl<'a> Display for Field<'a> { 137 | /// Required for to_string() and other formatter consumers 138 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 139 | write!(f, "{}", self.source) 140 | } 141 | } 142 | 143 | impl<'a> Clone for Field<'a> { 144 | /// Creates a new Message object using a clone of the original's source 145 | fn clone(&self) -> Self { 146 | Field::parse(self.source, &self.delims.clone()).unwrap() 147 | } 148 | } 149 | 150 | impl<'a> Index for Field<'a> { 151 | type Output = &'a str; 152 | /// Access string reference of a Field component by numeric index 153 | fn index(&self, idx: usize) -> &Self::Output { 154 | if idx > self.repeats.len() - 1 { 155 | return &""; //TODO: We're returning &&str here which doesn't seem right?!? 
156 | } 157 | 158 | &self.repeats[idx] 159 | } 160 | } 161 | 162 | impl<'a> Index<(usize, usize)> for Field<'a> { 163 | type Output = &'a str; 164 | /// Access string reference of a Field subcomponent by numeric index 165 | fn index(&self, idx: (usize, usize)) -> &Self::Output { 166 | if idx.0 > self.repeats.len() - 1 || idx.1 > self.components[idx.0].len() - 1 { 167 | return &""; //TODO: We're returning &&str here which doesn't seem right?!? 168 | } 169 | 170 | &self.components[idx.0][idx.1] 171 | } 172 | } 173 | 174 | impl<'a> Index<(usize, usize, usize)> for Field<'a> { 175 | type Output = &'a str; 176 | /// Access string reference of a Field subcomponent by numeric index 177 | fn index(&self, idx: (usize, usize, usize)) -> &Self::Output { 178 | if idx.0 > self.repeats.len() - 1 179 | || idx.1 > self.components[idx.0].len() - 1 180 | || idx.2 > self.subcomponents[idx.0][idx.1].len() - 1 181 | { 182 | return &""; //TODO: We're returning &&str here which doesn't seem right?!? 183 | } 184 | 185 | &self.subcomponents[idx.0][idx.1][idx.2] 186 | } 187 | } 188 | 189 | #[cfg(feature = "string_index")] 190 | impl<'a> Index for Field<'a> { 191 | type Output = &'a str; 192 | 193 | /// Access string reference of a Field component by String index 194 | #[cfg(feature = "string_index")] 195 | fn index(&self, sidx: String) -> &Self::Output { 196 | let parts = sidx.split('.').collect::>(); 197 | match parts.len() { 198 | 1 => { 199 | let stringnums = parts[0] 200 | .chars() 201 | .filter(|c| c.is_digit(10)) 202 | .collect::(); 203 | let idx: usize = stringnums.parse().unwrap(); 204 | 205 | &self[idx - 1] 206 | } 207 | 2 => { 208 | let stringnums = parts[0] 209 | .chars() 210 | .filter(|c| c.is_digit(10)) 211 | .collect::(); 212 | 213 | let idx0: usize = stringnums.parse().unwrap(); 214 | 215 | let stringnums = parts[1] 216 | .chars() 217 | .filter(|c| c.is_digit(10)) 218 | .collect::(); 219 | 220 | let idx1: usize = stringnums.parse().unwrap(); 221 | 222 | &self[(idx0 - 1, idx1 
- 1)] 223 | } 224 | 3 => { 225 | let stringnums = parts[0] 226 | .chars() 227 | .filter(|c| c.is_digit(10)) 228 | .collect::(); 229 | 230 | let idx0: usize = stringnums.parse().unwrap(); 231 | 232 | let stringnums = parts[1] 233 | .chars() 234 | .filter(|c| c.is_digit(10)) 235 | .collect::(); 236 | 237 | let idx1: usize = stringnums.parse().unwrap(); 238 | 239 | let stringnums = parts[2] 240 | .chars() 241 | .filter(|c| c.is_digit(10)) 242 | .collect::(); 243 | 244 | let idx2: usize = stringnums.parse().unwrap(); 245 | 246 | &self[(idx0 - 1, idx1 - 1, idx2 - 1)] 247 | } 248 | _ => &"", 249 | } 250 | } 251 | } 252 | 253 | #[cfg(feature = "string_index")] 254 | impl<'a> Index<&str> for Field<'a> { 255 | type Output = &'a str; 256 | 257 | /// Access Segment, Field, or sub-field string references by string index 258 | #[cfg(feature = "string_index")] 259 | fn index(&self, idx: &str) -> &Self::Output { 260 | &self[String::from(idx)] 261 | } 262 | } 263 | 264 | #[cfg(test)] 265 | mod tests { 266 | use super::*; 267 | 268 | #[test] 269 | fn test_conditional_parse_handles_none() { 270 | let d = Separators::default(); 271 | 272 | //if we pass a none value, we get a None back 273 | match Field::parse_optional(None, &d) { 274 | Ok(None) => assert!(true), 275 | _ => assert!(false), 276 | } 277 | } 278 | 279 | #[test] 280 | fn test_conditional_parse_handles_empty_string() { 281 | let d = Separators::default(); 282 | 283 | //an empty string (as seen when `split()`ing) should be none 284 | match Field::parse_optional(Some(""), &d) { 285 | Ok(None) => assert!(true), 286 | _ => assert!(false), 287 | } 288 | } 289 | 290 | #[test] 291 | fn test_conditional_parse_handles_value_string() { 292 | let d = Separators::default(); 293 | 294 | //an empty string (as seen when `split()`ing) should be none 295 | match Field::parse_optional(Some("xxx"), &d) { 296 | Ok(Some(field)) => assert_eq!(field.as_str(), "xxx"), 297 | _ => assert!(false), 298 | } 299 | } 300 | 301 | #[test] 302 | fn 
test_parse_mandatory_handles_some_value() { 303 | let d = Separators::default(); 304 | 305 | match Field::parse_mandatory(Some("xxx"), &d) { 306 | Ok(field) => assert_eq!(field.as_str(), "xxx"), 307 | _ => assert!(false), 308 | } 309 | } 310 | 311 | #[test] 312 | fn test_parse_mandatory_throws_on_none() { 313 | let d = Separators::default(); 314 | 315 | match Field::parse_mandatory(None, &d) { 316 | Err(Hl7ParseError::MissingRequiredValue()) => assert!(true), 317 | _ => assert!(false), 318 | } 319 | } 320 | #[test] 321 | fn test_parse_repeats() { 322 | let d = Separators::default(); 323 | let f = Field::parse_mandatory(Some("x&x^y&y~a&a^b&b"), &d).unwrap(); 324 | assert_eq!(f.repeats.len(), 2) 325 | } 326 | 327 | #[test] 328 | fn test_parse_components() { 329 | let d = Separators::default(); 330 | let f = Field::parse_mandatory(Some("xxx^yyy"), &d).unwrap(); 331 | assert_eq!(f.components[0].len(), 2) 332 | } 333 | 334 | #[test] 335 | fn test_parse_subcomponents() { 336 | let d = Separators::default(); 337 | let f = Field::parse_mandatory(Some("xxx^yyy&zzz"), &d).unwrap(); 338 | assert_eq!(f.subcomponents[0][1].len(), 2) 339 | } 340 | 341 | #[test] 342 | fn test_to_string() { 343 | let d = Separators::default(); 344 | let f = Field::parse_mandatory(Some("xxx^yyy&zzz"), &d).unwrap(); 345 | assert_eq!(f.to_string(), String::from("xxx^yyy&zzz")) 346 | } 347 | 348 | #[test] 349 | fn test_clone() { 350 | let d = Separators::default(); 351 | let f = Field::parse_mandatory(Some("xxx^yyy&zzz"), &d).unwrap(); 352 | assert_eq!(f.to_string(), f.clone().as_str()) 353 | } 354 | 355 | #[test] 356 | fn test_uint_index() { 357 | let d = Separators::default(); 358 | let f = Field::parse_mandatory(Some("xxx^yyy&zzz"), &d).unwrap(); 359 | assert_eq!(f[(0, 1)], "yyy&zzz"); 360 | assert_eq!(f[(0, 1, 1)], "zzz"); 361 | } 362 | 363 | #[test] 364 | fn test_string_query() { 365 | let d = Separators::default(); 366 | let f = Field::parse_mandatory(Some("x&x^y&y~a&a^b&b"), &d).unwrap(); 367 | 
let idx0 = String::from("R2"); 368 | let oob = "R2.C3"; 369 | assert_eq!(f.query(&*idx0), "a&a^b&b"); 370 | assert_eq!(f.query("R2.C2"), "b&b"); 371 | assert_eq!(f.query(oob), ""); 372 | } 373 | 374 | #[cfg(feature = "string_index")] 375 | mod string_index_tests { 376 | use super::*; 377 | #[test] 378 | fn test_string_index() { 379 | let d = Separators::default(); 380 | let f = Field::parse_mandatory(Some("x&x^y&y~a&a^b&b"), &d).unwrap(); 381 | assert_eq!(f["R2"], "a&a^b&b"); 382 | assert_eq!(f["R2.C2"], "b&b"); 383 | assert_eq!(f["R2.C3"], ""); 384 | } 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /src/message.rs: -------------------------------------------------------------------------------- 1 | use super::segments::Segment; 2 | use super::separators::Separators; 3 | use super::*; 4 | use std::convert::TryFrom; 5 | use std::fmt::Display; 6 | use std::ops::Index; 7 | 8 | /// A Message is an entire HL7 message parsed into it's constituent segments, fields, repeats and subcomponents, 9 | /// and it consists of (1 or more) Segments. 10 | /// Message parses the source string into `&str` slices (minimising copying) and can be created using either the [`Message::new()`] function or `TryFrom::try_from()` impl. 
11 | /// ## Example: 12 | /// ``` 13 | /// # use rusthl7::Hl7ParseError; 14 | /// # use rusthl7::Message; 15 | /// use std::convert::TryFrom; 16 | /// # fn main() -> Result<(), Hl7ParseError> { 17 | /// let source = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|1|Foo\rOBR|2|Bar"; 18 | /// let m = Message::new(source); // Note that this method can panic 19 | /// let result = Message::try_from(source); // while try_from() returns a `Result` 20 | /// assert!(result.is_ok()); 21 | /// # Ok(()) 22 | /// # } 23 | /// ``` 24 | #[derive(Debug, PartialEq)] 25 | pub struct Message<'a> { 26 | source: &'a str, 27 | pub segments: Vec>, 28 | separators: Separators, 29 | } 30 | 31 | impl<'a> Message<'a> { 32 | /// Takes the source HL7 string and parses it into a message. Segments 33 | /// and other data are slices (`&str`) into the source HL7 for minimal (preferably 0) copying. 34 | /// ⚠ If an error occurs this method will panic (for back-compat reasons)! For the preferred non-panicing alternative import the `std::convert::TryFrom` trait and use the `try_from()` function. ⚠ 35 | /// ## Example: 36 | /// ``` 37 | /// # use rusthl7::Hl7ParseError; 38 | /// # use rusthl7::Message; 39 | /// # use std::convert::TryFrom; 40 | /// # fn main() -> Result<(), Hl7ParseError> { 41 | /// let m = Message::try_from("MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4")?; 42 | /// # Ok(()) 43 | /// # } 44 | /// ``` 45 | pub fn new(source: &'a str) -> Message<'a> { 46 | Message::try_from(source).unwrap() 47 | } 48 | 49 | /// Queries for segments of the given type (i.e. matches by identifier, or name), returning a set of 0 or more segments. 
50 | /// ## Example: 51 | /// ``` 52 | /// # use rusthl7::Hl7ParseError; 53 | /// # use rusthl7::Message; 54 | /// # fn main() -> Result<(), Hl7ParseError> { 55 | /// let source = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|1|Foo\rOBR|2|Bar"; 56 | /// let m = Message::new(source); 57 | /// let obr_segments = m.segments_by_identifier("OBR")?; 58 | /// assert_eq!(obr_segments.len(), 2); 59 | /// # Ok(()) 60 | /// # } 61 | /// ``` 62 | pub fn segments_by_identifier(&self, name: &str) -> Result>, Hl7ParseError> { 63 | let found: Vec<&Segment<'a>> = self 64 | .segments 65 | .iter() 66 | .filter(|s| s.identifier() == name) 67 | .collect(); 68 | Ok(found) 69 | } 70 | 71 | /// Present input vectors of &generics to vectors of &str 72 | pub fn segments_to_str_vecs( 73 | segments: Vec<&'a Segment<'a>>, 74 | ) -> Result>, Hl7ParseError> { 75 | let vecs = segments 76 | .iter() 77 | .map(|s| s.fields.iter().map(|f| f.as_str()).collect()) 78 | .collect(); 79 | 80 | Ok(vecs) 81 | } 82 | 83 | /// Returns the source string slice used to create this Message initially. This method does not allocate. 84 | /// ## Example: 85 | /// ``` 86 | /// # use rusthl7::Hl7ParseError; 87 | /// # use rusthl7::Message; 88 | /// # use std::convert::TryFrom; 89 | /// # fn main() -> Result<(), Hl7ParseError> { 90 | /// let source = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4"; 91 | /// let m = Message::try_from(source)?; 92 | /// assert_eq!(source, m.as_str()); 93 | /// # Ok(()) 94 | /// # } 95 | /// ``` 96 | #[inline] 97 | pub fn as_str(&self) -> &'a str { 98 | self.source 99 | } 100 | 101 | /// Gets the delimiter information for this Message. 102 | /// Remember that in HL7 _each individual message_ can have unique characters as separators between fields, repeats, components and sub-components, and so this is a per-message value. 
103 | /// This method does not allocate 104 | pub fn get_separators(&self) -> Separators { 105 | self.separators 106 | } 107 | 108 | /// Access Segment, Field, or sub-field string references by string index 109 | pub fn query<'b, S>(&self, idx: S) -> &'a str 110 | where 111 | S: Into<&'b str>, 112 | { 113 | let idx = idx.into(); 114 | 115 | // Parse index elements 116 | let indices = Self::parse_query_string(idx); 117 | let seg_name = indices[0]; 118 | // Find our first segment without offending the borow checker 119 | let seg_index = self 120 | .segments 121 | .iter() 122 | .position(|r| &r.as_str()[..seg_name.len()] == seg_name) 123 | .expect("Segment not found"); 124 | let seg = &self.segments[seg_index]; 125 | if indices.len() < 2 { 126 | seg.source 127 | } else { 128 | let query = indices[1..].join("."); 129 | seg.query(&*query) 130 | } 131 | } 132 | 133 | /// Parse query/index string to fill-in missing values. 134 | /// Required when conumer requests "PID.F3.C1" to pass integers down 135 | /// to the usize indexers at the appropriate positions 136 | fn parse_query_string(query: &str) -> Vec<&str> { 137 | fn query_idx_pos(indices: &[&str], idx: &str) -> Option { 138 | indices[1..] 
139 | .iter() 140 | .position(|r| r[0..1].to_uppercase() == idx) 141 | } 142 | let indices: Vec<&str> = query.split('.').collect(); 143 | // Leave segment name untouched - complex match 144 | let mut res = vec![indices[0]]; 145 | // Get segment positions, if any 146 | let sub_pos = query_idx_pos(&indices, "S"); 147 | let com_pos = query_idx_pos(&indices, "C"); 148 | let rep_pos = query_idx_pos(&indices, "R"); 149 | let fld_pos = query_idx_pos(&indices, "F"); 150 | // Push segment values to result, returning early if possible 151 | match fld_pos { 152 | Some(f) => res.push(indices[f + 1]), 153 | None => { 154 | // If empty but we have subsections, default to F1 155 | if rep_pos.is_some() || com_pos.is_some() || sub_pos.is_some() { 156 | res.push("F1") 157 | } else { 158 | return res; 159 | } 160 | } 161 | }; 162 | match rep_pos { 163 | Some(r) => res.push(indices[r + 1]), 164 | None => { 165 | // If empty but we have subsections, default to R1 166 | if com_pos.is_some() || sub_pos.is_some() { 167 | res.push("R1") 168 | } else { 169 | return res; 170 | } 171 | } 172 | }; 173 | match com_pos { 174 | Some(c) => res.push(indices[c + 1]), 175 | None => { 176 | // If empty but we have a subcomponent, default to C1 177 | if sub_pos.is_some() { 178 | res.push("C1") 179 | } else { 180 | return res; 181 | } 182 | } 183 | }; 184 | if let Some(s) = sub_pos { 185 | res.push(indices[s + 1]) 186 | } 187 | res 188 | } 189 | } 190 | 191 | impl<'a> TryFrom<&'a str> for Message<'a> { 192 | type Error = Hl7ParseError; 193 | 194 | /// Takes the source HL7 string and parses it into a message. Segments 195 | /// and other data are slices (`&str`) into the source HL7 for minimal (preferably 0) copying. 
196 | /// ## Example: 197 | /// ``` 198 | /// # use rusthl7::Hl7ParseError; 199 | /// # use rusthl7::Message; 200 | /// # use std::convert::TryFrom; 201 | /// # fn main() -> Result<(), Hl7ParseError> { 202 | /// let m = Message::try_from("MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4")?; 203 | /// # Ok(()) 204 | /// # } 205 | /// ``` 206 | fn try_from(source: &'a str) -> Result { 207 | let separators = str::parse::(source)?; 208 | 209 | let possible = source 210 | .split(separators.segment) 211 | .map(|line| Segment::parse(line, &separators)); 212 | 213 | let segments: Vec = possible.collect::, Self::Error>>()?; 214 | 215 | let m = Message { 216 | source, 217 | segments, 218 | separators, 219 | }; 220 | 221 | Ok(m) 222 | } 223 | } 224 | 225 | impl<'a> Display for Message<'a> { 226 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 227 | write!(f, "{}", self.source) 228 | } 229 | } 230 | 231 | impl<'a> Clone for Message<'a> { 232 | /// Creates a new cloned Message object referencing the same source slice as the original. 
233 | /// ## Example: 234 | /// ``` 235 | /// # use rusthl7::Hl7ParseError; 236 | /// # use rusthl7::Message; 237 | /// # use std::convert::TryFrom; 238 | /// # fn main() -> Result<(), Hl7ParseError> { 239 | /// let m = Message::try_from("MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4")?; 240 | /// let cloned = m.clone(); // this object is looking at the same string slice as m 241 | /// # Ok(()) 242 | /// # } 243 | /// ``` 244 | fn clone(&self) -> Self { 245 | Message::try_from(self.source).unwrap() 246 | } 247 | } 248 | 249 | impl<'a> Index for Message<'a> { 250 | type Output = &'a str; 251 | 252 | /// Access Segment string reference by numeric index 253 | fn index(&self, idx: usize) -> &Self::Output { 254 | if idx > self.segments.len() { 255 | return &""; 256 | } 257 | &self.segments[idx].source 258 | } 259 | } 260 | #[cfg(feature = "string_index")] 261 | impl<'a> Index for Message<'a> { 262 | type Output = &'a str; 263 | 264 | /// Access Segment, Field, or sub-field string references by string index 265 | #[cfg(feature = "string_index")] 266 | fn index(&self, idx: String) -> &Self::Output { 267 | // Parse index elements 268 | let indices = Self::parse_query_string(&idx); 269 | let seg_name = indices[0]; 270 | // Find our first segment without offending the borow checker 271 | let seg_index = self 272 | .segments 273 | .iter() 274 | .position(|r| &r.as_str()[..seg_name.len()] == seg_name) 275 | .expect("Segment not found"); 276 | let seg = &self.segments[seg_index]; 277 | if indices.len() < 2 { 278 | &seg.source 279 | } else { 280 | &seg[indices[1..].join(".")] 281 | } 282 | } 283 | } 284 | 285 | #[cfg(feature = "string_index")] 286 | impl<'a> Index<&str> for Message<'a> { 287 | type Output = &'a str; 288 | 289 | #[cfg(feature = "string_index")] 290 | fn index(&self, idx: &str) -> &Self::Output { 291 | &self[String::from(idx)] 292 | } 293 | } 294 | 295 | #[cfg(test)] 296 | mod tests { 297 | use super::*; 298 | 299 | #[test] 300 | fn 
ensure_segments_are_returned() -> Result<(), Hl7ParseError> { 301 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 302 | let msg = Message::try_from(hl7)?; 303 | 304 | assert_eq!(msg.segments.len(), 2); 305 | Ok(()) 306 | } 307 | 308 | #[test] 309 | fn ensure_missing_segments_are_not_found() -> Result<(), Hl7ParseError> { 310 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 311 | let msg = Message::try_from(hl7)?; 312 | assert_eq!(msg.segments_by_identifier("EVN").unwrap().len(), 0); 313 | Ok(()) 314 | } 315 | 316 | #[test] 317 | fn ensure_segments_convert_to_vectors() -> Result<(), Hl7ParseError> { 318 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 319 | let msg = Message::try_from(hl7)?; 320 | let segs = msg.segments_by_identifier("OBR")?; 321 | let sval = segs.first().unwrap().fields.first().unwrap().as_str(); 322 | let vecs = Message::segments_to_str_vecs(segs).unwrap(); 323 | let vval = vecs.first().unwrap().first().unwrap(); 324 | 325 | assert_eq!(vval, &sval); 326 | Ok(()) 327 | } 328 | #[test] 329 | fn ensure_clones_are_owned() -> Result<(), Hl7ParseError> { 330 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 331 | let msg = Message::try_from(hl7)?; 332 | // Verify that we can clone and take ownership 333 | let dolly = msg.clone(); 334 | let dolly = dolly.to_owned(); 335 | assert_eq!(msg.query("MSH.F7"), dolly.query("MSH.F7")); 336 | Ok(()) 337 | } 338 | 339 | #[test] 340 | fn ensure_to_string() -> Result<(), Hl7ParseError> { 341 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 342 | let msg = Message::try_from(hl7)?; 343 | assert_eq!(msg.to_string(), String::from(hl7)); 344 | Ok(()) 345 | } 346 | 347 | #[test] 348 | fn ensure_message_creation() -> Result<(), Hl7ParseError> { 349 
| let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment"; 350 | let msg0 = Message::try_from(hl7)?; 351 | let msg1 = Message::new(hl7); 352 | 353 | assert_eq!(msg0, msg1); 354 | Ok(()) 355 | } 356 | 357 | #[test] 358 | fn ensure_query() -> Result<(), Hl7ParseError> { 359 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment^sub&segment"; 360 | let msg = Message::try_from(hl7)?; 361 | assert_eq!(msg.query("OBR.F1.R1.C2"), "sub&segment"); 362 | assert_eq!(msg.query(&*"OBR.F1.R1.C1".to_string()), "segment"); // Test the Into param with a String 363 | assert_eq!(msg.query(&*String::from("OBR.F1.R1.C1")), "segment"); 364 | assert_eq!(msg.query("MSH.F1"), "^~\\&"); 365 | Ok(()) 366 | } 367 | 368 | #[cfg(feature = "string_index")] 369 | mod string_index_tests { 370 | use super::*; 371 | #[test] 372 | fn ensure_index() -> Result<(), Hl7ParseError> { 373 | let hl7 = "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rOBR|segment^sub&segment"; 374 | let msg = Message::try_from(hl7)?; 375 | assert_eq!(msg["OBR.F1.R1.C2"], "sub&segment"); 376 | assert_eq!(msg[&*"OBR.F1.R1.C1".to_string()], "segment"); // Test the Into param with a String 377 | assert_eq!(msg[String::from("OBR.F1.R1.C1")], "segment"); 378 | assert_eq!(msg[String::from("OBR.F1.C1")], "segment"); // Test missing element in selector 379 | assert_eq!(msg[String::from("OBR.F1.R1.C2.S1")], "sub"); 380 | println!("{}", Message::parse_query_string("MSH.F2").join(".")); 381 | assert_eq!(msg["MSH.F2"], "^~\\&"); 382 | Ok(()) 383 | } 384 | } 385 | } 386 | -------------------------------------------------------------------------------- /src/escape_sequence.rs: -------------------------------------------------------------------------------- 1 | use log::{debug, trace}; 2 | use regex::Regex; 3 | 4 | use crate::separators::Separators; 5 | use std::borrow::Cow; 6 | 7 | /// This struct provides the decoding 
functionality to parse escape sequences from the source string back to their original chars. 8 | /// 9 | /// For more info see [here](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) or [here](https://confluence.hl7australia.com/display/OOADRM20181/Appendix+1+Parsing+HL7v2#Appendix1ParsingHL7v2-Dealingwithreservedcharactersanddelimiters) 10 | /// 11 | /// ## Example: 12 | /// ``` 13 | /// # use rusthl7::EscapeSequence; 14 | /// # use rusthl7::Separators; 15 | /// let delims = Separators::default(); 16 | /// let decoder = EscapeSequence::new(delims); 17 | /// let hl7_field_value = r#"Obstetrician \T\ Gynaecologist"#; 18 | /// let decoded = decoder.decode(hl7_field_value); 19 | /// assert_eq!(decoded, r#"Obstetrician & Gynaecologist"#); 20 | /// ``` 21 | /// 22 | /// ## Details 23 | /// 24 | /// This decoder will replace some, **but not all** of the standard HL7 escape sequences. 25 | /// - `\E\`, `\F\`, `\R\`, `\S\`, `\T\` are all handled, and replaced with the Escape, Field, Repeat, Component and Sub-Component separator chars respectively 26 | /// - `\X..\` hexadecimal escape sequences are supported (2 hex digits per char) 27 | /// 28 | /// The following sequences are **NOT** replaced by design and will be left in the string: 29 | /// - `\H\` Indicates the start of highlighted text, this is a consuming application problem and will not be replaced. 30 | /// - `\N\` Indicates the end of highlighted text and resumption of normal text. This is a consuming application problem and will not be replaced. 31 | /// - `\Z...\` Custom application escape sequences, these are custom (as are most `Z` items in HL7) and will not be replaced. 32 | /// 33 | /// Also, not all of the sequences that _should_ be replaced are currently being handled, specifically: 34 | /// - `\Cxxyy\`, `\Mxxyyzz\` arguably _should_ be handled, but aren't currently.
There's [some suggestion](https://confluence.hl7australia.com/display/OOADRM20181/Appendix+1+Parsing+HL7v2#Appendix1ParsingHL7v2-Unicodecharacters) that these are discouraged in lieu of html-escaped values
///
/// If there's _no possibility_ of escape sequences (because there are no escape characters, typically backslashes) in the value, this function short circuits as early as possible and returns the original string slice for optimum performance.
pub struct EscapeSequence {
    escape_buf: [u8; 1],
    field_buf: [u8; 1],
    repeat_buf: [u8; 1],
    component_buf: [u8; 1],
    subcomponent_buf: [u8; 1],
    escape_regex: Regex,
}

impl<'a> EscapeSequence {
    /// Create a new struct ready for processing of escape sequences.
    /// Escape sequences in HL7 are dependent on the actual delimiters used _for that message_, and so we need a [Separators] instance to know what chars we're working with.
    ///
    /// Creating a new [EscapeSequence] does involve some non-trivial work in order to improve the performance of the `decode()` operations. It's expected that instances of this struct will be cached
    /// per message, or per sending application if it will always use the same separators, or for the lifetime of the process if you're only dealing with known (often default) separators.
    ///
    /// ## Panics
    /// Each delimiter is encoded into a one-byte buffer, so a delimiter that needs more than one UTF-8 byte (i.e. is not ASCII) makes this function panic.
    pub fn new(delims: Separators) -> EscapeSequence {
        // The escape char must be matched literally. Escaping it covers the
        // backslash as well as every other character that has a meaning in
        // regex syntax (`.`, `*`, `+`, `(`, ...), which would otherwise match
        // the wrong text or fail to compile.
        let regex = Regex::new(&regex::escape(&delims.escape_char.to_string())).unwrap();

        let mut return_val = EscapeSequence {
            escape_buf: [0; 1], // The spec specifically requires single byte (actually 7-bit ASCII) delim chars
            field_buf: [0; 1],
            repeat_buf: [0; 1],
            component_buf: [0; 1],
            subcomponent_buf: [0; 1],
            escape_regex: regex,
        };

        // We need &str to inject into the output buffer, convert the `Char` here
        let _bytes = delims.escape_char.encode_utf8(&mut return_val.escape_buf);
        let _bytes = delims.field.encode_utf8(&mut return_val.field_buf);
        let _bytes = delims.repeat.encode_utf8(&mut return_val.repeat_buf);
        let _bytes = delims.component.encode_utf8(&mut return_val.component_buf);
        let _bytes = delims
            .subcomponent
            .encode_utf8(&mut return_val.subcomponent_buf);

        return_val
    }

    /// This is where the magic happens. Call this to update any escape sequences in the given &str.
    pub fn decode<S>(&self, input: S) -> Cow<'a, str>
    where
        S: Into<Cow<'a, str>>,
    {
        // The comments below will almost certainly reference backslashes as that is by far the most common escape character
        // the reality is any reference to "backslash" is actually referencing the escape char in the MSH segemnt, and stored in `self.delims.escape_char`

        let input = input.into();
        let first = self.escape_regex.find(&input); //using `regex.find` here is about twice as fast for the 'no sequences' benchmark as using &str.find()...
91 | 92 | match first { 93 | Some(first) => { 94 | let first = first.start(); 95 | 96 | // We know there's a backslash, so we need to process stuff 97 | 98 | // we're going to be replacing (mainly) 3 char escape sequences (eg `\F\`) with a single char sequence (eg `|`) so the initial length of the input should be sufficient 99 | let mut output: Vec = Vec::with_capacity(input.len()); 100 | output.extend_from_slice(input[0..first].as_bytes()); // this doesn't include the escape char we found 101 | 102 | // index in input that we're up to 103 | let mut i = first; 104 | 105 | debug!("Found first escape char at {}", first); 106 | 107 | while i < input.len() { 108 | let start_of_sequence = self.escape_regex.find(&input[i..]); 109 | if start_of_sequence.is_none() { 110 | // there's nothing left to process, no more backslashes in the rest of the buffer 111 | 112 | trace!("No more sequence starts in input, completing..."); 113 | output.extend_from_slice(input[i..].as_bytes()); // add the rest of the input 114 | break; // break out of while loop 115 | } 116 | 117 | let start_index = start_of_sequence.unwrap().start() + i; // index is offset into input by i chars as that's what's we subsliced above 118 | trace!("Found the next escape char at {}", start_index); 119 | 120 | let end_of_sequence = self.escape_regex.find(&input[start_index + 1..]); 121 | 122 | if end_of_sequence.is_none() { 123 | // there's nothing left to process, the backslash we are curently looking at is NOT an escape sequence 124 | trace!("No more sequence ends in input, completing..."); 125 | output.extend_from_slice(input[start_index..].as_bytes()); // add the rest of the input (including the escape char that brought us here) in one go 126 | break; // break out of while loop 127 | } 128 | 129 | // else we have found another escape char, get the slice in between 130 | let end_index = end_of_sequence.unwrap().start() + start_index + 1; // the end is the number of chars after the start_index, not from the 
start of input 131 | trace!("Found end of sequence at {}", end_index); 132 | 133 | let sequence = &input[start_index + 1..end_index]; 134 | trace!("Found escape sequence: '{}'", sequence); 135 | 136 | // we have a possible window of data between i and start_index that we've just read through as text, but isn't yet in output... append it now 137 | output.extend_from_slice(input[i..start_index].as_bytes()); 138 | 139 | match sequence { 140 | "E" => output.extend_from_slice(&self.escape_buf), 141 | "F" => output.extend_from_slice(&self.field_buf), 142 | "R" => output.extend_from_slice(&self.repeat_buf), 143 | "S" => output.extend_from_slice(&self.component_buf), 144 | "T" => output.extend_from_slice(&self.subcomponent_buf), 145 | 146 | // Highlighted/Normal text sequences need to remain for consuming libraries to act on as they see fit 147 | "H" | "N" => { 148 | output.extend_from_slice(&self.escape_buf); 149 | output.extend_from_slice(sequence.as_bytes()); 150 | output.extend_from_slice(&self.escape_buf); 151 | } 152 | 153 | _ => { 154 | if sequence.starts_with('Z') { 155 | trace!("Into custom escape sequence, ignoring..."); 156 | output.extend_from_slice(&self.escape_buf); 157 | output.extend_from_slice(sequence.as_bytes()); 158 | output.extend_from_slice(&self.escape_buf); 159 | } else if let Some(hex_code) = sequence.strip_prefix('X') { 160 | let hex = hex::decode(hex_code) 161 | .expect("Unable to parse X-value into valid hex"); 162 | println!("Converted hex code {} to {:?}", hex_code, hex); 163 | output.extend_from_slice(&hex); 164 | 165 | // TODO: Add more sequences 166 | } else { 167 | // not a known sequence, must just be two backslashes randomly in a string 168 | trace!("Unknown sequence, extending output..."); 169 | output.extend_from_slice( 170 | input[start_index - 1..end_index].as_bytes(), 171 | ); 172 | // include both the initial escape char, and also the final one. 
173 | } 174 | } 175 | } 176 | 177 | i = end_index + 1; // move through buffer, we we've covered everything up to this point now 178 | } // while more chars in input to loop through 179 | 180 | Cow::Owned(String::from_utf8(output).unwrap()) 181 | } 182 | None => { 183 | // no escape char in the string at all, just return what we have 184 | input 185 | } 186 | } 187 | } 188 | } 189 | 190 | #[cfg(test)] 191 | mod tests { 192 | use std::str::FromStr; 193 | 194 | use super::*; 195 | 196 | #[test] 197 | fn test_decode_does_nothing_if_not_required() { 198 | let delims = Separators::default(); 199 | let escaper = EscapeSequence::new(delims); 200 | 201 | let input = "There are no escape sequences here/there/."; 202 | let output = escaper.decode(input); 203 | assert_eq!(output, input); 204 | } 205 | 206 | #[test] 207 | fn test_decode_handles_simple_x_codes() { 208 | let delims = Separators::default(); 209 | let escaper = EscapeSequence::new(delims); 210 | 211 | let input = "Escape sequence with \\X0D\\."; 212 | let output = escaper.decode(input); 213 | assert_eq!(output, "Escape sequence with \r."); 214 | } 215 | 216 | #[test] 217 | fn test_decode_handles_multi_byte_x_codes() { 218 | let delims = Separators::default(); 219 | let escaper = EscapeSequence::new(delims); 220 | 221 | let input = "Sentence 1.\\X0D0A\\Sentence 2."; 222 | let output = escaper.decode(input); 223 | assert_eq!(output, "Sentence 1.\r\nSentence 2."); 224 | } 225 | 226 | #[test] 227 | fn test_decode_does_nothing_if_backslash_is_not_escape_sequence() { 228 | let delims = Separators::default(); 229 | let escaper = EscapeSequence::new(delims); 230 | 231 | let input = r#"There are no escape sequences here\there."#; 232 | let output = escaper.decode(input); 233 | assert_eq!(output, input); 234 | } 235 | 236 | #[test] 237 | fn test_decode_handles_field_sequence() { 238 | let delims = Separators::default(); 239 | let escaper = EscapeSequence::new(delims); 240 | 241 | let input = r#"Escape this \F\ please"#; 242 
| let output = escaper.decode(input); 243 | assert_eq!(output, "Escape this | please"); 244 | } 245 | 246 | #[test] 247 | fn ensure_decode_does_not_eat_chars_it_shouldnt() { 248 | let delims = Separators::default(); 249 | let escaper = EscapeSequence::new(delims); 250 | 251 | let input = r#"Escape this \F please"#; 252 | let output = escaper.decode(input); 253 | assert_eq!(output, input); 254 | } 255 | 256 | #[test] 257 | fn ensure_decode_handles_custom_delims() { 258 | let delims = Separators::from_str("MSH^!@#$").unwrap(); 259 | let escaper = EscapeSequence::new(delims); 260 | 261 | let input = r#"Escape this #F# please"#; 262 | let output = escaper.decode(input); 263 | assert_eq!(output, "Escape this ^ please"); 264 | } 265 | 266 | #[test] 267 | fn ensure_decode_handles_eescape_sequence() { 268 | let delims = Separators::default(); 269 | let escaper = EscapeSequence::new(delims); 270 | 271 | let input = r#"Escape this \E\ please"#; // convert the escape sequence 272 | let output = escaper.decode(input); 273 | assert_eq!(output, r#"Escape this \ please"#); // into a single escape char 274 | 275 | // ensure it moves on past the char it just added 276 | let input = r#"Escape this \E\ pretty \F\ please"#; // convert the escape sequence 277 | let output = escaper.decode(input); 278 | assert_eq!(output, r#"Escape this \ pretty | please"#); // into a single escape char and still handle future sequences ok 279 | } 280 | 281 | #[test] 282 | fn test_decode_handles_repeat_sequence() { 283 | let delims = Separators::default(); 284 | let escaper = EscapeSequence::new(delims); 285 | 286 | let input = r#"Escape this \R\ please"#; 287 | let output = escaper.decode(input); 288 | assert_eq!(output, "Escape this ~ please"); 289 | } 290 | 291 | #[test] 292 | fn test_decode_handles_component_sequence() { 293 | let delims = Separators::default(); 294 | let escaper = EscapeSequence::new(delims); 295 | 296 | let input = r#"Escape this \S\ please"#; 297 | let output = 
escaper.decode(input); 298 | assert_eq!(output, "Escape this ^ please"); 299 | } 300 | 301 | #[test] 302 | fn test_decode_handles_subcomponent_sequence() { 303 | let delims = Separators::default(); 304 | let escaper = EscapeSequence::new(delims); 305 | 306 | let input = r#"Obstetrician \T\ Gynaecologist"#; 307 | let output = escaper.decode(input); 308 | assert_eq!(output, "Obstetrician & Gynaecologist"); 309 | } 310 | 311 | #[test] 312 | fn ensure_decode_ignores_highlighting_sequence() { 313 | let delims = Separators::default(); 314 | let escaper = EscapeSequence::new(delims); 315 | 316 | let input = r#"Don't escape this \H\highlighted text\N\ please"#; 317 | let output = escaper.decode(input); 318 | assert_eq!(output, input); 319 | } 320 | 321 | #[test] 322 | fn ensure_decode_ignores_custom_sequence() { 323 | let delims = Separators::default(); 324 | let escaper = EscapeSequence::new(delims); 325 | 326 | let input = r#"Don't escape this custom sequence \Z1234\ please"#; 327 | let output = escaper.decode(input); 328 | assert_eq!(output, input); 329 | } 330 | } 331 | --------------------------------------------------------------------------------