├── .gitignore ├── .rustfmt.toml ├── src ├── soup.rs ├── explain.rs ├── disperse.rs ├── driver.rs ├── network.rs ├── retrieve.rs ├── sql.rs ├── bin │ ├── owoof-csv.rs │ └── owoof.rs ├── types.rs └── lib.rs ├── LICENSE ├── Cargo.toml ├── schema.sql └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | memes 4 | goodbooks-10k 5 | *.sqlite 6 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width=99 2 | struct_lit_width=99 3 | struct_variant_width=99 4 | match_arm_leading_pipes="Preserve" 5 | edition='2021' 6 | # I have to use nightly for this??? 7 | # enum_discrim_align_threshold=8 8 | # struct_field_align_threshold=8 9 | # ??????????? 10 | # format_code_in_doc_comments=true 11 | -------------------------------------------------------------------------------- /src/soup.rs: -------------------------------------------------------------------------------- 1 | /// A proof/receipt that a value exists in the database so it can be used in a triple 2 | #[derive(Debug, PartialEq)] 3 | pub struct Encoded { 4 | pub(crate) rowid: i64, 5 | p: std::marker::PhantomData, 6 | } 7 | 8 | impl Encoded { 9 | pub(crate) fn from_rowid(rowid: i64) -> Self { 10 | let p = Default::default(); 11 | Encoded { rowid, p } 12 | } 13 | } 14 | 15 | impl Clone for Encoded { 16 | fn clone(&self) -> Self { 17 | Self::from_rowid(self.rowid) 18 | } 19 | } 20 | 21 | impl Copy for Encoded {} 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 somebody@froghat.ca 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "owoof" 3 | version = "0.2.0-pre.4" 4 | authors = ["sqwishy "] 5 | edition = "2021" 6 | license = "Apache-2.0" 7 | repository = "https://github.com/sqwishy/owoof" 8 | homepage = "https://github.com/sqwishy/owoof" 9 | description = "Uses SQLite to store data and a datalog-like format to query it. " 10 | keywords = ["datalog", "sqlite"] 11 | 12 | [[bin]] 13 | name = "owoof" 14 | required-features = ["cli"] 15 | 16 | [[bin]] 17 | name = "owoof-csv" 18 | required-features = ["cli", "csv"] 19 | 20 | [package.metadata.docs.rs] 21 | features = ["serde", "serde_json"] 22 | 23 | [features] 24 | default = ["explain"] 25 | explain = [] 26 | cli = ["serde", "serde_json", "anyhow", "uuid/serde", "atty"] 27 | # serde = ["serde", "uuid/serde"] 28 | 29 | [dependencies] 30 | thiserror = "1" 31 | rusqlite = { version = "0.26", features = ["uuid", "functions", "hooks"] } 32 | uuid = { version = "0.8", features = ["v4"] } 33 | 34 | # chrono = { version = "0.4", features = ["serde"] } 35 | 36 | anyhow = { version = "1", optional = true } 37 | serde = { version = "1", optional = true, features = ["derive"] } 38 | serde_json = { version = "1", optional = true } 39 | csv = { version = "1", optional = true } 40 | atty = { version = "0.2", optional = true } 41 | 42 | [dev-dependencies] 43 | anyhow = "1" 44 | 
-------------------------------------------------------------------------------- /src/explain.rs: -------------------------------------------------------------------------------- 1 | //! SQLite's EXPLAIN QUERY PLAN. 2 | //! 3 | //! Requires the `explain` feature. 4 | use std::fmt; 5 | 6 | use crate::{sql::Query, DontWoof}; 7 | 8 | impl<'tx> DontWoof<'tx> { 9 | pub fn explain_plan<'n, V>(&self, query: &Query) -> rusqlite::Result 10 | where 11 | for<'p> &'p [V]: rusqlite::Params, 12 | { 13 | let sql = format!("EXPLAIN QUERY PLAN\n{}", query.as_str()); 14 | 15 | let mut stmt = self.tx.prepare(&sql)?; 16 | 17 | let rows = stmt.query_map(query.params(), PlanExplainLine::from_row)?; 18 | 19 | rows.collect::, _>>() 20 | .map(|lines| PlanExplanation { lines }) 21 | } 22 | } 23 | 24 | #[derive(Debug, Clone)] 25 | pub struct Explanation { 26 | pub lines: Vec, 27 | } 28 | 29 | /// (From SQLite shell.c:explain_data_prepare ...) 30 | /// 31 | /// The indenting rules are: 32 | /// 33 | /// * For each "Next", "Prev", "VNext" or "VPrev" instruction, indent 34 | /// all opcodes that occur between the p2 jump destination and the opcode 35 | /// itself by 2 spaces. 36 | /// 37 | /// * For each "Goto", if the jump destination is earlier in the program 38 | /// and ends on one of: 39 | /// Yield SeekGt SeekLt RowSetRead Rewind 40 | /// or if the P1 parameter is one instead of zero, 41 | /// then indent all opcodes between the earlier instruction 42 | /// and "Goto" by 2 spaces. 
43 | impl fmt::Display for Explanation { 44 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 45 | let mut indent = vec![]; 46 | indent.resize_with(self.lines.len(), || 0u8); 47 | 48 | let mut yields = vec![]; 49 | yields.resize_with(self.lines.len(), || false); 50 | 51 | let yield_codes = ["Yield", "SeekGt", "SeekLt", "RowSetRead", "Rewind"]; 52 | let next_codes = ["Next", "Prev", "VNext", "VPrev"]; // must match the opcode names in the indenting rules above 53 | 54 | for (e, line) in self.lines.iter().enumerate() { 55 | let p2 = line.p2 as usize; 56 | 57 | match line.opcode.as_str() { 58 | op if yield_codes.contains(&op) => yields.get_mut(e).map(|y| *y = true), 59 | op if next_codes.contains(&op) => indent 60 | .get_mut(p2..e) 61 | .map(|slice| slice.iter_mut().for_each(|i| *i += 1)), 62 | "Goto" if p2 < e && (yields.get(p2) == Some(&true) || line.p1 == 1) => indent 63 | .get_mut(p2..e) 64 | .map(|slice| slice.iter_mut().for_each(|i| *i += 1)), 65 | _ => None, 66 | }; 67 | } 68 | 69 | for (indent, line) in indent.into_iter().zip(self.lines.iter()) { 70 | writeln!(f, "{1:0$} {2}", indent as usize, "", line)?; 71 | } 72 | 73 | Ok(()) 74 | } 75 | } 76 | 77 | #[derive(Debug, Clone)] 78 | pub struct ExplainLine { 79 | pub addr: i64, 80 | pub opcode: String, 81 | pub p1: i64, 82 | pub p2: i64, 83 | pub p3: i64, 84 | pub p4: Option, 85 | pub p5: i64, 86 | pub comment: Option, 87 | } 88 | 89 | impl ExplainLine { 90 | pub fn from_row(c: &rusqlite::Row) -> rusqlite::Result { 91 | Ok(ExplainLine { 92 | addr: c.get(0)?, 93 | opcode: c.get(1)?, 94 | p1: c.get(2)?, 95 | p2: c.get(3)?, 96 | p3: c.get(4)?, 97 | p4: c.get(5)?, 98 | p5: c.get(6)?, 99 | comment: c.get(7)?, 100 | }) 101 | } 102 | } 103 | 104 | impl fmt::Display for ExplainLine { 105 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 106 | write!( 107 | f, 108 | "{addr:>4} {opcode:<16} {p1:>4} {p2:>4} {p3:>4} {p4:<12} {p5:<22} {comment}", 109 | addr = self.addr, 110 | opcode = self.opcode, 111 | p1 = self.p1, 112 | p2 = self.p2, 113 | p3 = self.p3, 114 |
p4 = self.p4.as_deref().unwrap_or(""), 115 | p5 = self.p5, 116 | comment = self.comment.as_deref().unwrap_or(""), 117 | ) 118 | } 119 | } 120 | 121 | #[derive(Debug, Clone)] 122 | pub struct PlanExplanation { 123 | pub lines: Vec, 124 | } 125 | 126 | impl fmt::Display for PlanExplanation { 127 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 128 | let mut indents = vec![0u8; self.lines.len()]; 129 | 130 | for (e, line) in self.lines.iter().enumerate() { 131 | let indent = if line.parent > 0 { 132 | let search = self 133 | .lines 134 | .iter() 135 | .zip(indents.iter()) 136 | .take(e) 137 | .find(|(p, _)| p.id == line.parent) 138 | .map(|(_, indent)| indent); 139 | if let Some(parent_indent) = search { 140 | let indent = parent_indent + 1; 141 | indents.get_mut(e).map(|i| *i = indent); 142 | indent 143 | } else { 144 | debug_assert!(false); 145 | 0 146 | } 147 | } else { 148 | 0 149 | }; 150 | 151 | writeln!(f, "{: rusqlite::Result { 167 | Ok(PlanExplainLine { id: row.get(0)?, parent: row.get(1)?, text: row.get(3)? }) 168 | } 169 | } 170 | 171 | impl fmt::Display for PlanExplainLine { 172 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 173 | f.write_str(&self.text) 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | -- pragma foreign_keys=on; 2 | -- begin; 3 | -- FYI: "At this time SQLite supports only FOR EACH ROW triggers" 4 | 5 | 6 | -------------------------------------------------------------------------------- 7 | 8 | create table "soup" 9 | ( rowid integer primary key 10 | , t integer not null 11 | , v blob not null 12 | , rc integer not null default 0 13 | ); 14 | 15 | -- I guess t values are decided at compile time? So we can do this fairly reliably? 16 | -- This seems to improve query time a bit but insert performance is much slower so idk. 
17 | create unique index "soup-tv-0" on "soup" (v) where t = 0; 18 | create unique index "soup-tv-1" on "soup" (v) where t = 1; 19 | create unique index "soup-tv-2" on "soup" (v) where t = 2; 20 | 21 | create trigger "soup/no-updates" before update 22 | on "soup" when new.t != old.t or new.v != old.v 23 | begin select raise (abort, 'not yet implemented (also confusing)'); 24 | end; 25 | 26 | create trigger "soup/forget-zero-rc" before update 27 | on "soup" when new.rc = 0 28 | begin delete from "soup" where rowid = new.rowid; 29 | end; 30 | 31 | -------------------------------------------------------------------------------- 32 | 33 | create table "triples" 34 | ( e integer not null references "entities" (rowid) 35 | , a integer not null references "attributes" (rowid) 36 | , v integer not null references "soup" (rowid) 37 | , primary key (e, a, v) 38 | ) without rowid; -- <_< 39 | 40 | -- Created programatically by the owoof library. 41 | --- create index "triples-ave-N" on "triples" (v, e) where a = N; 42 | 43 | -- TODO this kills the following query... 44 | --- owoof '?calvin :book/title "The Complete Calvin and Hobbes"' \ 45 | --- '?rating :rating/book ?calvin' \ 46 | --- '?rating :rating/score 1' \ 47 | --- '?rating :rating/user ?u' \ 48 | --- '?more-great-takes :rating/user ?u' \ 49 | --- '?more-great-takes :rating/book ?b' \ 50 | --- '?more-great-takes :rating/score 5' \ 51 | --- --show '?b :book/title :book/avg-rating' \ 52 | --- --asc '?b :book/avg-rating' --db /tmp/owoof-three.sqlite 53 | -- The following index is very slow to populate :( 54 | --- create index "triples-v" on "triples" (v); 55 | 56 | -- When creating a triple, increment the value's soup.rc. 57 | create trigger "triples/soup-inc-rc" after insert 58 | on "triples" 59 | begin update "soup" set rc = rc + 1 where rowid = new.v; 60 | end; 61 | 62 | -- When removing a triple, decrement the value's soup.rc. 
63 | create trigger "triples/soup-dec-rc" after delete 64 | on "triples" 65 | begin update "soup" set rc = rc - 1 where rowid = old.v; 66 | end; 67 | 68 | -------------------------------------------------------------------------------- 69 | -- "entities" is a materialized view maintained by triggers. 70 | 71 | create table "entities" 72 | ( rowid integer primary key references "soup" (rowid) ); 73 | 74 | create trigger "soup/replicate-entities" after insert 75 | on "soup" when new.t = 1 76 | begin insert into "entities" (rowid) values (new.rowid); 77 | end; 78 | 79 | create trigger "soup/unreplicate-entities" after delete 80 | on "soup" when old.t = 1 81 | begin delete from "entities" where rowid = old.rowid; 82 | end; 83 | 84 | -- When an entity id is encoded, a row in soup with t = 1 is inserted. This row must 85 | -- have a corresponding row in "triples" (soup.rowid :db/id soup.rowid) for its entire 86 | -- lifetime and a row in "entities" so that the triple is valid. 87 | create trigger "soup/assert-dbid-triples" after insert 88 | on "soup" when new.t = 1 -- src/driver.rs ENTITY_ID_TAG 89 | -- a = 1, the soup rowid for :db/id 90 | begin insert into "triples" (e, a, v) values (new.rowid, 1, new.rowid); 91 | end; 92 | 93 | -- If the triple is deleted, the "entities" and "soup" rows must also be deleted, 94 | -- meaning they can't be referenced anywhere else. So you can't delete (a :db/id a) 95 | -- while (b :buddy/friend a) exists or while (a :pet/name "Spot") exists. 96 | create trigger "triples/retract-dbid-soup" after delete 97 | on "triples" when old.a = 1 -- the soup rowid for :db/id 98 | and old.e = old.v 99 | begin delete from "soup" where rowid = old.e; 100 | end; 101 | 102 | 103 | -------------------------------------------------------------------------------- 104 | -- "attributes" is a materialized view maintained by triggers. 105 | 106 | create table "attributes" 107 | -- rowid points to the attribute's uuid value in "soup" ... 
not the identifier 108 | ( rowid integer primary key references "entities" (rowid) 109 | , ident integer not null references "soup" (rowid) ); 110 | 111 | create unique index "attribute-ident-unique" on "attributes" (ident); 112 | 113 | create trigger "triples/no-updates" before update 114 | on "triples" 115 | begin select raise (abort, 'not yet implemented (also confusing)'); 116 | end; 117 | 118 | -- triggers to maintain the attributes materialized view 119 | 120 | create trigger "triples/replicate-attributes" after insert 121 | on "triples" when new.a = 3 -- :db/attribute's :db/id 122 | begin insert into "attributes" (rowid, ident) values (new.e, new.v); 123 | end; 124 | 125 | create trigger "triples/unreplicate-attributes" after delete 126 | on "triples" when old.a = 3 -- :db/attribute's :db/id 127 | begin delete from "attributes" where rowid = old.e; 128 | end; 129 | 130 | 131 | -- Initial data; the entity and attribute identifier for :db/id and :db/attribute. 132 | 133 | -- TODO is randomblob(16) a valid v4 uuid? 134 | insert into "soup" (rowid, t, v) 135 | values (1, 1, randomblob(16)) -- :db/id's :db/id 136 | , (2, 2, 'db/id') -- single quotes: a string literal, not an identifier 137 | , (3, 1, randomblob(16)) -- :db/attribute's :db/id 138 | , (4, 2, 'db/attribute'); 139 | 140 | -- Also, two other triples should already exist (v :db/id v) 141 | -- for both :db/id and :db/attribute entities. 142 | insert into "triples" (e, a, v) 143 | values (1, 3, 2) -- :db/id :db/attribute :db/id 144 | , (3, 3, 4) -- :db/attribute :db/attribute :db/attribute 145 | ; 146 | 147 | 148 | -- commit; 149 | -------------------------------------------------------------------------------- /src/disperse.rs: -------------------------------------------------------------------------------- 1 | //! To do with reading values off of [`rusqlite::Row`] using [`FromSqlRow`]. 2 | //! 3 | //! You can call [`Query::disperse`] to execute it in SQLite. That function takes a [`FromSqlRow`] 4 | //!
that is used to get nice value from a [`rusqlite::Row`]. 5 | //! 6 | //! This module is supposed to be stuff implementing [`FromSqlRow`] but right now it's just 7 | //! [`zip_with_keys`], which lets you get a kind of key-value mapping from a row. 8 | //! 9 | //! Otherwise, you can get a single value using [`just`] (which this module exports) 10 | //! and sequences using slices or tuples & arrays up to `[_;9]` or do a bit of branching using [`Either`]. 11 | //! ``` 12 | //! # use owoof::{AttributeRef, Attribute, sql::QueryWriter, either::{left, right}, Value}; 13 | //! # use owoof::disperse::{just, zip_with_keys, Query}; 14 | //! # use rusqlite::Connection; 15 | //! # 16 | //! # let mut db = owoof::new_in_memory().unwrap(); 17 | //! # let woof = owoof::DontWoof::new(&mut db).unwrap(); 18 | //! # 19 | //! // FromSqlRow is implemented on tuples & arrays. 20 | //! let mut fromsql = ( 21 | //! just::(), 22 | //! [just::(), just::()], 23 | //! zip_with_keys([Attribute::from_static(":db/id")]), 24 | //! ); 25 | //! 26 | //! let results = Query::default() 27 | //! .push_sql( 28 | //! r#" 29 | //! SELECT 0, 123 30 | //! , 0, "is your refrigerator running?" 31 | //! , 0, "better go catch itjasdkfjlsdfjalskdfjdklsf" 32 | //! , 1, x'b3ddeb4ca61f44338acd7e10117f142e' 33 | //! "#, 34 | //! ) 35 | //! .disperse(fromsql, &woof) 36 | //! .unwrap(); 37 | //! 38 | //! assert_eq!(results.len(), 1); // one result returned 39 | //! 40 | //! match results.into_iter().next().unwrap() { 41 | //! (Value::Integer(123), texts, obj) => { 42 | //! assert_eq!( 43 | //! texts, 44 | //! vec![ 45 | //! Value::Text(String::from( 46 | //! "is your refrigerator running?" 47 | //! )), 48 | //! Value::Text(String::from( 49 | //! "better go catch itjasdkfjlsdfjalskdfjdklsf" 50 | //! )), 51 | //! ] 52 | //! ); 53 | //! assert_eq!( 54 | //! obj.into_iter().collect::>(), 55 | //! vec![( 56 | //! ":db/id".parse::().unwrap(), 57 | //! "#b3ddeb4c-a61f-4433-8acd-7e10117f142e" 58 | //! .parse() 59 | //! 
.map(Value::Entity) 60 | //! .unwrap(), 61 | //! ),] 62 | //! ); 63 | //! } 64 | //! result => assert!(false, "{:#?}", result), 65 | //! } 66 | //! ``` 67 | //! 68 | //! See [`crate::driver`] for a bit more information about [`FromSqlRow`]. 69 | use rusqlite::{Row, ToSql}; 70 | 71 | use crate::either::Either; 72 | use crate::types::Value; 73 | use crate::DontWoof; 74 | 75 | pub use crate::driver::{just, ColumnIndex, FromSqlRow, Result}; 76 | pub use crate::sql::Query; 77 | 78 | impl Query<&dyn ToSql> { 79 | /// [`FromSqlRow`] 80 | pub fn disperse<'tx, D: FromSqlRow>( 81 | &self, 82 | mut wat: D, 83 | db: &DontWoof<'tx>, 84 | ) -> rusqlite::Result::Out>> { 85 | let mut stmt = db.prepare(self.as_str())?; 86 | let query = stmt.query_map(self.params(), |row| wat.from_start_of_row(&row))?; 87 | query.collect::>>() 88 | } 89 | 90 | pub fn count<'tx>(&self, db: &DontWoof<'tx>) -> rusqlite::Result { 91 | let mut stmt = db.prepare(self.as_str())?; 92 | let query = stmt.query_map(self.params(), |_| Ok(()))?; 93 | Ok(query.count()) 94 | } 95 | } 96 | 97 | /// Given a sequence of keys (like attributes) returns an implementation of [`FromSqlRow`] that 98 | /// reads one [`Value`] per key and outputs an [`ObjectMap`], a type that can 99 | /// `serde::Serialize` to a map of keys zipped with values. 
100 | /// 101 | /// For example, with a suitable query, you might pass two attributes `":db/id"` and 102 | /// `":db/attribute"` you get a map like: 103 | /// 104 | /// ```skip 105 | /// { 106 | /// ":db/id": "#b181a977-a8a1-2998-16df-a314c607ecde", 107 | /// ":db/attribute": ":db/attribute" 108 | /// } 109 | /// ``` 110 | pub fn zip_with_keys( 111 | keys: K, 112 | ) -> impl FromSqlRow::IntoIter, Vec>> 113 | where 114 | K: IntoIterator, 115 | ::IntoIter: Clone + ExactSizeIterator, 116 | { 117 | let keys = keys.into_iter(); 118 | row_fn(move |row, idx| { 119 | let keys = keys.clone(); 120 | std::iter::repeat(just::()) 121 | .map(|mut v| v.from_sql_row(row, idx)) 122 | .take(keys.len()) 123 | .collect::, _>>() 124 | .map(|values| ObjectMap::new(keys, values)) 125 | }) 126 | } 127 | 128 | /// Create an implementation of [`FromSqlRow`] from a function. 129 | pub fn row_fn(f: F) -> RowFn 130 | where 131 | F: FnMut(&Row, &mut ColumnIndex) -> Result, 132 | { 133 | RowFn(f) 134 | } 135 | 136 | #[derive(Debug, Copy, Clone)] 137 | pub struct RowFn(F); 138 | 139 | impl FromSqlRow for RowFn 140 | where 141 | F: FnMut(&Row, &mut ColumnIndex) -> Result, 142 | { 143 | type Out = O; 144 | 145 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 146 | (self.0)(row, idx) 147 | } 148 | } 149 | 150 | /// Serializes a map by zipping `K` and `V`. 151 | /// Initialize this with `zip_with_keys`. 
152 | #[derive(Debug)] 153 | pub struct ObjectMap(K, V); 154 | 155 | impl ObjectMap { 156 | pub fn new(k: K, v: V) -> ObjectMap { 157 | ObjectMap(k, v) 158 | } 159 | } 160 | 161 | impl ObjectMap 162 | where 163 | K: Iterator, 164 | V: IntoIterator, 165 | { 166 | pub fn into_iter( 167 | self, 168 | ) -> impl Iterator::Item, ::Item)> { 169 | let ObjectMap(key, value) = self; 170 | key.zip(value.into_iter()) 171 | } 172 | } 173 | 174 | #[cfg(feature = "serde")] 175 | mod _serde { 176 | use super::ObjectMap; 177 | use serde::ser::{Serialize, SerializeMap, Serializer}; 178 | impl Serialize for ObjectMap 179 | where 180 | K: Clone + ExactSizeIterator, 181 | ::Item: Serialize, 182 | V: Clone + IntoIterator, 183 | ::Item: Serialize, 184 | { 185 | fn serialize(&self, serializer: S) -> Result 186 | where 187 | S: Serializer, 188 | { 189 | let ObjectMap(key, value) = self; 190 | let mut map = serializer.serialize_map(Some(key.len()))?; 191 | for (k, v) in key.clone().zip(value.clone().into_iter()) { 192 | map.serialize_entry(&k, &v)?; 193 | } 194 | map.end() 195 | } 196 | } 197 | } 198 | 199 | impl FromSqlRow for Either 200 | where 201 | L: FromSqlRow, 202 | R: FromSqlRow, 203 | { 204 | type Out = Either<::Out, ::Out>; 205 | 206 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 207 | match self { 208 | Either::Left(l) => l.from_sql_row(row, idx).map(Either::Left), 209 | Either::Right(l) => l.from_sql_row(row, idx).map(Either::Right), 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | owoof 2 | ===== 3 | 4 | [github](https://github.com/sqwishy/owoof) 5 | [crates.io](https://crates.io/crates/owoof) 6 | [docs.rs](https://docs.rs/owoof) 7 | 8 | A glorified query-builder inspired by [Datomic](https://docs.datomic.com/cloud/index.html) 9 | that uses a datalog-like format for querying and modifying 
information around a SQLite 10 | database. 11 | 12 | This is a pet project and probably shouldn't be used for anything serious. 13 | 14 | This is implemented as a rust library. It is documented; you can read the source or 15 | maybe find the [documentation published on docs.rs](https://docs.rs/owoof/*/owoof/). 16 | 17 | There are two rust executable targets. One provides a command-line-interface (as shown 18 | below) and another can be used for importing data from a csv file. 19 | 20 | ## CLI 21 | 22 | Compile this with `cargo build` using `--features cli --bin owoof`. 23 | 24 | The CLI can be used to initialize new database files, assert/create, retract/remove, or 25 | query information. 26 | 27 | Here are some examples: 28 | 29 | ```shell 30 | $ echo '[{":db/attribute": ":pet/name"}, 31 | {":pet/name": "Garfield"}, 32 | {":pet/name": "Odie"}, 33 | {":pet/name": "Spot"}, 34 | {":db/attribute": ":person/name"}, 35 | {":db/attribute": ":person/starship"}, 36 | {":person/name": "Jon Arbuckle"}, 37 | {":person/name": "Lieutenant Commander Data", 38 | ":person/starship": "USS Enterprise (NCC-1701-D)"}]' \ 39 | | owoof assert 40 | [ 41 | "#45e9d8e9-51ea-47e6-8172-fc8179f8fbb7", 42 | "#4aa95e29-8d45-470b-98a7-ee39aae1b9c9", 43 | "#2450b9e6-71a4-4311-b93e-3920eebb2c06", 44 | "#c544251c-a279-4809-b9b6-7d3cd68d2f2c", 45 | "#19a4cba1-6fc7-4904-ad36-e8502445412f", 46 | "#f1bf032d-b036-4633-b6f1-78664e44603c", 47 | "#e7ecd66e-222f-44bc-9932-c778aa26d6ea", 48 | "#af32cfdb-b0f1-4bbc-830f-1eb83e4380a3" 49 | ] 50 | 51 | $ echo '[{":db/attribute": ":pet/owner"}, 52 | {":db/id": "#4aa95e29-8d45-470b-98a7-ee39aae1b9c9", 53 | ":pet/owner": "#e7ecd66e-222f-44bc-9932-c778aa26d6ea"}, 54 | {":db/id": "#2450b9e6-71a4-4311-b93e-3920eebb2c06", 55 | ":pet/owner": "#e7ecd66e-222f-44bc-9932-c778aa26d6ea"}, 56 | {":db/id": "#c544251c-a279-4809-b9b6-7d3cd68d2f2c", 57 | ":pet/owner": "#af32cfdb-b0f1-4bbc-830f-1eb83e4380a3"}]' \ 58 | | owoof assert 59 | [ 60 | "#ffc46ae2-1bde-4c08-bfea-09db8241aa2b",
61 | "#4aa95e29-8d45-470b-98a7-ee39aae1b9c9", 62 | "#2450b9e6-71a4-4311-b93e-3920eebb2c06", 63 | "#c544251c-a279-4809-b9b6-7d3cd68d2f2c" 64 | ] 65 | 66 | $ owoof '?pet :pet/owner ?owner' \ 67 | --show '?pet :pet/name' \ 68 | --show '?owner :person/name' 69 | [ 70 | [ 71 | { ":pet/name": "Garfield" }, 72 | { ":person/name": "Jon Arbuckle" } 73 | ], 74 | [ 75 | { ":pet/name": "Odie" }, 76 | { ":person/name": "Jon Arbuckle" } 77 | ], 78 | [ 79 | { ":pet/name": "Spot" }, 80 | { ":person/name": "Lieutenant Commander Data" } 81 | ] 82 | ] 83 | 84 | $ owoof '?person :person/starship "USS Enterprise (NCC-1701-D)"' \ 85 | '?pet :pet/owner ?person' \ 86 | '?pet :pet/name ?n' 87 | [ 88 | "Spot" 89 | ] 90 | 91 | # Or, suppose you know someone's name and their pet's name but don't know the attribute 92 | # that relates them... (But also this doesn't use indexes well so don't do it.) 93 | 94 | $ owoof '?person :person/name "Lieutenant Commander Data"' \ 95 | '?pet ?owner ?person' \ 96 | '?pet :pet/name "Spot"' \ 97 | --show '?owner :db/attribute' 98 | [ 99 | { ":db/attribute": ":pet/owner" } 100 | ] 101 | ``` 102 | 103 | Imported from the [goodbooks-10k](https://github.com/zygmuntz/goodbooks-10k) dataset. 
104 | 105 | ```shell 106 | $ owoof '?r :rating/score 1' \ 107 | '?r :rating/book ?b' \ 108 | '?b :book/authors "Dan Brown"' \ 109 | --show '?r :rating/user' \ 110 | --show '?b :book/title' \ 111 | --limit 5 112 | [ 113 | [ 114 | { ":rating/user": 9 }, 115 | { ":book/title": "Angels & Demons (Robert Langdon, #1)" } 116 | ], 117 | [ 118 | { ":rating/user": 58 }, 119 | { ":book/title": "The Da Vinci Code (Robert Langdon, #2)" } 120 | ], 121 | [ 122 | { ":rating/user": 65 }, 123 | { ":book/title": "The Da Vinci Code (Robert Langdon, #2)" } 124 | ], 125 | [ 126 | { ":rating/user": 80 }, 127 | { ":book/title": "The Da Vinci Code (Robert Langdon, #2)" } 128 | ], 129 | [ 130 | { ":rating/user": 89 }, 131 | { ":book/title": "The Da Vinci Code (Robert Langdon, #2)" } 132 | ] 133 | ] 134 | ``` 135 | 136 | ## Importing goodbooks-10k 137 | 138 | 1. Initialize an empty database. 139 | ```shell 140 | $ owoof init 141 | ``` 142 | 143 | 2. Import books & `--output` a copy of the data with the `:db/id` column for each 144 | imported row. 145 | ```shell 146 | $ owoof-csv --output -- \ 147 | :book/title \ 148 | :book/authors \ 149 | :book/isbn \ 150 | :book/avg-rating\ average_rating \ 151 | < goodbooks-10k/books.csv \ 152 | > /tmp/imported-books 153 | ``` 154 | 155 | 3. Import ratings, we're using `mlr` to join the ratings with the imported books. 156 | ```shell 157 | $ mlr --csv join \ 158 | -f /tmp/imported-books \ 159 | -j book_id \ 160 | < goodbooks-10k/ratings.csv \ 161 | | owoof-csv -- \ 162 | ':rating/book :db/id' \ 163 | ':rating/score rating' \ 164 | ':rating/user user_id' 165 | ``` 166 | 167 | 4. That takes some time (probably minutes) but then you can do something like. 
168 | ```shell 169 | $ owoof '?calvin :book/title "The Complete Calvin and Hobbes"' \ 170 | '?rating :rating/book ?calvin' \ 171 | '?rating :rating/score 1' \ 172 | '?rating :rating/user ?u' \ 173 | '?more-great-takes :rating/user ?u' \ 174 | '?more-great-takes :rating/book ?b' \ 175 | '?more-great-takes :rating/score 5' \ 176 | --show '?b :book/title :book/avg-rating' \ 177 | --asc '?b :book/avg-rating' 178 | ``` 179 | And it should spit out some answers. 180 | 181 | ## TODO/Caveats 182 | 183 | - Testing is not extensive at this point. 184 | 185 | The schema _should_ be enforced, so no deleting attributes that are in use, but I 186 | haven't done the work to verify this so there might be some surprises. 187 | 188 | - Performance is not super reliable. 189 | 190 | Version 0.2 adds partial indexes over specific attributes and has helped a lot with 191 | search performance. However, there is no index on values. Some queries are 192 | impacted by this more than others, so performance is not reliable. 193 | 194 | The difficulty currently with a values index is that SQLite's query planner will 195 | prefer it in cases where it shouldn't. It isn't a good index and should be a 196 | last-resort -- it's also huge. 197 | 198 | - This is not feature-rich yet, constraints ensure equality and no support for 199 | constraints over ranges or involving logical operations exist yet and honestly I 200 | haven't tested how well it will perform with the schema changes made in 0.2. 201 | 202 | ## Internal TODOs 203 | 204 | - Create DontWoof off the Connection. 205 | 206 | - The Select borrowing Network is a bit weird. I tried to split it off but it was still 207 | weird. Not sure what to do about that. One consideration is that pushing a Select on 208 | to a Query only borrows from the network. Maybe this could be relaxed? 209 | 210 | - Test reference counting? Add a clean-up that removes soups with zero rc and runs 211 | pragma optimize. 
212 | 213 | - Maybe add some sort of update thing to shorthand retract & assert? 214 | 215 | - The `:db/id` attribute is kind of silly since the entity and value are the same for 216 | triples of that attribute. 217 | 218 | It's useful for object forms / mappings; like `{":db/id": ...}`. But maybe there is a 219 | more clever way to group by something? (Like some sort of primary key associated with 220 | every form that the database stores ... 🤔) 221 | 222 | ## See Also 223 | 224 | My blog post associated with version 0.1 of this software: https://froghat.ca/blag/dont-woof 225 | 226 | #### License 227 | 228 | This is licensed under [Apache License, Version 2.0](LICENSE). 229 | -------------------------------------------------------------------------------- /src/driver.rs: -------------------------------------------------------------------------------- 1 | //! [`rusqlite::types::ToSql`] and [`rusqlite::types::FromSql`] implementations on 2 | //! [`crate::types`]. And the [`TypeTag`] & [`FromTypeTagAndSqlValue`] traits for 3 | //! loading application types from a type-tag and SQLite value pair. 4 | //! 5 | //! ## The TypeTag trait 6 | //! 7 | //! Values are meant to be stored along with enough information to describe what corresponding rust 8 | //! type they were before being serialized into the SQLite database. 9 | //! 10 | //! For instance, we might store a date-time as an integer of the number of milliseconds from an 11 | //! epoch. But, when we get it back, we don't want an integer, we want our date-time. 12 | //! 13 | //! **The type tag is in-band information that allows us to discriminate between integers and 14 | //! date-times, or other scalar types with the same SQLite representation.** This way, users don't 15 | //! need to know or expect anything about the type of what they're querying and they should get the 16 | //! same values out as what was put in. 17 | //! 18 | //!
**Another consideration for this feature is just to implement orderability properly.** If I query 19 | //! date-times since `A` I don't also want to search for integers greater than the integer 20 | //! representation of the date-time `A`. 21 | use rusqlite::types::{ 22 | FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef as SqlValueRef, 23 | }; 24 | 25 | pub use rusqlite::{Result, Row}; 26 | 27 | use crate::types::{Attribute, AttributeRef, Entity, Value, ValueRef}; 28 | 29 | pub(crate) const PLAIN_TAG: i64 = 0; 30 | pub(crate) const ENTITY_ID_TAG: i64 = 1; 31 | pub(crate) const ATTRIBUTE_IDENTIFIER_TAG: i64 = 2; 32 | // pub const USER_TAG: i64 = 256; 33 | 34 | impl ToSql for Value { 35 | fn to_sql(&self) -> Result { 36 | match self { 37 | Value::Entity(e) => e.to_sql(), 38 | Value::Attribute(a) => a.to_sql(), 39 | Value::Text(s) => s.to_sql(), 40 | Value::Integer(i) => i.to_sql(), 41 | Value::Float(f) => f.to_sql(), 42 | Value::Boolean(b) => b.to_sql(), 43 | Value::Uuid(u) => u.to_sql(), 44 | Value::Blob(b) => b.to_sql(), 45 | } 46 | } 47 | } 48 | 49 | impl ToSql for ValueRef<'_> { 50 | fn to_sql(&self) -> Result { 51 | match self { 52 | ValueRef::Entity(e) => e.to_sql(), 53 | ValueRef::Attribute(a) => a.to_sql(), 54 | ValueRef::Text(s) => s.to_sql(), 55 | ValueRef::Integer(i) => i.to_sql(), 56 | ValueRef::Float(f) => f.to_sql(), 57 | ValueRef::Boolean(b) => b.to_sql(), 58 | ValueRef::Uuid(u) => u.to_sql(), 59 | ValueRef::Blob(b) => b.to_sql(), 60 | } 61 | } 62 | } 63 | 64 | impl ToSql for Entity { 65 | fn to_sql(&self) -> Result { 66 | (**self).to_sql() 67 | } 68 | } 69 | 70 | impl<'a> FromSql for Entity { 71 | fn column_result(value: SqlValueRef) -> FromSqlResult { 72 | uuid::Uuid::column_result(value).map(Entity::from) 73 | } 74 | } 75 | 76 | impl ToSql for Attribute { 77 | fn to_sql(&self) -> Result { 78 | self.just_the_identifier().to_sql() 79 | } 80 | } 81 | 82 | impl<'a> ToSql for &'a AttributeRef { 83 | fn to_sql(&self) -> Result { 84 | 
self.just_the_identifier().to_sql() 85 | } 86 | } 87 | 88 | impl<'a> FromSql for Attribute { 89 | fn column_result(value: SqlValueRef) -> FromSqlResult { 90 | value.as_str().map(Attribute::from_ident) 91 | } 92 | } 93 | 94 | /// See the module level documentation in [`crate::driver`] about this. 95 | pub trait TypeTag { 96 | /// Used to help map borrowing and owning instances to a single type. To avoid discriminating 97 | /// between `&Value`, `Value`, and `ValueRef` in cases where we want to treat them the same. 98 | /// 99 | /// I don't really like this solution but I don't know of a more normal trait to use. 100 | type Factory; 101 | 102 | fn type_tag(&self) -> i64; 103 | } 104 | 105 | // Wow! Excellent meme! 106 | impl TypeTag for &'_ T { 107 | type Factory = ::Factory; 108 | 109 | fn type_tag(&self) -> i64 { 110 | (*self).type_tag() 111 | } 112 | } 113 | 114 | impl TypeTag for Entity { 115 | type Factory = Self; 116 | 117 | fn type_tag(&self) -> i64 { 118 | ENTITY_ID_TAG 119 | } 120 | } 121 | 122 | impl TypeTag for Attribute { 123 | type Factory = Self; 124 | 125 | fn type_tag(&self) -> i64 { 126 | ATTRIBUTE_IDENTIFIER_TAG 127 | } 128 | } 129 | 130 | impl TypeTag for &'_ AttributeRef { 131 | type Factory = Attribute; 132 | 133 | fn type_tag(&self) -> i64 { 134 | ATTRIBUTE_IDENTIFIER_TAG 135 | } 136 | } 137 | 138 | impl TypeTag for Value { 139 | type Factory = Self; 140 | 141 | fn type_tag(&self) -> i64 { 142 | match self { 143 | Value::Entity(e) => e.type_tag(), 144 | Value::Attribute(a) => a.type_tag(), 145 | _ => PLAIN_TAG, 146 | } 147 | } 148 | } 149 | 150 | impl TypeTag for ValueRef<'_> { 151 | type Factory = Value; 152 | 153 | fn type_tag(&self) -> i64 { 154 | match self { 155 | ValueRef::Entity(e) => e.type_tag(), 156 | ValueRef::Attribute(a) => a.type_tag(), 157 | _ => PLAIN_TAG, 158 | } 159 | } 160 | } 161 | 162 | /// Make `Self` from a type tag (`i64`) and a [`rusqlite::types::ValueRef`]. 
163 | pub trait FromTypeTagAndSqlValue: Sized { 164 | fn from_type_tag_and_sql_value(type_tag: i64, value: SqlValueRef<'_>) -> FromSqlResult; 165 | } 166 | 167 | impl FromTypeTagAndSqlValue for Value { 168 | fn from_type_tag_and_sql_value(type_tag: i64, value: SqlValueRef<'_>) -> FromSqlResult { 169 | match type_tag { 170 | ENTITY_ID_TAG => Entity::column_result(value).map(Value::Entity), 171 | ATTRIBUTE_IDENTIFIER_TAG => Attribute::column_result(value).map(Value::Attribute), 172 | PLAIN_TAG => match value { 173 | SqlValueRef::Null => todo!(), 174 | SqlValueRef::Integer(i) => Ok(Value::Integer(i)), 175 | SqlValueRef::Real(f) => Ok(Value::Float(f)), 176 | SqlValueRef::Text(t) => String::from_utf8(t.to_vec()) 177 | .map_err(|e| FromSqlError::Other(Box::new(e))) 178 | .map(Value::Text), 179 | SqlValueRef::Blob(b) => Ok(Value::Blob(b.to_vec())), 180 | }, 181 | /* TODO probably could use a more informative custom type here ... */ 182 | _ => Err(FromSqlError::InvalidType), 183 | } 184 | } 185 | } 186 | 187 | impl FromTypeTagAndSqlValue for Entity { 188 | fn from_type_tag_and_sql_value(type_tag: i64, value: SqlValueRef<'_>) -> FromSqlResult { 189 | match type_tag { 190 | ENTITY_ID_TAG => Entity::column_result(value), 191 | _ => Err(FromSqlError::InvalidType), 192 | } 193 | } 194 | } 195 | 196 | impl FromTypeTagAndSqlValue for Attribute { 197 | fn from_type_tag_and_sql_value(type_tag: i64, value: SqlValueRef<'_>) -> FromSqlResult { 198 | match type_tag { 199 | ATTRIBUTE_IDENTIFIER_TAG => Attribute::column_result(value), 200 | _ => Err(FromSqlError::InvalidType), 201 | } 202 | } 203 | } 204 | 205 | #[derive(Debug, Default)] 206 | pub struct ColumnIndex(usize); 207 | 208 | impl ColumnIndex { 209 | /// Return the current value and advance the index. 210 | /// 211 | /// Returns [`rusqlite::Error::InvalidColumnIndex`] if it can't advance the index because it's 212 | /// [`usize::MAX`] or whatever but that will never happen so I don't know why I even exist. 
213 | pub fn bump(&mut self) -> Result { 214 | let idx = self.0; 215 | match self.0.checked_add(1) { 216 | Some(next) => self.0 = next, 217 | None => return Err(rusqlite::Error::InvalidColumnIndex(self.0)), 218 | }; 219 | Ok(idx) 220 | } 221 | } 222 | 223 | /// A factory to make a [`FromSqlRow::Out`] from a [`rusqlite::Row`] using 224 | /// [`FromSqlRow::/rom_start_of_row`]. 225 | /// For example `&[T]` (where `T` implements [`FromSqlRow`]) 226 | /// also implements [`FromSqlRow`] where 227 | /// `FromSqlRow::Out = Vec<::Out>` 228 | pub trait FromSqlRow { 229 | type Out; 230 | 231 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result; 232 | 233 | fn from_start_of_row(&mut self, row: &Row) -> Result { 234 | self.from_sql_row(row, &mut ColumnIndex::default()) 235 | } 236 | } 237 | 238 | /// You might want [`Query<&dyn ToSql>::count`] instead? 239 | impl FromSqlRow for () { 240 | type Out = (); 241 | 242 | fn from_sql_row(&mut self, _row: &Row, _idx: &mut ColumnIndex) -> Result { 243 | Ok(()) 244 | } 245 | } 246 | 247 | impl FromSqlRow for &mut [T] 248 | where 249 | T: FromSqlRow, 250 | { 251 | type Out = Vec<::Out>; 252 | 253 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 254 | self.iter_mut() 255 | .map(|item| item.from_sql_row(row, idx)) 256 | .collect() 257 | } 258 | } 259 | 260 | macro_rules! _from_sql_row_fixed { 261 | ( $($n:expr)* ) => { 262 | $( 263 | impl FromSqlRow for [T; $n] { 264 | type Out = Vec<::Out>; 265 | 266 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 267 | self.as_mut_slice().from_sql_row(row, idx) 268 | } 269 | } 270 | )* 271 | }; 272 | } 273 | 274 | _from_sql_row_fixed!(0 1 2 3 4 5 6 7 8 9); 275 | 276 | macro_rules! 
_from_sql_row_tuple { 277 | ( ) => {}; 278 | ( $t:ident $( $rest:ident )* ) => { 279 | impl<$t: FromSqlRow, $($rest: FromSqlRow),*> FromSqlRow for ($t, $($rest),*) 280 | { 281 | type Out = (<$t as FromSqlRow>::Out, $(<$rest as FromSqlRow>::Out),*); 282 | 283 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 284 | #[allow(non_snake_case)] 285 | let ($t, $($rest),*) = self; 286 | Ok(($t.from_sql_row(row, idx)?, $( $rest.from_sql_row(row, idx)? ),*)) 287 | } 288 | } 289 | 290 | _from_sql_row_tuple!($($rest)*); 291 | }; 292 | } 293 | 294 | _from_sql_row_tuple!(A B C D E F G H I); 295 | 296 | /// Implements [`FromSqlRow`] for just one [`FromTypeTagAndSqlValue`]. 297 | #[derive(Debug)] 298 | pub struct Just(std::marker::PhantomData); 299 | 300 | impl Clone for Just { 301 | fn clone(&self) -> Self { 302 | just() 303 | } 304 | } 305 | 306 | impl Copy for Just {} 307 | 308 | pub fn just() -> Just { 309 | Just(std::marker::PhantomData::) 310 | } 311 | 312 | impl FromSqlRow for Just 313 | where 314 | T: FromTypeTagAndSqlValue + Sized, 315 | { 316 | type Out = T; 317 | 318 | fn from_sql_row(&mut self, row: &Row, idx: &mut ColumnIndex) -> Result { 319 | let type_tag = row.get::<_, i64>(idx.bump()?)?; 320 | let sql_value = row.get_ref(idx.bump()?)?; 321 | T::from_type_tag_and_sql_value(type_tag, sql_value).map_err(From::from) 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /src/network.rs: -------------------------------------------------------------------------------- 1 | //! For querying data. Describe groups of entity-attribute-value triples and constraints to 2 | //! search. 3 | //! 4 | //! Suppose you want: 5 | //! ```ignore 6 | //! ?b :book/title "The Complete Calvin and Hobbes" 7 | //! ?r :review/book ?b 8 | //! ?r :review/user ?u 9 | //! ?r :review/score 1 10 | //! ``` 11 | //! 12 | //! But now imagine it like: 13 | //! ```text 14 | //! +---+ 15 | //! | e |<- 16 | //! 
:review/user<->| a | \ 17 | //! | v | \ +---+ ->:review/book 18 | //! +---+ ->| e | / 19 | //! / | a |<- +---+ 20 | //! +---+ / | v |<---->| e | 21 | //! | e |<- +---+ | a |<->:book/title 22 | //! :review/score<->| a | | v |<->"The Complete Calvin and Hobbes" 23 | //! 1<->| v | +---+ 24 | //! +---+ 25 | //! ``` 26 | //! That's a [`Network`]: each block is a group of database [`Triples`], and the lines are constraints 27 | //! defining what data the triples can match. 28 | //! 29 | //! This is how you might use the API to build that [`Network`]. 30 | //! ``` 31 | //! # use owoof::{Network, AttributeRef}; 32 | //! let mut network: Network = Default::default(); 33 | //! // ?b :book/title "The Complete Calvin and Hobbes" 34 | //! let b = network 35 | //! .fluent_triples() 36 | //! .match_attribute(AttributeRef::from_static(":book/title")) 37 | //! .match_value("The Complete Calvin and Hobbes") 38 | //! .entity(); 39 | //! // ?r :review/book ?b 40 | //! let r = network 41 | //! .fluent_triples() 42 | //! .match_attribute(AttributeRef::from_static(":review/book")) 43 | //! .link_value(b) 44 | //! .entity(); 45 | //! // ?r :review/user ?u 46 | //! let u = network 47 | //! .fluent_triples() 48 | //! .link_entity(r) 49 | //! .match_attribute(AttributeRef::from_static(":review/user")) 50 | //! .value(); 51 | //! // ?r :review/score 1 52 | //! network 53 | //! .fluent_triples() 54 | //! .link_entity(r) 55 | //! .match_attribute(AttributeRef::from_static(":review/score")) 56 | //! .match_value(1) 57 | //! .entity(); 58 | //! ``` 59 | use std::ops::Deref; 60 | 61 | use crate::types::AttributeRef; 62 | use crate::{soup::Encoded, Value, ValueRef}; 63 | 64 | /// A borrowing type alias for [`GenericNetwork`] using [`ValueRef`]. 65 | /// 66 | /// TODO this doesn't work for some reason. The compiler keeps telling me it 67 | /// doesn't know what T is when I do Network::default() and shit ... 
68 | pub type Network<'a, T = ValueRef<'a>> = GenericNetwork; 69 | 70 | /// A owning types alias for [`GenericNetwork`] using [`Value`]. 71 | pub type OwnedNetwork = GenericNetwork; 72 | 73 | /// A plan or projection of entity-attribute-value sets with constraints between them. 74 | /// 75 | /// - See [`Network`] for type alias that borrows using [`ValueRef`]. 76 | /// - Or [`OwnedNetwork`] for a type alias that owns using [`Value`]. 77 | #[derive(Debug, Clone, PartialEq)] 78 | pub struct GenericNetwork { 79 | triples: usize, 80 | constraints: Vec>, 81 | } 82 | 83 | impl Default for GenericNetwork { 84 | fn default() -> Self { 85 | GenericNetwork { triples: 0usize, constraints: vec![] } 86 | } 87 | } 88 | 89 | impl GenericNetwork { 90 | /// Adds one group of entity-attribute-value triples that can be constrained with values or 91 | /// by fields on other triples groups. 92 | pub fn add_triples(&mut self) -> Triples { 93 | let t = Triples(self.triples); 94 | self.triples += 1; 95 | t 96 | } 97 | 98 | /// Adds a [`Constraint`]. 99 | pub fn constrain(&mut self, c: Constraint) { 100 | self.add_constraint(c) 101 | } 102 | 103 | /// Adds a [`Constraint`]. 104 | pub fn add_constraint(&mut self, c: Constraint) { 105 | self.constraints.push(c) 106 | } 107 | 108 | /// The number of triples added into this network. 
109 | pub fn triples(&self) -> usize { 110 | self.triples 111 | } 112 | 113 | pub fn constraints(&self) -> &[Constraint] { 114 | &self.constraints 115 | } 116 | 117 | pub fn constraints_mut(&mut self) -> &mut [Constraint] { 118 | &mut self.constraints 119 | } 120 | 121 | pub fn fluent_triples(&mut self) -> FluentTriples<'_, V> { 122 | let triples = self.add_triples(); 123 | FluentTriples { network: self, triples } 124 | } 125 | } 126 | 127 | impl GenericNetwork 128 | where 129 | V: crate::TypeTag + rusqlite::ToSql, 130 | { 131 | pub fn prefetch_attributes(&mut self, woof: &crate::DontWoof) -> crate::Result<()> { 132 | woof.prefetch_attributes(self) 133 | } 134 | } 135 | 136 | impl GenericNetwork 137 | where 138 | V: PartialEq, 139 | { 140 | /// Shorthand for `iter::once(field).chain(network.links_to(field))`. 141 | pub fn this_and_links_to(&self, on: TriplesField) -> impl Iterator + '_ { 142 | std::iter::once(on).chain(self.links_to(on)) 143 | } 144 | 145 | /// All [`TriplesField`] with equality constraints to the given [`TriplesField`]. 146 | pub fn links_to(&self, on: TriplesField) -> impl Iterator + '_ { 147 | self.constraints.iter().filter_map(move |c| match *c { 148 | Constraint::Eq { lh, rh: Match::Field(rh) } if lh == on => Some(rh), 149 | Constraint::Eq { lh, rh: Match::Field(rh) } if rh == on => Some(lh), 150 | _ => None, 151 | }) 152 | } 153 | 154 | pub fn constraints_on(&self, on: TriplesField) -> impl Iterator> + '_ { 155 | self.constraints.iter().filter(move |c| match c { 156 | Constraint::Eq { lh, rh } => lh == &on || rh == &Match::Field(on), 157 | }) 158 | } 159 | 160 | /// Find an equality constraint between these two fields. 
161 | pub fn is_linked(&self, a: TriplesField, b: TriplesField) -> Option<&Constraint> { 162 | self.constraints.iter().find(|c| match **c { 163 | Constraint::Eq { lh, rh: Match::Field(rh) } if lh == a && rh == b => true, 164 | Constraint::Eq { lh, rh: Match::Field(rh) } if lh == b && rh == a => true, 165 | _ => false, 166 | }) 167 | } 168 | 169 | /// Find an equality constraint between a field and a value. 170 | pub fn is_matched>(&self, a: TriplesField, v: I) -> Option<&Constraint> { 171 | self._is_matched(a, v.into()) 172 | } 173 | 174 | fn _is_matched(&self, a: TriplesField, v: V) -> Option<&Constraint> { 175 | self.constraints.iter().find(|c| match **c { 176 | Constraint::Eq { lh, rh: Match::Value(ref rh) } if lh == a && rh == &v => true, 177 | _ => false, 178 | }) 179 | } 180 | 181 | pub fn constraint_value_matches(&self, v: V) -> impl Iterator + '_ { 182 | let v = Match::Value(v); 183 | self.constraints.iter().filter_map(move |c| match c { 184 | Constraint::Eq { lh, rh } if rh == &v => Some(*lh), 185 | _ => None, 186 | }) 187 | } 188 | 189 | /// Find or add triples `t` such that `t.e = field` and `t.a = attribute`. 
190 | pub fn value_for_entity_attribute( 191 | &mut self, 192 | entity: TriplesField, 193 | attribute: A, 194 | ) -> TriplesField 195 | where 196 | A: AsRef, 197 | V: From, 198 | { 199 | let value = self 200 | .this_and_links_to(entity) 201 | .find(|link| { 202 | self.is_matched(link.triples().attribute(), V::from(attribute.clone())) 203 | .is_some() 204 | }) 205 | .map(|link| link.triples().value()); 206 | 207 | value.unwrap_or_else(|| { 208 | self.fluent_triples() 209 | .link_entity(entity) 210 | .match_attribute(attribute) 211 | .value() 212 | }) 213 | } 214 | } 215 | 216 | #[derive(Debug, Copy, Clone, PartialEq)] 217 | pub struct Triples(usize); 218 | 219 | impl Triples { 220 | pub fn entity(self) -> TriplesField { 221 | TriplesField(self, Field::Entity) 222 | } 223 | 224 | pub fn attribute(self) -> TriplesField { 225 | TriplesField(self, Field::Attribute) 226 | } 227 | 228 | pub fn value(self) -> TriplesField { 229 | TriplesField(self, Field::Value) 230 | } 231 | 232 | pub fn eav(self) -> (TriplesField, TriplesField, TriplesField) { 233 | (self.entity(), self.attribute(), self.value()) 234 | } 235 | 236 | pub fn usize(self) -> usize { 237 | self.0 238 | } 239 | } 240 | 241 | #[derive(Debug, Copy, Clone, PartialEq)] 242 | pub struct TriplesField(pub(crate) Triples, pub(crate) Field); 243 | 244 | impl TriplesField { 245 | pub fn triples(self) -> Triples { 246 | self.0 247 | } 248 | 249 | pub fn field(self) -> Field { 250 | self.1 251 | } 252 | } 253 | 254 | #[derive(Debug, Copy, Clone, PartialEq)] 255 | pub enum Field { 256 | Entity, 257 | Attribute, 258 | Value, 259 | } 260 | 261 | #[derive(Debug, Copy, Clone, PartialEq, Eq)] 262 | pub enum Ordering { 263 | Asc, 264 | Desc, 265 | } 266 | 267 | impl TriplesField { 268 | pub fn asc(self) -> (Self, Ordering) { 269 | (self, Ordering::Asc) 270 | } 271 | 272 | pub fn desc(self) -> (Self, Ordering) { 273 | (self, Ordering::Desc) 274 | } 275 | } 276 | 277 | #[derive(Debug, Copy, Clone, PartialEq)] 278 | pub enum 
Constraint { 279 | Eq { lh: TriplesField, rh: Match }, 280 | } 281 | 282 | impl TriplesField { 283 | pub fn eq(self, rh: Match) -> Constraint { 284 | Constraint::Eq { lh: self, rh } 285 | } 286 | } 287 | 288 | #[derive(Debug, Copy, Clone, PartialEq)] 289 | pub enum Match { 290 | Field(TriplesField), 291 | Encoded(Encoded), 292 | Value(V), 293 | } 294 | 295 | impl From for Match { 296 | fn from(f: TriplesField) -> Self { 297 | Match::Field(f) 298 | } 299 | } 300 | 301 | impl From> for Match { 302 | fn from(e: Encoded) -> Self { 303 | Match::Encoded(e) 304 | } 305 | } 306 | 307 | pub struct FluentTriples<'n, V> { 308 | network: &'n mut GenericNetwork, 309 | triples: Triples, 310 | } 311 | 312 | impl<'n, V> FluentTriples<'n, V> { 313 | pub fn match_entity>(&mut self, i: I) -> &mut Self { 314 | self.network.add_constraint(Constraint::Eq { 315 | lh: self.triples.entity(), 316 | rh: Match::Value(i.into()), 317 | }); 318 | self 319 | } 320 | 321 | pub fn match_attribute>(&mut self, i: I) -> &mut Self 322 | where 323 | V: From, 324 | { 325 | self.network.add_constraint(Constraint::Eq { 326 | lh: self.triples.attribute(), 327 | rh: Match::Value(i.into()), 328 | }); 329 | self 330 | } 331 | 332 | pub fn match_value>(&mut self, i: I) -> &mut Self { 333 | self.network.add_constraint(Constraint::Eq { 334 | lh: self.triples.value(), 335 | rh: Match::Value(i.into()), 336 | }); 337 | self 338 | } 339 | 340 | pub fn link_entity>>(&mut self, i: I) -> &mut Self { 341 | self.network 342 | .add_constraint(Constraint::Eq { lh: self.triples.entity(), rh: i.into() }); 343 | self 344 | } 345 | 346 | pub fn link_attribute>>(&mut self, i: I) -> &mut Self { 347 | self.network 348 | .add_constraint(Constraint::Eq { lh: self.triples.attribute(), rh: i.into() }); 349 | self 350 | } 351 | 352 | pub fn link_value>>(&mut self, i: I) -> &mut Self { 353 | self.network 354 | .add_constraint(Constraint::Eq { lh: self.triples.value(), rh: i.into() }); 355 | self 356 | } 357 | } 358 | 359 | impl<'n, V> 
Deref for FluentTriples<'n, V> { 360 | type Target = Triples; 361 | 362 | fn deref(&self) -> &Self::Target { 363 | &self.triples 364 | } 365 | } 366 | -------------------------------------------------------------------------------- /src/retrieve.rs: -------------------------------------------------------------------------------- 1 | //! A higher-level way of adding constraints to a [`GenericNetwork`]. 2 | //! 3 | //! [`NamedNetwork`] is just a network paired with a list of variables that map to triples-fields 4 | //! in the network. 5 | //! 6 | //! [`NamedNetwork::add_pattern`] allows adding a constraint from a [`Pattern`] which can be 7 | //! parsed from a string. See [`crate::types::parse_value`] on how parsing is attempted. 8 | //! 9 | //! ``` 10 | //! # use owoof::{NamedNetwork, Value, ValueRef, Attribute, Pattern, BorrowedParse}; 11 | //! # use owoof::{Variable, either::{Left, Right}}; 12 | //! # 13 | //! # #[cfg(feature = "serde_json")] 14 | //! # { 15 | //! let mut network = NamedNetwork::::default(); 16 | //! let pattern = r#"?p :pet/name "Garfield""# 17 | //! .borrowed_parse() 18 | //! .expect("parse pattern"); 19 | //! assert_eq!( 20 | //! pattern, 21 | //! Pattern { 22 | //! entity: Left(Variable::Unify("?p")), 23 | //! attribute: Right(Value::Attribute(Attribute::from_static(":pet/name"))), 24 | //! value: Right(Value::Text("Garfield".to_owned())), 25 | //! } 26 | //! ); 27 | //! network.add_pattern(&pattern); 28 | //! # } 29 | //! ``` 30 | //! 31 | //! One nice thing about this is that variables are automatically unified. 32 | //! 33 | //! So if I add another pattern `?p :animal/name "Cat"`, there will be a constraint linking the 34 | //! `?p` variables together. 35 | //! 36 | //! Also, a [`GenericNetwork`] specifies the FROM clause of a SELECT in SQL, everything else goes on 37 | //! to a [`Select`] object that can be returned by [`GenericNetwork::select()`]. 38 | //! 39 | //! A [`Select`] has [`crate::sql::PushToQuery::to_query`] returning a 40 | //! 
[`crate::sql::Query`]. And you can execute a [`crate::sql::Query`] with 41 | //! [`crate::sql::Query::disperse`]. (There should be an example of this at the root of the 42 | //! documentation in lib.rs.) 43 | //! 44 | //! See the [dispersal](crate::disperse) module about executing a query and reading data back from 45 | //! SQLite. 46 | //! 47 | //! --- 48 | //! 49 | //! > "I need some information." 50 | //! > 51 | //! > "This is Information Retrieval, not Information [Dispersal](crate::disperse)." 52 | 53 | use std::fmt; 54 | use std::ops::{Deref, DerefMut}; 55 | 56 | use thiserror::Error; 57 | 58 | use crate::either::Either; 59 | use crate::network::{GenericNetwork, Ordering, Triples, TriplesField}; 60 | use crate::types::{Attribute, AttributeParseError, Entity, EntityParseError}; 61 | use crate::FromBorrowedStr; 62 | 63 | #[cfg(feature = "serde_json")] 64 | use crate::Value; 65 | 66 | /* TODO call this Shape or Gather to sound less SQL? */ 67 | /// LIMIT, ORDER BY, and SELECT clauses forming an entire SELECT statement. 68 | /// Needed to actually query for stuff using a [`GenericNetwork`]. 69 | #[derive(Debug, Clone, PartialEq)] 70 | pub struct Select<'n, V> { 71 | /* TODO Does this really needs to be here? 
*/ 72 | pub(crate) network: &'n GenericNetwork, 73 | pub(crate) selection: Vec, 74 | pub(crate) order_by: Vec<(TriplesField, Ordering)>, 75 | pub(crate) limit: i64, 76 | } 77 | 78 | impl<'n, V> From<&'n GenericNetwork> for Select<'n, V> { 79 | fn from(network: &'n GenericNetwork) -> Self { 80 | Select { network, selection: vec![], order_by: vec![], limit: 0 } 81 | } 82 | } 83 | 84 | impl<'n, V> GenericNetwork { 85 | pub fn select(&'n self) -> Select<'n, V> { 86 | Select::from(self) 87 | } 88 | } 89 | 90 | impl<'n, V> Deref for Select<'n, V> { 91 | type Target = GenericNetwork; 92 | 93 | fn deref(&self) -> &Self::Target { 94 | self.network 95 | } 96 | } 97 | 98 | impl<'n, V> Select<'n, V> { 99 | pub fn fields(&self) -> &[TriplesField] { 100 | self.selection.as_slice() 101 | } 102 | 103 | pub fn field(&mut self, field: TriplesField) -> &mut Self { 104 | self.selection.push(field); 105 | self 106 | } 107 | 108 | pub fn limit(&mut self, limit: i64) -> &mut Self { 109 | self.limit = limit; 110 | self 111 | } 112 | 113 | pub fn order_by(&mut self, ord: (TriplesField, Ordering)) -> &mut Self { 114 | self.order_by.push(ord); 115 | self 116 | } 117 | } 118 | 119 | /// A [`GenericNetwork`] paired with [`Names`] to associate variables to 120 | /// their first occurrence in a network. 121 | #[derive(Debug)] 122 | pub struct NamedNetwork<'n, V> { 123 | pub network: GenericNetwork, 124 | pub names: Names<'n>, 125 | } 126 | 127 | impl<'n, V> NamedNetwork<'n, V> { 128 | pub fn new(network: GenericNetwork, names: Names<'n>) -> Self { 129 | NamedNetwork { network, names } 130 | } 131 | 132 | pub fn split(self) -> (GenericNetwork, Names<'n>) { 133 | let NamedNetwork { network, names } = self; 134 | (network, names) 135 | } 136 | } 137 | 138 | /* TODO XXX FIXME -- implement this more generally? */ 139 | impl<'n, 'v> NamedNetwork<'n, crate::ValueRef<'v>> { 140 | /// Constrain a [`GenericNetwork`] using the given [`Pattern`], unifying variables where 141 | /// appropriate. 
142 | pub fn add_pattern(&mut self, pattern: &'v Pattern<'n, V>) -> Triples 143 | where 144 | crate::ValueRef<'v>: From<&'v V>, 145 | { 146 | use crate::network::Match; 147 | 148 | let t = self.network.add_triples(); 149 | [ 150 | (t.entity(), &pattern.entity), 151 | (t.attribute(), &pattern.attribute), 152 | (t.value(), &pattern.value), 153 | ] 154 | .into_iter() 155 | .filter_map(|(field, part)| match part { 156 | Either::Left(Variable::Any) => None, 157 | Either::Left(Variable::Unify(unify)) => { 158 | if let Some(link_to) = self.names.get(unify) { 159 | Some(field.eq(link_to.into())) 160 | } else { 161 | self.names.append(unify, field); 162 | None 163 | } 164 | } 165 | Either::Right(v) => Some(field.eq(Match::Value(v.into()))), 166 | }) 167 | .for_each(|constraint| self.network.add_constraint(constraint)); 168 | 169 | t 170 | } 171 | } 172 | 173 | impl<'n, V> Deref for NamedNetwork<'n, V> { 174 | type Target = GenericNetwork; 175 | 176 | fn deref(&self) -> &Self::Target { 177 | &self.network 178 | } 179 | } 180 | 181 | impl<'n, V> DerefMut for NamedNetwork<'n, V> { 182 | fn deref_mut(&mut self) -> &mut Self::Target { 183 | &mut self.network 184 | } 185 | } 186 | 187 | impl<'n, V> Default for NamedNetwork<'n, V> { 188 | fn default() -> Self { 189 | NamedNetwork { network: GenericNetwork::default(), names: Names::default() } 190 | } 191 | } 192 | 193 | /// A mapping of names or variables to the [`TriplesField`] of their location in a network. 194 | /// 195 | /// This uses a `Vec` as it'll be faster than a `BTreeMap` or `HashMap` for smol collections as 196 | /// with our use case. 
197 | #[derive(Debug, Default)] 198 | pub struct Names<'n> { 199 | vec: Vec<(&'n str, TriplesField)>, 200 | } 201 | 202 | impl<'n> Names<'n> { 203 | pub fn get(&self, looking_for: &str) -> Option { 204 | self.vec 205 | .iter() 206 | .find_map(|&(ref has, found)| (has == &looking_for).then(|| found)) 207 | } 208 | 209 | /// There is no point in appending the same `name` multiple times, 210 | /// only the first will be returned by [`Self::get()`]. 211 | pub fn append(&mut self, name: &'n str, location: TriplesField) { 212 | self.vec.push((name, location)); 213 | } 214 | 215 | pub fn iter(&self) -> impl Iterator + '_ { 216 | self.vec.iter() 217 | } 218 | 219 | pub fn lookup(&self, v: &Variable<'_>) -> Result { 220 | match v { 221 | Variable::Any => Err(NamesLookupError::DoesNotUnify), 222 | Variable::Unify(unify) => self.get(unify).ok_or(NamesLookupError::Unmatched), 223 | } 224 | } 225 | } 226 | 227 | #[derive(Debug, Error, Clone, PartialEq)] 228 | pub enum NamesLookupError { 229 | #[error("does not unify")] 230 | DoesNotUnify, 231 | #[error("variable not previously declared")] 232 | Unmatched, 233 | } 234 | 235 | /// A data structure for specifying high-level constraints with [`NamedNetwork::add_pattern`]. 236 | /// 237 | /// - [`Variable::Any`] adds no constraint. 238 | /// - [`Variable::Unify`] constrains the field to the first occurrence of the variable. 239 | /// - [`Either::Right`] holds a [`crate::Value`] or [`crate::ValueRef`] or something and will match 240 | /// that field to that value using equality. 241 | /// 242 | /// FYI: `entity` and `attribute` *should* use [`crate::Entity`] and [`crate::Attribute`] 243 | /// respectively instead, but it's a bit convenient for all the fields to be homogeneous or 244 | /// whatever so you can add them to a `Vec` or iterate over them or otherwise interact with them 245 | /// all the same. 246 | /// 247 | /// Can be parsed but must borrow the input string because [`Variable`]s always borrow. 
See 248 | /// [`crate::BorrowedParse`] about that. Also requires the `serde_json` feature to parse a [`Value`]. 249 | #[derive(Debug, PartialEq)] 250 | pub struct Pattern<'a, V> { 251 | pub entity: Either, V>, 252 | pub attribute: Either, V>, 253 | pub value: Either, V>, 254 | } 255 | 256 | /// Requires the `serde_json` feature. 257 | #[cfg(feature = "serde_json")] 258 | impl<'a> FromBorrowedStr<'a> for Pattern<'a, Value> { 259 | type Err = PatternParseError; 260 | 261 | fn from_borrowed_str(s: &'a str) -> Result { 262 | parse_pattern(s) 263 | } 264 | } 265 | 266 | /// Requires the `serde_json` feature. 267 | #[cfg(feature = "serde_json")] 268 | pub fn parse_pattern<'a>(s: &'a str) -> Result, PatternParseError> { 269 | let (s, e) = take_no_whitespace(s); 270 | let (s, a) = take_no_whitespace(s); 271 | let v = s.trim(); 272 | 273 | return Ok(Pattern { 274 | entity: parse_variable_or_entity(e) 275 | .map_err(|(v, e)| PatternParseError::Entity(v, e))? 276 | .map_right(Value::Entity), 277 | attribute: parse_variable_or_attribute(a) 278 | .map_err(|(v, a)| PatternParseError::Attribute(v, a))? 
279 | .map_right(Value::Attribute), 280 | value: parse_variable_or_value(v).map_err(|(v, ())| PatternParseError::Value(v))?, 281 | }); 282 | 283 | fn take_no_whitespace(s: &str) -> (&str, &str) { 284 | let s = s.trim_start(); 285 | let next = s.split_whitespace().next().unwrap_or_default(); 286 | (&s[next.len()..], next) 287 | } 288 | } 289 | 290 | pub fn parse_variable_or_entity<'a>( 291 | s: &'a str, 292 | ) -> Result, Entity>, (VariableParseError, EntityParseError)> { 293 | Either::from_borrowed_str(s) 294 | } 295 | 296 | pub fn parse_variable_or_attribute<'a>( 297 | s: &'a str, 298 | ) -> Result, Attribute>, (VariableParseError, AttributeParseError)> { 299 | Either::from_borrowed_str(s) 300 | } 301 | 302 | #[cfg(feature = "serde_json")] 303 | pub fn parse_variable_or_value<'a>( 304 | s: &'a str, 305 | ) -> Result, Value>, (VariableParseError, ())> { 306 | Either::from_borrowed_str(s) 307 | } 308 | 309 | #[derive(Debug, Error)] 310 | pub enum PatternParseError { 311 | #[error("not a variable ({}) and not an entity ({})", .0, .1)] 312 | Entity(VariableParseError, EntityParseError), 313 | #[error("not a variable ({}) and not an attribute ({})", .0, .1)] 314 | Attribute(VariableParseError, AttributeParseError), 315 | #[error("not a variable ({}) and not a value", .0)] 316 | Value(VariableParseError), 317 | } 318 | 319 | #[derive(Debug, PartialEq)] 320 | pub enum Variable<'a> { 321 | Any, 322 | Unify(&'a str), 323 | } 324 | 325 | impl<'a> fmt::Display for Variable<'a> { 326 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 327 | match self { 328 | Variable::Any => write!(f, "?"), 329 | Variable::Unify(v) => write!(f, "{}", v), 330 | } 331 | } 332 | } 333 | 334 | /// Allows `Either` to implement [`FromBorrowedStr`]. 
335 | impl<'a> FromBorrowedStr<'a> for Variable<'a> { 336 | type Err = VariableParseError; 337 | 338 | fn from_borrowed_str(s: &'a str) -> Result { 339 | parse_variable(s) 340 | } 341 | } 342 | 343 | pub fn parse_variable<'a>(s: &'a str) -> Result, VariableParseError> { 344 | match s { 345 | "" => return Err(VariableParseError::Leader), 346 | "?" => return Ok(Variable::Any), 347 | _ if !s.starts_with("?") => return Err(VariableParseError::Leader), 348 | _ => (), 349 | }; 350 | 351 | if s.contains(char::is_whitespace) { 352 | return Err(VariableParseError::Whitespace); 353 | } 354 | 355 | if 256 < s.len() { 356 | return Err(VariableParseError::Length); 357 | } 358 | 359 | Ok(Variable::Unify(s)) 360 | } 361 | 362 | #[derive(Debug, Error, Clone, PartialEq)] 363 | pub enum VariableParseError { 364 | #[error("expected leading `?`")] 365 | Leader, 366 | #[error("whitespace not allowed")] 367 | Whitespace, 368 | #[error("name is either too long or too short (0..=255)")] 369 | Length, 370 | } 371 | 372 | #[cfg(test)] 373 | mod tests { 374 | use super::*; 375 | 376 | #[test] 377 | fn test() { 378 | assert_eq!(parse_variable("1234"), Err(VariableParseError::Leader)); 379 | assert_eq!(parse_variable("?foo"), Ok(Variable::Unify("?foo"))); 380 | assert_eq!(parse_variable("?"), Ok(Variable::Any)); 381 | } 382 | 383 | #[cfg(feature = "serde_json")] 384 | #[test] 385 | fn test_parse_pattern() { 386 | assert_eq!( 387 | parse_pattern("? ? ?asdf").unwrap(), 388 | Pattern { 389 | entity: Either::Left(Variable::Any), 390 | attribute: Either::Left(Variable::Any), 391 | value: Either::Left(Variable::Unify("?asdf")), 392 | } 393 | ); 394 | } 395 | } 396 | -------------------------------------------------------------------------------- /src/sql.rs: -------------------------------------------------------------------------------- 1 | //! Stuff to do with SQL query building. 2 | //! 3 | //! The [`Query`] type holds a SQL string and parameter buffer it implements [`QueryWriter`] as 4 | //! 
something you can push sql strings and parameters on to. 5 | //! 6 | //! Implementations on [`PushToQuery`] allowing types to add their particular SQL string 7 | //! representations to a [`QueryWriter`]. 8 | //! 9 | //! Implementations on [`rusqlite::types::ToSql`] and [`rusqlite::types::FromSql`] are not here but 10 | //! in [`crate::driver`] instead. 11 | #![allow(clippy::write_with_newline)] 12 | 13 | use std::fmt::{self, Debug, Display, Write}; 14 | use std::iter::{once, repeat}; 15 | 16 | use rusqlite::types::ToSql; 17 | 18 | // use crate::projection::{self, Concept, Constraint, ConstraintOp, DatomSet, Field, Projection}; 19 | // use crate::types::HasAffinity; 20 | 21 | use crate::driver::TypeTag; 22 | use crate::network::{Field, Ordering, Triples, TriplesField}; 23 | use crate::retrieve::Select; 24 | use crate::soup::Encoded; 25 | 26 | /// A string buffer for a SQL query with a list of a values that should be passed along 27 | /// as query parameters to [rusqlite] when querying. 28 | #[derive(Debug)] 29 | pub struct Query

{ 30 | string: String, 31 | /// TODO limit this somehow because sqlite max params is like 32766 or sqlite3_limit() 32 | params: Vec

, 33 | // params_exceeded: bool, 34 | } 35 | 36 | impl

Query

{ 37 | pub fn as_str(&self) -> &str { 38 | &self.string 39 | } 40 | 41 | pub fn params(&self) -> &[P] { 42 | self.params.as_slice() 43 | } 44 | } 45 | 46 | impl

Default for Query

{ 47 | fn default() -> Self { 48 | Query { string: String::new(), params: Vec::new() } 49 | } 50 | } 51 | 52 | impl

Display for Query

{ 53 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 54 | Display::fmt(&self.string, f) 55 | } 56 | } 57 | 58 | impl

Write for Query

{ 59 | fn write_str(&mut self, s: &str) -> fmt::Result { 60 | self.string.write_str(s) 61 | } 62 | } 63 | 64 | impl

QueryWriter

for Query

{ 65 | fn push_param(&mut self, p: P) -> &mut Self { 66 | self.params.push(p); 67 | self 68 | } 69 | 70 | fn push_sql(&mut self, s: &str) -> &mut Self { 71 | self.string.push_str(s); 72 | self 73 | } 74 | 75 | fn nl(&mut self) -> &mut Self { 76 | self.string.push('\n'); 77 | self 78 | } 79 | } 80 | 81 | /// what the fucking fuck 82 | impl QueryWriter

for &'_ mut W 83 | where 84 | W: QueryWriter

, 85 | { 86 | fn push_param(&mut self, p: P) -> &mut Self { 87 | (*self).push_param(p); 88 | self 89 | } 90 | 91 | fn push_sql(&mut self, s: &str) -> &mut Self { 92 | (*self).push_sql(s); 93 | self 94 | } 95 | 96 | fn nl(&mut self) -> &mut Self { 97 | (*self).nl(); 98 | self 99 | } 100 | } 101 | 102 | /// Allows query string building over [`Query`] and [`IndentedQueryWriter`]. 103 | /// 104 | /// `P` is the query parameter type. 105 | pub trait QueryWriter

: Write { 106 | fn push_param(&mut self, p: P) -> &mut Self; 107 | 108 | fn push_sql(&mut self, s: &str) -> &mut Self; 109 | 110 | fn nl(&mut self) -> &mut Self; 111 | 112 | fn with_indent(&mut self, indent: I) -> IndentedQueryWriter<&mut Self, I> 113 | where 114 | Self: Sized, 115 | I: Iterator, 116 | { 117 | IndentedQueryWriter { writer: self, indent } 118 | } 119 | 120 | fn push(&mut self, ptq: T) -> &mut Self 121 | where 122 | Self: Sized, 123 | T: PushToQuery

, 124 | { 125 | ptq.push_to_query(self); 126 | self 127 | } 128 | } 129 | 130 | /// Wraps a [`Query`] and also implements the [`QueryWriter`] 131 | /// trait but prepends lines with some prefix. 132 | #[derive(Debug)] 133 | pub struct IndentedQueryWriter { 134 | writer: W, 135 | indent: I, 136 | } 137 | 138 | impl IndentedQueryWriter { 139 | fn dedent(self) -> W { 140 | self.writer 141 | } 142 | } 143 | 144 | impl QueryWriter

for IndentedQueryWriter 145 | where 146 | W: QueryWriter

, 147 | /* I can't use a generic lifetime here for some reason? 148 | * Maybe because https://github.com/rust-lang/rust/issues/49601 149 | * I have no idea ... */ 150 | I: Iterator, 151 | { 152 | fn push_param(&mut self, p: P) -> &mut Self { 153 | self.writer.push_param(p); 154 | self 155 | } 156 | 157 | fn push_sql(&mut self, s: &str) -> &mut Self { 158 | self.writer.push_sql(s); 159 | self 160 | } 161 | 162 | fn nl(&mut self) -> &mut Self { 163 | self.writer.nl(); 164 | if let Some(i) = self.indent.next() { 165 | self.writer.push_sql(i); 166 | } 167 | self 168 | } 169 | } 170 | 171 | impl Write for IndentedQueryWriter 172 | where 173 | W: Write, 174 | { 175 | fn write_str(&mut self, s: &str) -> fmt::Result { 176 | self.writer.write_str(s) 177 | } 178 | } 179 | 180 | /// Implemented by things that can write themselves into a [`QueryWriter`]. 181 | pub trait PushToQuery

{ 182 | fn push_to_query(&self, _: &mut W) 183 | where 184 | W: QueryWriter

; 185 | 186 | fn to_query(&self) -> Query

{ 187 | let mut q = Query::default(); 188 | self.push_to_query(&mut q); 189 | q 190 | } 191 | } 192 | 193 | /// srlsly what the fucking fuck 194 | impl PushToQuery

for &'_ T 195 | where 196 | T: PushToQuery

, 197 | { 198 | fn push_to_query(&self, w: &mut W) 199 | where 200 | W: QueryWriter

, 201 | { 202 | (*self).push_to_query(w) 203 | } 204 | } 205 | 206 | impl<'n, V> PushToQuery<&'n dyn ToSql> for Select<'n, V> 207 | where 208 | V: ToSql + TypeTag, 209 | { 210 | fn push_to_query(&self, writer: &mut W) 211 | where 212 | W: QueryWriter<&'n dyn ToSql>, 213 | { 214 | use crate::network::{Constraint, Match}; 215 | 216 | /* Values are not stored directly on triplets, they instead are "encoded" into a `soup` 217 | * table. So we never compare values or select against triples directly, but with the soup 218 | * rows that the triples point to. */ 219 | let mut soup = SoupLookup::with_length(self.network.triples()); 220 | let fromsoups = self 221 | .selection 222 | .iter() 223 | .cloned() 224 | .chain(self.network.constraints().iter().filter_map(|constraint| { 225 | if let Constraint::Eq { lh, rh: Match::Value(_) } = *constraint { 226 | Some(lh) 227 | } else { 228 | None 229 | } 230 | })) 231 | .chain(self.order_by.iter().map(|&(tf, _ordering)| tf)) 232 | .filter(|tf| soup.mark(*tf)) 233 | .map(FromSoup) 234 | .collect::>(); 235 | 236 | /* SELECT */ 237 | 238 | let mut writer = writer.with_indent(once("SELECT ").chain(repeat(" , "))); 239 | for tf in self.selection.iter() { 240 | writer 241 | .nl() 242 | .push(FromSoup(*tf)) 243 | .push_sql(".t, ") 244 | .push(FromSoup(*tf)) 245 | .push_sql(".v"); 246 | } 247 | 248 | /* we have to select something or the statement will be malfored */ 249 | if self.selection.is_empty() { 250 | writer.nl().push_sql("1"); 251 | } 252 | 253 | /* FROM */ 254 | 255 | let mut writer = writer 256 | .dedent() 257 | .with_indent(once(" FROM ").chain(repeat(" , "))); 258 | 259 | /* soup lookups first */ 260 | for fromsoup in fromsoups.iter().cloned() { 261 | writer.nl().push_sql(r#"soup "#).push(fromsoup); 262 | } 263 | 264 | for n in 0..self.network.triples() { 265 | writer.nl().push_sql(&format!(r#"triples t{}"#, n)); 266 | } 267 | 268 | /* WHERE */ 269 | 270 | let mut writer = writer 271 | .dedent() 272 | .with_indent(once(" WHERE 
").chain(repeat(" AND "))); 273 | 274 | /* soup lookups first */ 275 | for constraint in self.network.constraints().iter() { 276 | if let Constraint::Eq { lh, rh: Match::Value(ref v) } = *constraint { 277 | writer 278 | .nl() 279 | .push(FromSoup(lh)) 280 | // TODO Type tag written as literal to simplify borrowing and _maybe_ improve 281 | // query planning at the statement prepare phase, although, I haven't found 282 | // evidence of this in testing. 283 | // 284 | // This could be solve by holding the type tag and value in the 285 | // Match::Value. 286 | .push_sql(&format!(".t = {} AND ", v.type_tag())) 287 | .push(FromSoup(lh)) 288 | .push_sql(".v = ?") 289 | .push_param(v as &dyn ToSql); 290 | } 291 | } 292 | for fromsoup in fromsoups.iter().cloned() { 293 | writer 294 | .nl() 295 | .push(fromsoup.0) 296 | .push_sql(" = ") 297 | .push(fromsoup) 298 | .push_sql(".rowid"); 299 | } 300 | 301 | /* constrain triples to each other or to the soup lookups */ 302 | for constraint in self.network.constraints().iter() { 303 | match constraint { 304 | &Constraint::Eq { lh, ref rh } => match rh { 305 | &Match::Field(rh) => { 306 | writer.nl().push(lh).push_sql(" = ").push(rh); 307 | } 308 | Match::Encoded(Encoded { rowid, .. }) => { 309 | // TODO check that parameter binding doesn't prevent a partial index from 310 | // being used where a literal would use the index. 311 | writer.nl().push(lh).push_sql(&format!(" = {}", rowid)); 312 | } 313 | /* Skip this; this is handled by {fromsoups.0} = {fromsoup}.rowid above. */ 314 | Match::Value(_) => {} 315 | }, 316 | } 317 | } 318 | 319 | let indent = once("ORDER BY ").chain(repeat(" , ")); 320 | let mut writer = writer.dedent().with_indent(indent); 321 | for &(tf, ordering) in self.order_by.iter() { 322 | // TODO gather soups? 
323 | writer 324 | .nl() /* */ 325 | .push(FromSoup(tf)) 326 | .push_sql(".v ") 327 | .push(ordering); 328 | } 329 | 330 | let writer = writer.dedent(); 331 | 332 | if 0 < self.limit { 333 | // TODO The limit is written into the query instead of bound as a parameter because I 334 | // don't want to borrow from Select. 335 | writer.nl().push_sql(&format!(" LIMIT {}", self.limit)); 336 | } 337 | 338 | return; 339 | 340 | #[derive(Copy, Clone)] 341 | struct FromSoup(TriplesField); 342 | 343 | impl

PushToQuery

for FromSoup { 344 | fn push_to_query(&self, w: &mut W) 345 | where 346 | W: QueryWriter

, 347 | { 348 | let _ = match self.0.field() { 349 | Field::Entity => write!(w, "s{}_e", self.0.triples().usize()), 350 | Field::Attribute => write!(w, "s{}_a", self.0.triples().usize()), 351 | Field::Value => write!(w, "s{}_v", self.0.triples().usize()), 352 | }; 353 | } 354 | } 355 | 356 | struct SoupLookup(Vec); 357 | 358 | impl SoupLookup { 359 | fn with_length(len: usize) -> Self { 360 | SoupLookup(vec![0; len]) 361 | } 362 | 363 | fn lookup(&mut self, t: Triples) -> &mut u8 { 364 | &mut self.0[t.usize()] 365 | } 366 | 367 | fn mark(&mut self, tf: TriplesField) -> bool { 368 | let cell = self.lookup(tf.triples()); 369 | let flag: u8 = match tf.field() { 370 | Field::Entity => 1, 371 | Field::Attribute => 2, 372 | Field::Value => 4, 373 | }; 374 | if 0 == *cell & flag { 375 | *cell |= flag; 376 | true 377 | } else { 378 | false 379 | } 380 | } 381 | } 382 | } 383 | } 384 | 385 | impl

PushToQuery

for TriplesField { 386 | fn push_to_query(&self, w: &mut W) 387 | where 388 | W: QueryWriter

, 389 | { 390 | let _ = match self.field() { 391 | Field::Entity => write!(w, "t{}.e", self.triples().usize()), 392 | Field::Attribute => write!(w, "t{}.a", self.triples().usize()), 393 | Field::Value => write!(w, "t{}.v", self.triples().usize()), 394 | }; 395 | } 396 | } 397 | 398 | impl

PushToQuery

for Ordering { 399 | fn push_to_query(&self, w: &mut W) 400 | where 401 | W: QueryWriter

, 402 | { 403 | let _ = match self { 404 | Ordering::Asc => write!(w, "ASC"), 405 | Ordering::Desc => write!(w, "DESC"), 406 | }; 407 | } 408 | } 409 | 410 | #[cfg(test)] 411 | mod tests { 412 | use crate::tests::rusqlite_in_memory; 413 | use crate::{traits::*, AttributeRef, Network}; 414 | 415 | /* some of these examples are kind of stupid but if the network has 416 | * no constraints it should probably not fetch anything -- as opposed 417 | * to generate invalid sql */ 418 | 419 | #[test] 420 | fn test_select_default() { 421 | let db = rusqlite_in_memory().expect("rusqlite_in_memory"); 422 | let network: Network = Network::default(); 423 | let query = network.select().to_query(); 424 | assert!(db.prepare(query.as_str()).is_ok()); 425 | } 426 | 427 | #[test] 428 | fn test_select_nothing_from() { 429 | let db = rusqlite_in_memory().expect("rusqlite_in_memory"); 430 | let mut network: Network = Network::default(); 431 | network 432 | .fluent_triples() 433 | .match_attribute(AttributeRef::from_static(":db/attribute")); 434 | let query = network.select().to_query(); 435 | assert!(db.prepare(query.as_str()).is_ok()); 436 | } 437 | 438 | #[test] 439 | fn test_select_just_limit() { 440 | let db = rusqlite_in_memory().expect("rusqlite_in_memory"); 441 | let network: Network = Network::default(); 442 | let query = network.select().limit(123).to_query(); 443 | assert!(db.prepare(query.as_str()).is_ok()); 444 | } 445 | } 446 | -------------------------------------------------------------------------------- /src/bin/owoof-csv.rs: -------------------------------------------------------------------------------- 1 | //! 
import one csv into a sqlite database with owoof 2 | 3 | use std::borrow::Cow; 4 | use std::error::Error; 5 | use std::iter; 6 | use std::path::PathBuf; 7 | 8 | use owoof::{AttributeRef, DontWoof, Optional}; 9 | 10 | use rusqlite::OpenFlags; 11 | 12 | use anyhow::Context; 13 | 14 | #[derive(Debug)] 15 | struct Args<'a> { 16 | db: PathBuf, 17 | input: Option, 18 | mappings: Vec>, 19 | dry_run: bool, 20 | limit: usize, 21 | output: bool, 22 | csv_delimiter: u8, 23 | } 24 | 25 | #[derive(Debug)] 26 | struct ToAttribute<'a> { 27 | column: Cow<'a, str>, 28 | attribute: &'a AttributeRef, 29 | } 30 | 31 | #[derive(Debug)] 32 | struct ToPosition<'a> { 33 | attribute: &'a AttributeRef, 34 | position: usize, 35 | } 36 | 37 | fn do_import<'a>(args: Args<'a>) -> anyhow::Result<()> { 38 | let input = open_check_tty(args.input.as_ref())?; 39 | 40 | let mut reader = csv::ReaderBuilder::new() 41 | .flexible(true) 42 | .delimiter(args.csv_delimiter) 43 | .from_reader(input); 44 | let headers = reader.headers()?; 45 | let mut to_positions: Vec = 46 | lookup_header_indices(headers, args.mappings.as_slice())?; 47 | 48 | if args.dry_run { 49 | eprintln!("the following mappings were planned"); 50 | 51 | for mapping in args.mappings { 52 | eprintln!("{}\t{}", mapping.attribute, mapping.column); 53 | } 54 | 55 | eprintln!("but this is a dry run, nothing will be imported"); 56 | return Ok(()); 57 | } 58 | 59 | let id_mapping: Option = to_positions 60 | .iter() 61 | .position(|m| m.attribute == AttributeRef::from_static(":db/id")) 62 | .map(|i| to_positions.remove(i)); 63 | 64 | let mut db = rusqlite::Connection::open_with_flags( 65 | &args.db, 66 | OpenFlags::SQLITE_OPEN_READ_WRITE | OpenFlags::SQLITE_OPEN_NO_MUTEX, 67 | )?; 68 | let woof = DontWoof::new(&mut db)?; 69 | 70 | /* attribute identifiers -> attribute entities -- parallel sequence */ 71 | let attributes: Vec> = 72 | find_or_assert_attributes(&woof, args.mappings.as_slice())?; 73 | 74 | let mut records_seen = 0usize; 75 | let mut 
limit = (0 != args.limit).then(|| args.limit); 76 | let mut output = if args.output { 77 | let mut w = csv::WriterBuilder::new() 78 | .delimiter(args.csv_delimiter) 79 | .from_writer(io::stdout()); 80 | /* write headers */ 81 | w.write_record( 82 | &iter::once(":db/id") 83 | .chain(headers.iter()) 84 | .collect::(), 85 | )?; 86 | Some(w) 87 | } else { 88 | None 89 | }; 90 | 91 | let mut record = csv::StringRecord::new(); 92 | while Some(0) != limit && reader.read_record(&mut record)? { 93 | let e = if let Some(ToPosition { position, .. }) = id_mapping { 94 | let entity = record 95 | .get(position) 96 | .context("no value")? 97 | .parse::() 98 | .context("parse entity")?; 99 | woof.encode(entity)? 100 | } else { 101 | woof.new_entity()? 102 | }; 103 | 104 | /* This could have been written differently to use the Deserialize implementation on 105 | * owoof::Value if the csv crate API wasn't so awful. 106 | * 107 | * I can't ask it to deserialize a particular cell of a row -- like 108 | * `StringRecord::get_deserialize()` or something -- so we'd have to use 109 | * `StringRecord::deserialize` and deserialize the entire row. But, since we only care 110 | * about some cells and not others, we'd want to use a `serde::de::DeserializeSeed` to skip 111 | * past the cells we don't care about. 112 | * 113 | * But, I don't think there's a way to use `DeserializeSeed` with the csv library because 114 | * it doesn't expose an actual deserializer... 115 | * 116 | * Instead, we just have `value_from_csv_text` which basically just does both the csv 117 | * library's deserializer.rs/infer_deserialize() and owoof::Value's Deserialize. 
*/ 118 | to_positions 119 | .iter() 120 | .zip(attributes.iter().cloned()) 121 | .map(|(to, a): (&ToPosition, _)| { 122 | let text = record.get(to.position).context("no value")?; 123 | let value: owoof::Value = value_from_csv_text(text); 124 | woof.encode(value) 125 | .and_then(|v| woof.assert(e, a, v).map(drop)) 126 | .with_context(|| format!("failed to assert {:?}", text)) 127 | }) 128 | .zip(args.mappings.iter()) 129 | .map(|(res, map)| res.with_context(|| format!("for column {:?}", map.column))) 130 | .collect::>() 131 | .with_context(|| match record.position() { 132 | Some(p) => format!("on line {}", p.line()), 133 | None => format!("on line ???"), 134 | })?; 135 | 136 | records_seen += 1; 137 | 138 | limit.as_mut().map(|l| *l -= 1); 139 | 140 | if let Some(output) = output.as_mut() { 141 | output.write_record( 142 | &iter::once(woof.decode(e)?.to_string().as_str()) 143 | .chain(record.iter()) 144 | .collect::(), 145 | )?; 146 | } 147 | } 148 | 149 | woof.optimize()?; 150 | woof.into_tx().commit()?; 151 | 152 | eprintln!("imported {} rows/entities", records_seen); 153 | 154 | Ok(()) 155 | } 156 | 157 | fn lookup_header_indices<'a>( 158 | headers: &csv::StringRecord, 159 | mappings: &[ToAttribute<'a>], 160 | ) -> anyhow::Result>> { 161 | mappings 162 | .iter() 163 | .map(|to| { 164 | let ToAttribute { attribute, column } = to; 165 | headers 166 | .iter() 167 | .position(|h| h == column) 168 | .map(|position| ToPosition { attribute, position }) 169 | .ok_or(column) 170 | }) 171 | .collect::>, _>>() 172 | .map_err(|column| { 173 | let headers = headers 174 | .iter() 175 | .flat_map(|s| iter::once("\n» ").chain(iter::once(s))) 176 | .collect::(); 177 | anyhow::anyhow!("failed find column `{}` in headers:{}", column, headers) 178 | }) 179 | } 180 | 181 | fn find_or_assert_attributes<'a>( 182 | woof: &DontWoof, 183 | mappings: &[ToAttribute<'a>], 184 | ) -> anyhow::Result>> { 185 | let db_attribute = 
woof.attribute(woof.encode(AttributeRef::from_static(":db/attribute"))?)?; 186 | 187 | mappings 188 | .iter() 189 | .map(|m| { 190 | let ident = woof.encode(m.attribute)?; 191 | match woof.attribute(ident).optional()? { 192 | Some(attribute) => Ok(attribute), 193 | None => woof 194 | .fluent_entity()? 195 | .assert(db_attribute, ident) 196 | .map(owoof::Encoded::::from), 197 | } 198 | }) 199 | .collect::, _>>() 200 | .context("encode attribute") 201 | } 202 | 203 | /* Try parsing an #entity :attribute or a few other things and fall back to text otherwise. */ 204 | fn value_from_csv_text(s: &str) -> owoof::Value { 205 | use owoof::{Attribute, Entity, Value}; 206 | Option::::None 207 | .or_else(|| s.parse::().map(Value::from).ok()) 208 | .or_else(|| s.parse::().map(Value::from).ok()) 209 | .or_else(|| s.parse::().map(Value::from).ok()) 210 | .or_else(|| s.parse::().map(Value::from).ok()) 211 | .or_else(|| s.parse::().map(Value::from).ok()) 212 | .or_else(|| s.parse::().map(Value::from).ok()) 213 | .unwrap_or_else(|| Value::Text(s.to_owned())) 214 | } 215 | 216 | fn main() { 217 | let args_vec = std::env::args().collect::>(); 218 | let mut args = args_vec.iter().map(String::as_str); 219 | 220 | let exe = args 221 | .next() 222 | .map(|s| s.rsplit('/').next().unwrap_or(s)) 223 | .unwrap_or("owoof-csv"); 224 | 225 | match parse_args(args) { 226 | Err(ArgError::Usage) => usage_and_exit(exe), 227 | Err(err) => { 228 | eprintln!("oof! {}", err); 229 | print_traceback(&err); 230 | eprintln!(""); 231 | usage_and_exit(exe) 232 | } 233 | Ok(args) => { 234 | if let Err(err) = do_import(args) { 235 | eprintln!("oof! {}", err); 236 | print_traceback(err.as_ref()); 237 | std::process::exit(1); 238 | } 239 | } 240 | } 241 | } 242 | 243 | fn print_traceback(err: &dyn Error) { 244 | let mut source = err.source(); 245 | while let Some(err) = source { 246 | eprintln!(" » {}", err); 247 | source = err.source(); 248 | } 249 | } 250 | 251 | fn usage_and_exit(exe: &str) -> ! 
{ 252 | eprintln!("usage: {} [options...] ", exe); 253 | eprintln!(""); 254 | eprintln!("[options...] is a sequence of any of the following."); 255 | eprintln!("\t-l, --limit N\timport only N rows, import everything if N is zero"); 256 | eprintln!("\t-n, --dry-run\tcheck csv mappings but don't modify the database"); 257 | eprintln!("\t-o, --output \twrites inserted :db/id to stdout (see below for more detail)"); 258 | eprintln!( 259 | "\t--db \t<{}> (defaults to OWOOF_DB environment variable)", 260 | default_db_path().display() 261 | ); 262 | eprintln!("\t-i, --input (defaults to stdin)"); 263 | eprintln!(""); 264 | eprintln!(" is a sequence that arguments that map csv headers to attributes."); 265 | eprintln!("\t':pet/name pet_name'\twill read values in the column pet_name and assert them with the :pet/name attribute"); 266 | eprintln!("\t':pet/name' \twill defaults the column name to `name`, the part after / with non-alphabet characters replaced with _"); 267 | eprintln!(""); 268 | eprintln!("Each row imported is an entity added to the database. 
When --output is passed, a copy of the input csv is written to stdout along with a :db/id column that includes the entity id of each row."); 269 | eprintln!(""); 270 | eprintln!("We try to convert values into an entity, attribute, number, or uuid before giving up and just inserting it as text."); 271 | std::process::exit(2); 272 | } 273 | 274 | fn parse_args<'a, I>(mut args: I) -> Result, ArgError<'a>> 275 | where 276 | I: Iterator, 277 | { 278 | let mut db = Option::<&str>::None; 279 | let mut input = Option::<&str>::None; 280 | let mut mappings = Vec::<&str>::default(); 281 | let mut dry_run = false; 282 | let mut limit = 0usize; 283 | let mut output = false; 284 | let mut csv_delimiter = ","; 285 | 286 | while let Some(arg) = args.next() { 287 | match arg { 288 | "-h" | "--help" => return Err(ArgError::Usage), 289 | "-n" | "--dry-run" => dry_run = true, 290 | "-o" | "--output" => output = true, 291 | "-d" | "--delimiter" => { 292 | csv_delimiter = args.next().ok_or(ArgError::NeedsValue(arg))? 293 | } 294 | 295 | "-l" | "--limit" => { 296 | limit = args 297 | .next() 298 | .ok_or(ArgError::NeedsValue(arg))? 299 | .parse() 300 | .map_err(ArgError::invalid(arg))? 
301 | } 302 | "--db" => { 303 | db.replace(args.next().ok_or(ArgError::NeedsValue(arg))?); 304 | } 305 | "-i" | "--input" => { 306 | input.replace(args.next().ok_or(ArgError::NeedsValue(arg))?); 307 | } 308 | "--" => { 309 | mappings.extend(args); 310 | break; 311 | } 312 | _ if arg.starts_with("-") => return Err(ArgError::Unknown(arg)), 313 | _ => mappings.push(arg), 314 | } 315 | } 316 | 317 | Ok(Args { 318 | output, 319 | dry_run, 320 | limit, 321 | db: db 322 | .map(|s| { 323 | s.parse() 324 | .context("parse --db") 325 | .map_err(ArgError::invalid("--db")) 326 | }) 327 | .unwrap_or_else(|| Ok(default_db_path()))?, 328 | input: input 329 | .map(|s| { 330 | s.parse() 331 | .context("parse input csv") 332 | .map_err(ArgError::invalid("--csv")) 333 | }) 334 | .transpose()?, 335 | mappings: mappings 336 | .into_iter() 337 | .map(|s| parse_mapping(s)) 338 | .collect::>>() 339 | .map_err(ArgError::invalid(""))?, 340 | csv_delimiter: { 341 | (csv_delimiter.len() == 1) 342 | .then(|| csv_delimiter.bytes().next().unwrap()) 343 | .context("expected a single byte") 344 | .map_err(ArgError::invalid("--delimiter"))? 
345 | }, 346 | }) 347 | } 348 | 349 | fn parse_mapping<'a>(s: &'a str) -> anyhow::Result> { 350 | let attribute = s.split_whitespace().next().unwrap_or(s); 351 | let rest = s[attribute.len()..].trim(); 352 | 353 | let attribute: &AttributeRef = attribute.try_into().context("parse attribute")?; 354 | 355 | let column = if rest.is_empty() { 356 | guess_csv_header_from_attribute(attribute).into() 357 | } else { 358 | rest.into() 359 | }; 360 | 361 | Ok(ToAttribute { attribute, column }) 362 | } 363 | 364 | fn guess_csv_header_from_attribute(attribute: &AttributeRef) -> String { 365 | let mut s = attribute 366 | .tail() 367 | .chars() 368 | .skip_while(|c| !c.is_alphabetic()) 369 | .map(|c| if c.is_alphabetic() { c } else { '_' }) 370 | .collect::(); 371 | if s.ends_with('_') { 372 | s = s.trim_end_matches('_').to_owned() 373 | } 374 | s 375 | } 376 | 377 | #[derive(Debug, thiserror::Error)] 378 | enum ArgError<'a> { 379 | #[error("゚・✿ヾ╲(。◕‿◕。)╱✿・゚")] 380 | Usage, 381 | #[error("unknown argument {}", .0)] 382 | Unknown(&'a str), 383 | #[error("expected value for {}", .0)] 384 | NeedsValue(&'a str), 385 | #[error("invalid option for {}", .0)] 386 | Invalid(&'a str, #[source] anyhow::Error), 387 | } 388 | 389 | impl<'a> ArgError<'a> { 390 | fn invalid>(arg: &'a str) -> impl Fn(I) -> ArgError<'a> { 391 | move |e| ArgError::Invalid(arg, e.into()) 392 | } 393 | } 394 | 395 | fn default_db_path() -> PathBuf { 396 | std::env::var_os("OWOOF_DB") 397 | .map(PathBuf::from) 398 | .unwrap_or("owoof.sqlite".into()) 399 | } 400 | 401 | use std::{fs, io}; 402 | 403 | pub fn open_check_tty(input: Option<&PathBuf>) -> io::Result> { 404 | match input { 405 | Some(path) => { 406 | let file = fs::File::open(path)?; 407 | Ok(Box::new(io::BufReader::new(file))) 408 | } 409 | None => { 410 | if atty::is(atty::Stream::Stdin) { 411 | eprintln!("reading csv from stdin (and stdin looks like a tty) good luck!"); 412 | } 413 | Ok(Box::new(io::stdin())) 414 | } 415 | } 416 | } 417 | 
-------------------------------------------------------------------------------- /src/bin/owoof.rs: -------------------------------------------------------------------------------- 1 | //! query, assert, or retract triplets 2 | 3 | use std::collections::BTreeMap; 4 | use std::error::Error; 5 | use std::path::PathBuf; 6 | 7 | use owoof::{ 8 | disperse::zip_with_keys, 9 | driver::just, 10 | either, 11 | network::TriplesField, 12 | retrieve::{self, Pattern, Variable}, 13 | sql::PushToQuery, 14 | AttributeRef, BorrowedParse, DontWoof, Ordering, Value, ValueRef, 15 | }; 16 | 17 | use anyhow::Context; 18 | 19 | const OPEN_RW: rusqlite::OpenFlags = 20 | rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE.union(rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX); 21 | 22 | const OPEN_CREATE: rusqlite::OpenFlags = OPEN_RW.union(rusqlite::OpenFlags::SQLITE_OPEN_CREATE); 23 | 24 | #[derive(serde::Deserialize)] 25 | struct Object { 26 | #[serde(rename = ":db/id")] 27 | id: Option, 28 | #[serde(flatten)] 29 | other: std::collections::BTreeMap, 30 | } 31 | 32 | fn main() { 33 | let args_vec = std::env::args().collect::>(); 34 | let mut args = args_vec.iter().map(String::as_str); 35 | 36 | let exe = args 37 | .next() 38 | .map(|s| s.rsplit('/').next().unwrap_or(s)) 39 | .unwrap_or("owoof"); 40 | 41 | let parsed = parse_args(args).unwrap_or_else(|err| match err { 42 | ArgError::Usage => usage_and_exit(exe), 43 | _ => { 44 | eprintln!("oof! {}", &err); 45 | print_traceback(&err); 46 | eprintln!(""); 47 | usage_and_exit(exe) 48 | } 49 | }); 50 | 51 | match &parsed.mode { 52 | Mode::Find => do_find(parsed), 53 | Mode::Init => do_init(parsed), 54 | Mode::Assert => do_assert(parsed), 55 | Mode::Retract => do_retract(parsed), 56 | } 57 | .unwrap_or_else(|err: anyhow::Error| { 58 | eprintln!("oof! 
{}", &err); 59 | print_traceback(err.as_ref()); 60 | eprintln!(""); 61 | std::process::exit(1); 62 | }); 63 | } 64 | 65 | fn print_traceback(err: &dyn Error) { 66 | let mut source = err.source(); 67 | while let Some(err) = source { 68 | eprintln!(" » {}", err); 69 | source = err.source(); 70 | } 71 | } 72 | 73 | fn do_find(find: Args) -> anyhow::Result<()> { 74 | let mut db = rusqlite::Connection::open_with_flags(&find.path, OPEN_RW)?; 75 | let woof = DontWoof::new(&mut db)?; 76 | 77 | let mut network = retrieve::NamedNetwork::::default(); 78 | 79 | for pattern in find.patterns.iter() { 80 | network.add_pattern(pattern); 81 | } 82 | 83 | let mut retreival = vec![]; 84 | let mut dispersal = find 85 | .show 86 | .iter() 87 | .map(|show| -> anyhow::Result<_> { 88 | let field = network 89 | .names 90 | .lookup(&show.variable) 91 | .or_else(|err| { 92 | /* A special case? 93 | * 94 | * Generally we want the network to be a connected graph so we fail on 95 | * unmatched variables. But if the graph is empty, just go for it. */ 96 | if network.triples() == 0 { 97 | Ok(network 98 | .fluent_triples() 99 | .match_attribute(AttributeRef::from_static(":db/id")) 100 | .entity()) 101 | } else { 102 | Err(err) 103 | } 104 | }) 105 | .with_context(|| anyhow::anyhow!("cannot show `{}`", &show.variable))?; 106 | 107 | /* If there are no attributes, show ?var. 108 | * If attributes are given, then show the value of 109 | * those attributes where ?var is the entity. 
*/ 110 | if show.attributes.is_empty() { 111 | retreival.push(field); 112 | 113 | Ok(either::left(just::())) 114 | } else { 115 | retreival.extend( 116 | show.attributes 117 | .iter() 118 | .map(|&attribute| network.value_for_entity_attribute(field, attribute)), 119 | ); 120 | 121 | Ok(either::right(zip_with_keys(&show.attributes))) 122 | } 123 | }) 124 | .collect::, _>>()?; 125 | 126 | let mut order_by = vec![]; 127 | 128 | find.order.iter().try_for_each(|(show, ordering)| { 129 | let field = network 130 | .names 131 | .lookup(&show.variable) 132 | .with_context(|| anyhow::anyhow!("cannot order by `{}`", &show.variable))?; 133 | 134 | if show.attributes.is_empty() { 135 | order_by.push((field, *ordering)); 136 | } else { 137 | order_by.extend(show.attributes.iter().map(|&attribute| { 138 | let field = network.value_for_entity_attribute(field, attribute); 139 | (field, *ordering) 140 | })); 141 | } 142 | 143 | Result::<_, anyhow::Error>::Ok(()) 144 | })?; 145 | 146 | /* Default --show is showing values for variables with the fewest constraints. 
*/ 147 | 148 | if dispersal.is_empty() { 149 | debug_assert!(retreival.is_empty()); 150 | retreival = variables_with_fewest_constraints(&network) 151 | .map(|(_, field)| field) 152 | .collect(); 153 | dispersal = retreival 154 | .iter() 155 | .map(|_| either::left(just::())) 156 | .collect(); 157 | } 158 | 159 | /* TODO select makes network immutable (this is probably stupid), 160 | * so we can't select until after we prefetch, we can't prefetch 161 | * until after we've gone through the --show and made a selection */ 162 | network.prefetch_attributes(&woof)?; 163 | 164 | let mut select = network.select(); 165 | 166 | for field in retreival.into_iter() { 167 | select.field(field); 168 | } 169 | 170 | for o in order_by.into_iter() { 171 | select.order_by(o); 172 | } 173 | 174 | select.limit(find.limit); 175 | 176 | if find.explain { 177 | let q = select.to_query(); 178 | eprintln!("{}", q.as_str()); 179 | 180 | let explain = woof.explain_plan(&q).context("explain")?; 181 | eprintln!("{}", explain); 182 | 183 | return Ok(()); 184 | } 185 | 186 | let results = select.to_query().disperse( 187 | /* if we map a row to a single value, don't put that value in a list */ 188 | if dispersal.len() == 1 { 189 | either::left(dispersal.into_iter().next().unwrap()) 190 | } else { 191 | either::right(dispersal.as_mut_slice()) 192 | }, 193 | &woof, 194 | )?; 195 | let json = serde_json::to_string_pretty(&results)?; 196 | 197 | println!("{}", json); 198 | Ok(()) 199 | } 200 | 201 | fn do_init(init: Args) -> anyhow::Result<()> { 202 | let mut db = rusqlite::Connection::open_with_flags(&init.path, OPEN_CREATE)?; 203 | let tx = db.transaction()?; 204 | owoof::create_schema(&tx)?; 205 | tx.commit()?; 206 | println!("{}", init.path.display()); 207 | Ok(()) 208 | } 209 | 210 | fn do_assert(assert: Args) -> anyhow::Result<()> { 211 | let mut input = open_check_tty(assert.input.as_ref())?; 212 | 213 | let mut db = rusqlite::Connection::open_with_flags(&assert.path, OPEN_RW)?; 214 | let woof 
= DontWoof::new(&mut db)?; 215 | 216 | /* TODO this error message is not helpful */ 217 | let stuff: either::Either> = serde_json::from_reader(&mut input) 218 | .context(r#"decode a JSON object like {":db/id": ..., ":foo/bar": ...}"#)?; 219 | 220 | let mut ident_cache: IdentCache = Default::default(); 221 | 222 | let asserted = match stuff { 223 | either::Left(one) => vec![one].into_iter(), 224 | either::Right(v) => v.into_iter(), 225 | } 226 | .map(|obj| { 227 | let Object { id, other } = obj; 228 | let (e, id) = match id { 229 | Some(id) => (woof.encode(id)?, id), 230 | None => { 231 | let e = woof.new_entity()?; 232 | (e, woof.decode(e)?) 233 | } 234 | }; 235 | 236 | other 237 | .into_iter() 238 | .map(|(ident, v)| { 239 | let a = cached_get_ident(&mut ident_cache, &woof, &ident)?; 240 | let v = woof.encode(v)?; 241 | woof.assert(e, a, v)?; 242 | Ok(()) 243 | }) 244 | .collect::>() 245 | .map(|()| id) 246 | }) 247 | .collect::, _>>()?; 248 | 249 | woof.into_tx().commit().context("commit")?; 250 | 251 | let jaysons = if asserted.len() == 1 { 252 | serde_json::to_string_pretty(&asserted[0]) 253 | } else { 254 | serde_json::to_string_pretty(&asserted) 255 | } 256 | .context("serialize results")?; 257 | 258 | println!("{}", jaysons); 259 | 260 | Ok(()) 261 | } 262 | 263 | fn do_retract(retract: Args) -> anyhow::Result<()> { 264 | let mut input = open_check_tty(retract.input.as_ref())?; 265 | 266 | let mut db = rusqlite::Connection::open_with_flags(&retract.path, OPEN_RW)?; 267 | let woof = DontWoof::new(&mut db)?; 268 | 269 | let stuff: either::Either> = serde_json::from_reader(&mut input)?; 270 | 271 | let mut ident_cache: IdentCache = Default::default(); 272 | 273 | match stuff { 274 | either::Left(one) => vec![one].into_iter(), 275 | either::Right(v) => v.into_iter(), 276 | } 277 | .map(|obj| { 278 | let Object { id, other } = obj; 279 | let id = id.context(":db/id is required to retract")?; 280 | let e = woof.encode(id)?; 281 | 282 | other 283 | .into_iter() 284 
| .map(|(ident, v)| { 285 | let a = cached_get_ident(&mut ident_cache, &woof, &ident)?; 286 | let v = woof.encode(v)?; 287 | woof.retract(e, a, v)?; 288 | Ok(()) 289 | }) 290 | .collect::>() 291 | }) 292 | .collect::>() 293 | .and_then(|n: Sum| { 294 | woof.into_tx().commit().context("commit")?; 295 | eprintln!("{}", n.usize()); 296 | Ok(()) 297 | }) 298 | } 299 | 300 | type IdentCache = BTreeMap>; 301 | 302 | fn cached_get_ident<'a>( 303 | cache: &mut BTreeMap>, 304 | woof: &DontWoof, 305 | ident: &AttributeRef, 306 | ) -> anyhow::Result> { 307 | match cache.get(ident).cloned() { 308 | Some(a) => Ok(a), 309 | None => { 310 | let a = woof 311 | .encode(ident) 312 | .and_then(|enc| woof.attribute(enc)) 313 | .with_context(|| format!("couldn't find attribute {}, does it exist?", ident))?; 314 | if cache.len() < 256 { 315 | cache.insert(ident.to_owned(), a); 316 | } 317 | Ok(a) 318 | } 319 | } 320 | } 321 | 322 | fn usage_and_exit(exe: &str) -> ! { 323 | eprintln!("usage: {} [--db ] ... [--show ] [--limit ] [--asc ] [--desc ] [--find|--explain|--explain-plan]", exe); 324 | eprintln!(" {} [--db ] init", exe); 325 | eprintln!(" {} [--db ] assert [--input ]", exe); 326 | eprintln!(" {} [--db ] retract [--input ]", exe); 327 | eprintln!(""); 328 | eprintln!(" is ?var|#some-entity-uuid ?var|:some/attribute ?var|json "); 329 | eprintln!(" is ?var [:some/attribute...]"); 330 | eprintln!(""); 331 | eprintln!( 332 | "the default path (set by OWOOF_DB) is {}", 333 | default_db_path().display() 334 | ); 335 | std::process::exit(2); 336 | } 337 | 338 | struct Args<'a> { 339 | mode: Mode, 340 | path: PathBuf, 341 | input: Option, 342 | show: Vec>, 343 | patterns: Vec>, 344 | order: Vec<(Show<'a>, Ordering)>, 345 | limit: i64, 346 | explain: bool, 347 | // explain_plan: bool, 348 | } 349 | 350 | enum Mode { 351 | Find, 352 | Init, 353 | Assert, 354 | Retract, 355 | } 356 | 357 | fn parse_args<'a, I>(mut args: I) -> Result, ArgError<'a>> 358 | where 359 | I: Iterator, 360 | { 361 | 
let mut mode = Mode::Find; 362 | let mut db = Option::<&str>::None; 363 | let mut input = Option::<&str>::None; 364 | let mut limit = Option::<&str>::None; 365 | let mut patterns = vec![]; 366 | let mut show = vec![]; 367 | let mut order = vec![]; 368 | let mut explain = false; 369 | 370 | while let Some(arg) = args.next() { 371 | match arg { 372 | "-h" | "--help" => return Err(ArgError::Usage), 373 | "--db" => { 374 | db.replace(args.next().ok_or(ArgError::NeedsValue(arg))?); 375 | } 376 | "--input" => { 377 | input.replace(args.next().ok_or(ArgError::NeedsValue(arg))?); 378 | } 379 | "--limit" => { 380 | limit.replace(args.next().ok_or(ArgError::NeedsValue(arg))?); 381 | } 382 | "--show" => { 383 | show.push(args.next().ok_or(ArgError::NeedsValue(arg))?); 384 | } 385 | "--asc" => { 386 | let arg = args.next().ok_or(ArgError::NeedsValue(arg))?; 387 | order.push((arg, Ordering::Asc)); 388 | } 389 | "--desc" => { 390 | let arg = args.next().ok_or(ArgError::NeedsValue(arg))?; 391 | order.push((arg, Ordering::Desc)); 392 | } 393 | "--explain" => explain = true, 394 | "init" => mode = Mode::Init, 395 | "assert" => mode = Mode::Assert, 396 | "retract" => mode = Mode::Retract, 397 | "--" => { 398 | patterns.extend(args); 399 | break; 400 | } 401 | _ if arg.starts_with("-") => return Err(ArgError::Unknown(arg)), 402 | _ => patterns.push(arg), 403 | } 404 | } 405 | 406 | // if matches!(mode, Mode::Find) && patterns.is_empty() { 407 | // return Err(ArgError::Usage); /* ¯\_(ツ)_/¯*/ 408 | // } 409 | 410 | Ok(Args { 411 | mode, 412 | patterns: patterns 413 | .into_iter() 414 | .map(|s| s.borrowed_parse().map_err(anyhow::Error::from)) 415 | .collect::>>() 416 | .map_err(ArgError::invalid(""))?, 417 | show: show 418 | .into_iter() 419 | .map(parse_show) 420 | .collect::>>() 421 | .map_err(ArgError::invalid("--show"))?, 422 | path: db 423 | .map(|s| { 424 | s.parse() 425 | .context("parse path") 426 | .map_err(ArgError::invalid("--db")) 427 | }) 428 | .unwrap_or_else(|| 
Ok(default_db_path()))?, 429 | input: db 430 | .map(|s| { 431 | s.parse() 432 | .context("parse input path") 433 | .map_err(ArgError::invalid("--input")) 434 | }) 435 | .transpose()?, 436 | order: order 437 | .into_iter() 438 | .map(|(show, ord)| { 439 | parse_show(show) 440 | .map(|show| (show, ord)) 441 | .map_err(ArgError::invalid(match ord { 442 | Ordering::Asc => "--asc", 443 | Ordering::Desc => "--desc", 444 | })) 445 | }) 446 | .collect::, _>>()?, 447 | limit: limit 448 | .map(|s| { 449 | s.parse() 450 | .context("parse number") 451 | .map_err(ArgError::invalid("--limit")) 452 | }) 453 | .unwrap_or_else(|| Ok(default_limit()))?, 454 | explain, 455 | }) 456 | } 457 | 458 | #[derive(Debug, thiserror::Error)] 459 | enum ArgError<'a> { 460 | #[error("゚・✿ヾ╲(。◕‿◕。)╱✿・゚")] 461 | Usage, 462 | #[error("unknown argument {}", .0)] 463 | Unknown(&'a str), 464 | #[error("expected value for {}", .0)] 465 | NeedsValue(&'a str), 466 | #[error("invalid option for {}", .0)] 467 | Invalid(&'a str, #[source] anyhow::Error), 468 | } 469 | 470 | impl<'a> ArgError<'a> { 471 | fn invalid(arg: &'a str) -> impl Fn(anyhow::Error) -> ArgError<'a> { 472 | move |e| ArgError::Invalid(arg, e) 473 | } 474 | } 475 | 476 | fn default_db_path() -> PathBuf { 477 | std::env::var_os("OWOOF_DB") 478 | .map(PathBuf::from) 479 | .unwrap_or("owoof.sqlite".into()) 480 | } 481 | 482 | fn default_limit() -> i64 { 483 | std::env::var("OWOOF_LIMIT") 484 | .map(|s| s.parse().expect("parse OWOOF_LIMIT environment variable")) 485 | .unwrap_or(10) 486 | } 487 | 488 | #[derive(Debug, PartialEq)] 489 | struct Show<'a> { 490 | variable: Variable<'a>, 491 | attributes: Vec<&'a AttributeRef>, 492 | } 493 | 494 | fn parse_show<'a>(s: &'a str) -> anyhow::Result> { 495 | let mut parts = s.split_whitespace(); 496 | Ok(parts.next().unwrap_or_default()) 497 | .and_then(|s| { 498 | s.borrowed_parse::() 499 | .with_context(|| format!("when reading {:?}", s)) 500 | }) 501 | .and_then(|variable| { 502 | parts 503 | .map(|s| 
{ 504 | AttributeRef::from_str(s).with_context(|| format!("when reading {:?}", s)) 505 | }) 506 | .collect::>() 507 | .map(|attributes| Show { variable, attributes }) 508 | }) 509 | .context("expected ?var :some/attributes...") 510 | } 511 | 512 | pub fn variables_with_fewest_constraints<'a, 'n, V>( 513 | network: &'a retrieve::NamedNetwork<'n, V>, 514 | ) -> impl Iterator + 'a 515 | where 516 | V: PartialEq, 517 | { 518 | let constraint_counts = network 519 | .names 520 | .iter() 521 | .map(|&(_, field)| network.constraints_on(field).count()) 522 | .collect::>(); 523 | 524 | constraint_counts 525 | .iter() 526 | .cloned() 527 | .min() 528 | .map(move |min| { 529 | network 530 | .names 531 | .iter() 532 | .zip(constraint_counts.into_iter()) 533 | .filter(move |&(_, count)| count == min) 534 | .map(|(v, _)| v) 535 | }) 536 | .into_iter() 537 | .flatten() 538 | .cloned() 539 | } 540 | 541 | use std::{fs, io}; 542 | 543 | pub fn open_check_tty(input: Option<&PathBuf>) -> io::Result> { 544 | match input { 545 | Some(path) => { 546 | let file = fs::File::open(path)?; 547 | Ok(Box::new(io::BufReader::new(file))) 548 | } 549 | None => { 550 | if atty::is(atty::Stream::Stdin) { 551 | eprintln!("reading csv from stdin (and stdin looks like a tty) good luck!"); 552 | } 553 | Ok(Box::new(io::stdin())) 554 | } 555 | } 556 | } 557 | 558 | #[derive(Default, Clone, Copy)] 559 | struct Sum(usize); 560 | 561 | impl Sum { 562 | fn usize(self) -> usize { 563 | self.0 564 | } 565 | } 566 | 567 | impl std::iter::FromIterator<()> for Sum { 568 | fn from_iter>(iter: T) -> Self { 569 | Sum(iter.into_iter().fold(0usize, |sum, _| sum + 1)) 570 | } 571 | } 572 | 573 | impl std::iter::FromIterator for Sum { 574 | fn from_iter>(iter: T) -> Self { 575 | Sum(iter.into_iter().fold(0usize, |sum, x| sum + x.usize())) 576 | } 577 | } 578 | 579 | impl std::iter::FromIterator for Sum { 580 | fn from_iter>(iter: T) -> Self { 581 | Sum(iter.into_iter().fold(0usize, |sum, x| sum + x)) 582 | } 583 | } 584 
| -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | //! Relating to types for entities, attributes, and other database values. 2 | //! 3 | //! If the `serde` feature is enabled, values can be serialized and deserialized. But 4 | //! the round trip might not be very coherent. [`Attribute`] and [`Entity`] instances are serialized to 5 | //! text with a leading symbol. So, the serialized representation of `Value::Attribute(a)` is the 6 | //! same as `Value::Text(a.to_string())`. So if you serialize the latter, you will deserialize 7 | //! into the former. 8 | use std::{ 9 | borrow::{Borrow, ToOwned}, 10 | convert::{AsRef, TryFrom}, 11 | fmt, 12 | ops::Deref, 13 | str::FromStr, 14 | }; 15 | use thiserror::Error; 16 | 17 | #[cfg(feature = "serde")] 18 | use std::borrow::Cow; 19 | 20 | use uuid::Uuid; 21 | 22 | #[cfg(feature = "serde_json")] 23 | use crate::FromBorrowedStr; 24 | 25 | /// An owning version of [`ValueRef`] 26 | #[cfg_attr(feature = "serde", derive(serde::Serialize), serde(untagged))] 27 | #[derive(Debug, Clone, PartialEq)] 28 | pub enum Value { 29 | Entity(Entity), 30 | Attribute(Attribute), 31 | Text(String), 32 | Integer(i64), 33 | Float(f64), 34 | Boolean(bool), 35 | Uuid(Uuid), 36 | Blob(Vec), 37 | // Timestamp(...), 38 | } 39 | 40 | /// A borrowing version of [`Value`] 41 | #[cfg_attr(feature = "serde", derive(serde::Serialize), serde(untagged))] 42 | #[derive(Debug, Clone, PartialEq, Copy)] 43 | pub enum ValueRef<'a> { 44 | Entity(Entity), 45 | Attribute(&'a AttributeRef), 46 | Text(&'a str), 47 | Integer(i64), 48 | Float(f64), 49 | Boolean(bool), 50 | Uuid(Uuid), 51 | Blob(&'a [u8]), 52 | } 53 | 54 | impl<'a> From<&'a Value> for ValueRef<'a> { 55 | fn from(value: &'a Value) -> ValueRef<'a> { 56 | match *value { 57 | Value::Entity(e) => ValueRef::Entity(e), 58 | Value::Attribute(ref a) => ValueRef::Attribute(a), 59 | 
Value::Text(ref s) => ValueRef::Text(s), 60 | Value::Integer(i) => ValueRef::Integer(i), 61 | Value::Float(f) => ValueRef::Float(f), 62 | Value::Boolean(b) => ValueRef::Boolean(b), 63 | Value::Uuid(u) => ValueRef::Uuid(u), 64 | Value::Blob(ref b) => ValueRef::Blob(b.as_slice()), 65 | } 66 | } 67 | } 68 | 69 | impl From> for Value { 70 | fn from(borrowed: ValueRef<'_>) -> Value { 71 | match borrowed { 72 | ValueRef::Entity(e) => Value::Entity(e), 73 | ValueRef::Attribute(a) => Value::Attribute(a.to_owned()), 74 | ValueRef::Text(s) => Value::Text(s.to_owned()), 75 | ValueRef::Integer(i) => Value::Integer(i), 76 | ValueRef::Float(f) => Value::Float(f), 77 | ValueRef::Boolean(b) => Value::Boolean(b), 78 | ValueRef::Uuid(u) => Value::Uuid(u), 79 | ValueRef::Blob(b) => Value::Blob(b.to_owned()), 80 | } 81 | } 82 | } 83 | 84 | impl From for Value { 85 | fn from(v: Entity) -> Self { 86 | Value::Entity(v) 87 | } 88 | } 89 | 90 | impl From for ValueRef<'_> { 91 | fn from(v: Entity) -> Self { 92 | ValueRef::Entity(v) 93 | } 94 | } 95 | 96 | impl From for Value { 97 | fn from(v: Attribute) -> Self { 98 | Value::Attribute(v) 99 | } 100 | } 101 | 102 | impl<'a> From<&'a AttributeRef> for ValueRef<'a> { 103 | fn from(v: &'a AttributeRef) -> Self { 104 | ValueRef::Attribute(v) 105 | } 106 | } 107 | 108 | impl From for Value { 109 | fn from(s: String) -> Self { 110 | Value::Text(s) 111 | } 112 | } 113 | 114 | /* Conflicts with TryFrom<&str> ... 
*/ 115 | impl<'a> From<&'a str> for ValueRef<'a> { 116 | fn from(s: &'a str) -> Self { 117 | ValueRef::Text(s) 118 | } 119 | } 120 | 121 | impl From for Value { 122 | fn from(i: i64) -> Self { 123 | Value::Integer(i) 124 | } 125 | } 126 | 127 | impl From for ValueRef<'_> { 128 | fn from(i: i64) -> Self { 129 | ValueRef::Integer(i) 130 | } 131 | } 132 | 133 | impl From for Value { 134 | fn from(f: f64) -> Self { 135 | Value::Float(f) 136 | } 137 | } 138 | 139 | impl From for ValueRef<'_> { 140 | fn from(f: f64) -> Self { 141 | ValueRef::Float(f) 142 | } 143 | } 144 | 145 | impl From for Value { 146 | fn from(b: bool) -> Self { 147 | Value::Boolean(b) 148 | } 149 | } 150 | 151 | impl From for ValueRef<'_> { 152 | fn from(b: bool) -> Self { 153 | ValueRef::Boolean(b) 154 | } 155 | } 156 | 157 | impl From for Value { 158 | fn from(u: Uuid) -> Self { 159 | Value::Uuid(u) 160 | } 161 | } 162 | 163 | impl From for ValueRef<'_> { 164 | fn from(u: Uuid) -> Self { 165 | ValueRef::Uuid(u) 166 | } 167 | } 168 | 169 | impl From> for Value { 170 | fn from(v: Vec) -> Self { 171 | Value::Blob(v) 172 | } 173 | } 174 | 175 | impl<'a> From<&'a [u8]> for ValueRef<'a> { 176 | fn from(v: &'a [u8]) -> Self { 177 | ValueRef::Blob(v) 178 | } 179 | } 180 | 181 | impl<'a> From<&'_ ValueRef<'a>> for ValueRef<'a> { 182 | fn from(value: &'_ ValueRef<'a>) -> ValueRef<'a> { 183 | value.clone() 184 | } 185 | } 186 | 187 | /// Opposite of [`FromStr`] and [`FromBorrowedStr`]. Requires the `serde_json` feature. 
188 | /// 189 | /// Formatting rules are something like: 190 | /// - #entity-id 191 | /// - :some/attribute 192 | /// - "text is a json string" 193 | /// - some-hyphenated-uuid 194 | #[cfg(feature = "serde_json")] 195 | impl<'v> fmt::Display for ValueRef<'v> { 196 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 197 | use serde_json::to_string as json; 198 | 199 | match self { 200 | ValueRef::Entity(v) => v.fmt(f), 201 | ValueRef::Attribute(v) => v.fmt(f), 202 | ValueRef::Text(v) => json(v).map_err(|_| fmt::Error)?.fmt(f), 203 | ValueRef::Integer(v) => v.fmt(f), 204 | ValueRef::Float(v) => v.fmt(f), 205 | ValueRef::Boolean(v) => v.fmt(f), 206 | ValueRef::Uuid(v) => v.fmt(f), 207 | ValueRef::Blob(v) => json(v).map_err(|_| fmt::Error)?.fmt(f), 208 | } 209 | } 210 | } 211 | 212 | /// See [`ValueRef`]'s [`fmt::Display`] implementation. 213 | #[cfg(feature = "serde_json")] 214 | impl fmt::Display for Value { 215 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 216 | ValueRef::from(self).fmt(f) 217 | } 218 | } 219 | 220 | /// Opposite of [`fmt::Display`]. Requires the `serde_json` feature. 221 | #[cfg(feature = "serde_json")] 222 | pub fn parse_value(s: &str) -> Result { 223 | use serde_json::from_str as json; 224 | 225 | match s.chars().next().ok_or(())? { 226 | '#' => s.parse::().map_err(drop).map(Value::Entity), 227 | ':' => s.parse::().map_err(drop).map(Value::Attribute), 228 | '"' => json::(s).map_err(drop).map(Value::Text), 229 | '[' => json::>(s).map_err(drop).map(Value::Blob), 230 | _ => Result::::Err(()) 231 | .or_else(|_| s.parse::().map(From::from)) 232 | .or_else(|_| s.parse::().map(From::from)) 233 | .or_else(|_| s.parse::().map(From::from)) 234 | .or_else(|_| s.parse::().map(From::from)) 235 | .map_err(drop), 236 | } 237 | } 238 | 239 | /// Opposite of [`fmt::Display`]. Requires the `serde_json` feature. 
240 | #[cfg(feature = "serde_json")] 241 | pub fn parse_value_ref(s: &str) -> Result, ()> { 242 | use serde_json::from_str as json; 243 | 244 | match s.chars().next().ok_or(())? { 245 | '#' => s.parse::().map_err(drop).map(ValueRef::Entity), 246 | ':' => AttributeRef::from_str(s) 247 | .map_err(drop) 248 | .map(ValueRef::Attribute), 249 | '"' => json::<&str>(s).map_err(drop).map(ValueRef::Text), 250 | _ => Result::::Err(()) 251 | .or_else(|_| s.parse::().map(From::from)) 252 | .or_else(|_| s.parse::().map(From::from)) 253 | .or_else(|_| s.parse::().map(From::from)) 254 | .or_else(|_| s.parse::().map(From::from)) 255 | .map_err(drop), 256 | } 257 | } 258 | 259 | /// Opposite of [`fmt::Display`]. Requires the `serde_json` feature. 260 | #[cfg(feature = "serde_json")] 261 | impl FromStr for Value { 262 | type Err = (); 263 | 264 | fn from_str(s: &str) -> Result { 265 | parse_value(s) 266 | } 267 | } 268 | 269 | /// Opposite of [`fmt::Display`]. Requires the `serde_json` feature. 270 | #[cfg(feature = "serde_json")] 271 | impl<'a> FromBorrowedStr<'a> for ValueRef<'a> { 272 | type Err = (); 273 | 274 | fn from_borrowed_str(s: &'a str) -> Result { 275 | parse_value_ref(s) 276 | } 277 | } 278 | 279 | /// A uuid referring to an entity. 
280 | #[derive(Debug, Clone, PartialEq, Copy)] 281 | #[repr(transparent)] 282 | pub struct Entity(Uuid); 283 | 284 | impl fmt::Display for Entity { 285 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 286 | let buf = &mut [0; 37]; 287 | buf[0] = 0x23; // a # character 288 | self.0.to_hyphenated_ref().encode_lower(&mut buf[1..]); 289 | let s = std::str::from_utf8(buf).unwrap(); 290 | write!(f, "{}", s) 291 | } 292 | } 293 | 294 | impl Deref for Entity { 295 | type Target = Uuid; 296 | 297 | fn deref(&self) -> &Self::Target { 298 | &self.0 299 | } 300 | } 301 | 302 | impl TryFrom<&'_ str> for Entity { 303 | type Error = EntityParseError; 304 | 305 | fn try_from(s: &str) -> Result { 306 | match s.chars().next() { 307 | Some('#') => (), 308 | Some(_) | None => return Err(EntityParseError::Leader), 309 | } 310 | 311 | let (_, uuid) = s.split_at(1); 312 | uuid.parse().map(Entity).map_err(EntityParseError::Uuid) 313 | } 314 | } 315 | 316 | impl FromStr for Entity { 317 | type Err = EntityParseError; 318 | 319 | fn from_str(s: &str) -> Result { 320 | Entity::try_from(s) 321 | } 322 | } 323 | 324 | impl From for Entity { 325 | fn from(u: Uuid) -> Self { 326 | Entity(u) 327 | } 328 | } 329 | 330 | #[derive(Debug, Error, Clone, PartialEq)] 331 | pub enum EntityParseError { 332 | #[error("expected leading `#`")] 333 | Leader, 334 | #[error("invalid uuid")] 335 | Uuid(#[from] uuid::Error), 336 | } 337 | 338 | /// An attribute name or identifier, like `:db/id` or `:pet/name`. 339 | #[cfg_attr(feature = "serde", derive(serde::Serialize))] 340 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] 341 | #[repr(transparent)] 342 | pub struct Attribute(String); 343 | 344 | impl Attribute { 345 | /// Copies the given identifier and prepends a `:` to create an Attribute. 
346 | pub fn from_ident(ident: &str) -> Self { 347 | Self::from_string_unchecked(format!(":{}", ident)) 348 | } 349 | 350 | pub fn from_string_unchecked(s: String) -> Self { 351 | Attribute(s) 352 | } 353 | 354 | /// Panics if the attribute is invalid. 355 | pub fn from_static(s: &'static str) -> Self { 356 | AttributeRef::from_static(s).to_owned() 357 | } 358 | } 359 | 360 | impl Deref for Attribute { 361 | type Target = AttributeRef; 362 | 363 | fn deref(&self) -> &Self::Target { 364 | AttributeRef::new(&self.0) 365 | } 366 | } 367 | 368 | impl TryFrom for Attribute { 369 | type Error = AttributeParseError; 370 | 371 | fn try_from(s: String) -> Result { 372 | s.parse() 373 | } 374 | } 375 | 376 | impl<'a> TryFrom<&'a str> for Attribute { 377 | type Error = AttributeParseError; 378 | 379 | fn try_from(s: &'a str) -> Result { 380 | s.parse() 381 | } 382 | } 383 | 384 | impl fmt::Display for Attribute { 385 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 386 | self.as_ref().fmt(f) 387 | } 388 | } 389 | 390 | impl AsRef for Attribute { 391 | fn as_ref(&self) -> &AttributeRef { 392 | self 393 | } 394 | } 395 | 396 | /// A borrowing version of [`Attribute`], like `Path` is to `PathBuf`. 397 | #[cfg_attr(feature = "serde", derive(serde::Serialize))] 398 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] 399 | #[repr(transparent)] 400 | pub struct AttributeRef(str); 401 | 402 | impl AttributeRef { 403 | /// uses unsafe copied from std lib's path.rs ¯\\_(ツ)_/¯ 404 | /// ```ignore 405 | /// pub fn new + ?Sized>(s: &S) -> &Path { 406 | /// unsafe { &*(s.as_ref() as *const OsStr as *const Path) } 407 | /// } 408 | /// ``` 409 | fn new + ?Sized>(s: &S) -> &Self { 410 | unsafe { &*(s.as_ref() as *const str as *const AttributeRef) } 411 | } 412 | 413 | /// TODO needs a better name so this doesn't collide with `FromStr`. 
414 | pub fn from_str + ?Sized>(s: &S) -> Result<&Self, AttributeParseError> { 415 | parse_attribute(s.as_ref()) 416 | } 417 | 418 | /// Without the leading `:`. 419 | pub fn just_the_identifier(&self) -> &str { 420 | &self.0[1..] 421 | } 422 | 423 | /// The identifier with the leading `:`. 424 | pub fn as_str(&self) -> &str { 425 | &self.0 426 | } 427 | 428 | /// Everything after the last `/` in the identifier, 429 | /// or just the identifier if no `/` is in the attribute. 430 | pub fn tail(&self) -> &str { 431 | self.just_the_identifier() 432 | .rsplit('/') 433 | .next() 434 | .unwrap_or(self.just_the_identifier()) 435 | } 436 | } 437 | 438 | impl Borrow for Attribute { 439 | fn borrow(&self) -> &AttributeRef { 440 | AttributeRef::new(&self.0) 441 | } 442 | } 443 | 444 | impl ToOwned for AttributeRef { 445 | type Owned = Attribute; 446 | 447 | fn to_owned(&self) -> Self::Owned { 448 | Attribute(self.0.to_owned()) 449 | } 450 | } 451 | 452 | impl AsRef for &AttributeRef { 453 | fn as_ref(&self) -> &AttributeRef { 454 | self 455 | } 456 | } 457 | 458 | impl fmt::Display for AttributeRef { 459 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 460 | self.0.fmt(f) 461 | } 462 | } 463 | 464 | /* FromStr doesn't let us borrow from the input string so we can't use it for AttributeRef */ 465 | impl<'a> TryFrom<&'a str> for &'a AttributeRef { 466 | type Error = AttributeParseError; 467 | 468 | fn try_from(s: &'a str) -> Result { 469 | parse_attribute(s) 470 | } 471 | } 472 | 473 | impl FromStr for Attribute { 474 | type Err = AttributeParseError; 475 | 476 | fn from_str(s: &str) -> Result { 477 | parse_attribute(s).map(ToOwned::to_owned) 478 | } 479 | } 480 | 481 | fn parse_attribute(s: &str) -> Result<&AttributeRef, AttributeParseError> { 482 | let rest = match s.chars().next() { 483 | Some(':') => &s[1..], 484 | Some(_) | None => return Err(AttributeParseError::Leader), 485 | }; 486 | 487 | if rest.contains(char::is_whitespace) { 488 | return 
Err(AttributeParseError::Whitespace); 489 | } 490 | 491 | match rest.len() { 492 | 1..=255 => Ok(AttributeRef::new(s)), 493 | _ => Err(AttributeParseError::Length), 494 | } 495 | } 496 | 497 | #[derive(Debug, Error, Clone, PartialEq)] 498 | pub enum AttributeParseError { 499 | #[error("expected leading `:`")] 500 | Leader, 501 | #[error("whitespace not allowed")] 502 | Whitespace, 503 | #[error("identifier is either too long or too short (1..=255)")] 504 | Length, 505 | } 506 | 507 | impl AttributeRef { 508 | /// Panics if the attribute is invalid 509 | pub fn from_static(s: &'static str) -> &'static Self { 510 | TryFrom::try_from(s).unwrap() 511 | } 512 | } 513 | 514 | #[cfg(feature = "serde")] 515 | pub mod _serde { 516 | use super::*; 517 | use serde::{Deserialize, Deserializer, Serialize, Serializer}; 518 | 519 | impl Serialize for Entity { 520 | fn serialize(&self, serializer: S) -> Result 521 | where 522 | S: Serializer, 523 | { 524 | serializer.collect_str(&self) 525 | } 526 | } 527 | 528 | impl<'de> Deserialize<'de> for Entity { 529 | fn deserialize(deserializer: D) -> Result 530 | where 531 | D: Deserializer<'de>, 532 | { 533 | let s: Cow = Deserialize::deserialize(deserializer)?; 534 | s.parse().map_err(serde::de::Error::custom) 535 | } 536 | } 537 | 538 | impl<'de> Deserialize<'de> for Attribute { 539 | fn deserialize(deserializer: D) -> Result 540 | where 541 | D: Deserializer<'de>, 542 | { 543 | let s: Cow = Deserialize::deserialize(deserializer)?; 544 | s.parse().map_err(serde::de::Error::custom) 545 | } 546 | } 547 | 548 | impl<'de> Deserialize<'de> for Value { 549 | fn deserialize(deserializer: D) -> Result 550 | where 551 | D: Deserializer<'de>, 552 | { 553 | #[derive(Deserialize)] 554 | #[serde(untagged)] 555 | enum Container<'a> { 556 | Integer(i64), 557 | Float(f64), 558 | Boolean(bool), 559 | Text(Cow<'a, str>), 560 | Bytes(Cow<'a, [u8]>), 561 | } 562 | 563 | let s: Container = Deserialize::deserialize(deserializer)?; 564 | Ok(match s { 565 | 
Container::Integer(i) => Value::from(i), 566 | Container::Float(f) => Value::from(f), 567 | Container::Boolean(b) => Value::from(b), 568 | Container::Text(t) => Option::::None 569 | .or_else(|| t.parse::().map(Value::from).ok()) 570 | .or_else(|| t.parse::().map(Value::from).ok()) 571 | .or_else(|| t.parse::().map(Value::from).ok()) 572 | .unwrap_or_else(|| Value::Text(t.into())), 573 | Container::Bytes(b) => Value::Blob(b.into()), 574 | }) 575 | } 576 | } 577 | } 578 | 579 | #[cfg(test)] 580 | mod tests { 581 | use super::*; 582 | 583 | #[test] 584 | fn test_attribute_validation() { 585 | assert!(Attribute::try_from("foo/meme".to_string()).is_err()); 586 | assert!(Attribute::try_from(":foo/meme extra".to_string()).is_err()); 587 | assert!(Attribute::try_from(":foo/meme".to_string()).is_ok()); 588 | 589 | assert!(AttributeRef::from_str("foo/meme").is_err()); 590 | assert!(AttributeRef::from_str(":foo/meme extra").is_err()); 591 | assert!(AttributeRef::from_str(":foo/meme").is_ok()); 592 | 593 | assert!(AttributeRef::from_str(":f").is_ok()); 594 | assert!(AttributeRef::from_str(":").is_err()); 595 | } 596 | 597 | #[test] 598 | fn test_entity_validation() { 599 | assert!(Entity::try_from("#not-a-uuid").is_err()); 600 | assert!(Entity::try_from("b3ddeb4c-a61f-4433-8acd-7e10117f142e").is_err()); 601 | assert!("#b3ddeb4c-a61f-4433-8acd-7e10117f142e" 602 | .parse::() 603 | .is_ok()); 604 | } 605 | 606 | #[cfg(feature = "serde_json")] 607 | #[test] 608 | fn test_parsing() { 609 | use crate::BorrowedParse; 610 | 611 | let data = [ 612 | (ValueRef::Integer(123), "123"), 613 | (ValueRef::Float(0.12), "0.12"), 614 | (ValueRef::Boolean(true), "true"), 615 | (ValueRef::Text("hello world"), "\"hello world\""), 616 | ( 617 | ValueRef::Uuid( 618 | "b3ddeb4c-a61f-4433-8acd-7e10117f142e" 619 | .parse::() 620 | .unwrap(), 621 | ), 622 | "b3ddeb4c-a61f-4433-8acd-7e10117f142e", 623 | ), 624 | ( 625 | ValueRef::Entity( 626 | "#b3ddeb4c-a61f-4433-8acd-7e10117f142e" 627 | .parse::() 628 | 
.unwrap(), 629 | ), 630 | "#b3ddeb4c-a61f-4433-8acd-7e10117f142e", 631 | ), 632 | ( 633 | ValueRef::Attribute(AttributeRef::from_str(":foo/bar").unwrap()), 634 | ":foo/bar", 635 | ), 636 | ]; 637 | 638 | for (value, formatted) in data.into_iter() { 639 | let displayed = value.to_string(); 640 | assert_eq!(displayed.as_str(), formatted, "{:?}", value); 641 | 642 | let parsed: ValueRef = displayed.borrowed_parse().unwrap(); 643 | assert_eq!(parsed, value); 644 | } 645 | 646 | { 647 | let blob = ValueRef::Blob("bytes".as_bytes()); 648 | let formatted = "[98,121,116,101,115]"; 649 | assert_eq!(blob.to_string(), formatted); 650 | assert!(formatted.borrowed_parse::().is_err()); 651 | assert_eq!(formatted.parse::().unwrap(), blob.into()); 652 | } 653 | 654 | { 655 | let blob = ValueRef::Text("needs \"escaping\""); 656 | let formatted = r#""needs \"escaping\"""#; 657 | assert_eq!(blob.to_string(), formatted); 658 | assert!(formatted.borrowed_parse::().is_err()); 659 | assert_eq!(formatted.parse::().unwrap(), blob.into()); 660 | } 661 | } 662 | 663 | #[cfg(feature = "serde")] 664 | #[cfg(feature = "serde_json")] 665 | #[test] 666 | fn test_json() { 667 | let data = [ 668 | (ValueRef::Text("some text"), "\"some text\""), 669 | (ValueRef::Integer(123), "123"), 670 | (ValueRef::Float(0.12), "0.12"), 671 | (ValueRef::Boolean(true), "true"), 672 | ( 673 | ValueRef::Uuid( 674 | "b3ddeb4c-a61f-4433-8acd-7e10117f142e" 675 | .parse::() 676 | .unwrap(), 677 | ), 678 | "\"b3ddeb4c-a61f-4433-8acd-7e10117f142e\"", 679 | ), 680 | ( 681 | ValueRef::Entity( 682 | "#b3ddeb4c-a61f-4433-8acd-7e10117f142e" 683 | .parse::() 684 | .unwrap(), 685 | ), 686 | "\"#b3ddeb4c-a61f-4433-8acd-7e10117f142e\"", 687 | ), 688 | ( 689 | ValueRef::Attribute(AttributeRef::from_str(":foo/bar").unwrap()), 690 | "\":foo/bar\"", 691 | ), 692 | ]; 693 | 694 | for (value, json) in data.into_iter() { 695 | let ser = serde_json::to_string(&value).unwrap(); 696 | assert_eq!(&ser, json); 697 | 698 | let deser: Value = 
serde_json::from_str(&ser).unwrap(); 699 | assert_eq!(ValueRef::from(&deser), value); 700 | } 701 | } 702 | 703 | #[cfg(feature = "serde")] 704 | #[cfg(feature = "serde_json")] 705 | #[test] 706 | fn test_json_incoherence() { 707 | /* We serialize Text but get an Attribute back */ 708 | let attribute_like_text = ValueRef::Text(":looks/like-an-attribute"); 709 | let json = serde_json::to_string(&attribute_like_text).unwrap(); 710 | assert_eq!( 711 | serde_json::from_str::(&json).unwrap(), 712 | Value::from(Attribute::from_static(":looks/like-an-attribute")) 713 | ); 714 | 715 | /* We serialize Text but get an Entity */ 716 | let entity_like_text = ValueRef::Text("#b3ddeb4c-a61f-4433-8acd-7e10117f142e"); 717 | let json = serde_json::to_string(&entity_like_text).unwrap(); 718 | assert_eq!( 719 | serde_json::from_str::(&json).unwrap(), 720 | Value::Entity( 721 | "#b3ddeb4c-a61f-4433-8acd-7e10117f142e" 722 | .parse::() 723 | .unwrap() 724 | ), 725 | ); 726 | } 727 | } 728 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::match_ref_pats)] 2 | #![allow(clippy::option_map_unit_fn)] 3 | //! owoof is about interacting with a SQLite-backed database using entity-attribute-value triples 4 | //! and pattern matching. 5 | //! 6 | //! It is inspired by Datomic and I wrote [a wordy post on my 7 | //! blog](https://froghat.ca/blag/dont-woof/) that explains some of the motivation. Although, that 8 | //! post is about an earlier version of this library. 9 | //! 10 | //! ## tldr 11 | //! 12 | //! Consider a database of triplets that looks like ... 13 | //! ```ignore 14 | //! 100 :animal/name "Cat" 15 | //! 100 :pet/name "Garfield" 16 | //! 101 :animal/name "Dog" 17 | //! 101 :pet/name "Odie" 18 | //! 102 :person/name "John Arbuckle" 19 | //! 100 :pet/human 102 20 | //! 101 :pet/human 102 21 | //! ``` 22 | //! 23 | //! 
And consider this pattern ... 24 | //! ```ignore 25 | //! ?_ :pet/name ?_ 26 | //! ``` 27 | //! If we were to match triplets using this pattern, we'd be asking for triplets with: any 28 | //! entity, for the *:pet/name* attribute, and any value. And we would get the following 29 | //! two triplets. 30 | //! ```ignore 31 | //! 100 :pet/name "Garfield" 32 | //! 101 :pet/name "Odie" 33 | //! ``` 34 | //! Those are the triplets that exist that match that pattern. 35 | //! 36 | //! Now consider this **pair** of patterns ... 37 | //! ```ignore 38 | //! ?a :pet/name ?_ 39 | //! ?a :animal/name "Cat" 40 | //! ``` 41 | //! The **first** pattern matches the two triplets from earlier. 42 | //! But the **second** matches just: 43 | //! ```ignore 44 | //! 100 :animal/name "Cat" 45 | //! ``` 46 | //! But, we are not interested in *every* combination of triplets in the sets that matched our 47 | //! patterns. We related the patterns by constraining the *entity* to the same variable, *?a*. 48 | //! (This is often called unification.) This means that we only match the combinations of triplets 49 | //! between sets when they share the same entity. So we end up with *one* result with *two* 50 | //! triplets. 51 | //! ```ignore 52 | //! 100 :pet/name "Garfield" 53 | //! 100 :animal/name "Cat" 54 | //! ``` 55 | //! 56 | //! It's like if we were to say: 57 | //! > Match every triplet where some entity named *?a* has the attribute *:pet/name* 58 | //! > with any value. And match triplets on the same entity *?a* having the attribute 59 | //! > *:animal/name* with the value "Cat". 60 | //! 61 | //! Another one might be: 62 | //! ```ignore 63 | //! ?p :person/name ?person 64 | //! ?a :pet/human ?p 65 | //! ?a :pet/name ?pet 66 | //! ``` 67 | //! Here, the variables *?person* and *?pet* don't relate triplets together, but they're 68 | //! there so we can refer to them when we want to get information out. 69 | //! 70 | //! These patterns are saying: 71 | //! 
> Given each entity *?a* having the *:pet/name* *?pet* 72 | //! > and each entity *?p* having the *:person/name* *?person*, 73 | //! > match only combinations of these where there exists some triplet *?a* *:pet/human* 74 | //! > *?p*. That is, where *?a*'s human is *?p*. 75 | //! 76 | //! The point of owoof is to allow us to build a database of triplets and to use pattern matching 77 | //! to ask it questions and get values out -- like the values of *?person* and *?pet* in the above 78 | //! query: 79 | //! ```ignore 80 | //! "John Arbuckle" "Garfield" 81 | //! "John Arbuckle" "Odie" 82 | //! ``` 83 | //! 84 | //! Here's a kind of WIP example of the rust API corresponding to the patterns above: 85 | //! 86 | //! ``` 87 | //! use owoof::{Network, Value, ValueRef, AttributeRef, disperse::just, traits::*}; 88 | //! 89 | //! let mut network = Network::::default(); 90 | //! 91 | //! let (p, _, person) = network 92 | //! .fluent_triples() 93 | //! .match_attribute(AttributeRef::from_static(":person/name")) 94 | //! .eav(); 95 | //! 96 | //! let (a, _, _) = network 97 | //! .fluent_triples() 98 | //! .match_attribute(AttributeRef::from_static(":pet/human")) 99 | //! .link_value(p) 100 | //! .eav(); 101 | //! 102 | //! let (a, _, pet) = network 103 | //! .fluent_triples() 104 | //! .match_attribute(AttributeRef::from_static(":pet/name")) 105 | //! .eav(); 106 | //! 107 | //! # let mut db = owoof::new_in_memory().unwrap(); 108 | //! # let woof = owoof::DontWoof::new(&mut db).unwrap(); 109 | //! let _: Vec<(Value, Value)> = network.select() 110 | //! .field(person) 111 | //! .field(pet) 112 | //! .to_query() 113 | //! .disperse((just(), just()), &woof) 114 | //! .unwrap(); 115 | //! # // assert_eq!( 116 | //! # // res, 117 | //! # // vec![ 118 | //! # // (Value::Text("John Arbuckle".to_owned()), Value::Text("Garfield".to_owned())), 119 | //! # // (Value::Text("John Arbuckle".to_owned()), Value::Text("Odie".to_owned())), 120 | //! # // ], 121 | //! # // ); 122 | //! 
``` 123 | //! 124 | //! Check out the [`network`] module for some stuff about querying. 125 | //! 126 | //! The [`DontWoof`] type is useful for mutating data. 127 | //! 128 | //! ## Crate Features 129 | //! 130 | //! - explain -- Adds `DontWoof::explain()` to do EXPLAIN QUERY PLAN. *enabled by default* 131 | //! - cli -- Required for `bin/owoof`. Enables serde & serde_json. 132 | //! - serde & serde_json -- Required for `parse_value()` & `parse_pattern()` and for serializing [`Value`] 133 | //! and [`ValueRef`] 134 | 135 | use thiserror::Error; 136 | 137 | pub mod disperse; 138 | pub mod driver; 139 | #[cfg(feature = "explain")] 140 | pub mod explain; 141 | pub mod network; 142 | pub mod retrieve; 143 | pub mod soup; 144 | pub mod sql; 145 | pub mod types; 146 | 147 | use rusqlite::hooks::Action; 148 | use rusqlite::{OptionalExtension, ToSql}; 149 | 150 | use std::cell::RefCell; 151 | use std::str::FromStr; 152 | use std::sync::atomic::{self, AtomicBool}; 153 | use std::sync::{Arc, Mutex}; 154 | 155 | use crate::driver::{TypeTag, ENTITY_ID_TAG}; 156 | 157 | pub use crate::either::Either; 158 | pub use crate::network::{GenericNetwork, Network, Ordering, OwnedNetwork}; 159 | pub use crate::retrieve::{NamedNetwork, Pattern, Variable}; 160 | pub use crate::soup::Encoded; 161 | pub use crate::types::{Attribute, AttributeRef, Entity, Value, ValueRef}; 162 | 163 | /// This is just supposed to be some helpful traits re-exported but there's only the one thing in 164 | /// it so there's not much point... 165 | pub mod traits { 166 | pub use super::{BorrowedParse, FromBorrowedStr}; 167 | pub use crate::sql::PushToQuery; 168 | } 169 | 170 | pub(crate) const SCHEMA: &str = include_str!("../schema.sql"); 171 | 172 | /// Simply executes all the statements required to build the schema against the given connection. 173 | /// Run this under a transaction that you manage or use [`create_schema_in_transaction`]. 
174 | pub fn create_schema(db: &rusqlite::Connection) -> rusqlite::Result<()> { 175 | db.execute_batch(SCHEMA) 176 | } 177 | 178 | pub fn create_schema_in_transaction(db: &mut rusqlite::Connection) -> rusqlite::Result<()> { 179 | let tx = db.transaction()?; 180 | create_schema(&tx)?; 181 | tx.commit() 182 | } 183 | 184 | pub fn new_in_memory() -> rusqlite::Result { 185 | let mut db = rusqlite::Connection::open_in_memory()?; 186 | create_schema_in_transaction(&mut db)?; 187 | Ok(db) 188 | } 189 | 190 | /// Errors returned by this crate. TODO we only have one variant so what's the point? 191 | #[derive(Debug, Error)] 192 | pub enum Error { 193 | #[error("sql error")] 194 | Sql(#[from] rusqlite::Error), 195 | } 196 | 197 | pub type Result = std::result::Result; 198 | 199 | type Change = i64; 200 | 201 | /// This has a bunch of logic for changing data, use [`DontWoof::new`] to make an instance. 202 | /// 203 | /// Avoid `DontWoof::from(rusqlite::Transaction)`: it panics unless `pragma foreign_keys` is already on. 204 | #[derive(Debug)] 205 | pub struct DontWoof<'tx> { 206 | tx: HookedTransaction<'tx>, 207 | changes: Arc>>, 208 | changes_swap: RefCell>, 209 | changes_failed: Arc, 210 | } 211 | 212 | impl<'tx> DontWoof<'tx> { 213 | /// Turns on `pragma foreign_keys` and opens a transaction on `db`. Prefer this to `DontWoof::from(rusqlite::Transaction)`, which panics if that pragma is off. 214 | pub fn new(db: &'tx mut rusqlite::Connection) -> Result { 215 | db.execute("pragma foreign_keys=on", [])?; 216 | let tx = db.transaction()?; 217 | Ok(Self::from(tx)) 218 | } 219 | 220 | /// Look up an attribute by its identifier. 221 | /// 222 | /// In other words, find ?e given ?a where ?e :db/attribute ?a. 223 | pub fn attribute>(&self, a: Encoded) -> Result> { 224 | let sql = r#"SELECT rowid FROM "attributes" WHERE ident = ?"#; 225 | self.tx 226 | .query_row(sql, &[&a.rowid], |row| row.get::<_, i64>(0)) 227 | .map(Encoded::from_rowid) 228 | .map_err(Error::from) 229 | } 230 | 231 | // /// Panics if the given identifier is not valid.
232 | // pub fn attribute_for(&self, identifier: &'static str) -> Result> { 233 | // let a = self.encode(AttributeRef::from_static(identifier))?; 234 | // let e = self.attribute(a)?; 235 | // Ok(e) 236 | // } 237 | /// Insert a new entity row (a random 16-byte blob tagged `ENTITY_ID_TAG`) into "soup" and return its handle. 238 | pub fn new_entity(&self) -> Result> { 239 | let insert = r#"INSERT INTO "soup" (t, v) VALUES (?, randomblob(16))"#; 240 | let mut insert = self.tx.prepare_cached(insert)?; 241 | let n = insert.execute(rusqlite::params![ENTITY_ID_TAG])?; 242 | assert_eq!(n, 1); 243 | let rowid = self.tx.last_insert_rowid(); 244 | Ok(Encoded::from_rowid(rowid)) 245 | } 246 | /// Read the value stored in "soup" for the row that `e` points at. 247 | pub fn decode(&self, e: Encoded) -> Result { 248 | use driver::FromSqlRow; 249 | let select = r#"SELECT t, v 250 | FROM "soup" 251 | WHERE rowid = ?"#; 252 | let mut select = self.tx.prepare_cached(select)?; 253 | let t = select.query_row(rusqlite::params![e.rowid], |row| { 254 | driver::just::().from_start_of_row(row) 255 | })?; 256 | Ok(t) 257 | } 258 | /// Create a new entity with [`DontWoof::new_entity`] and wrap it with [`DontWoof::fluent`]. 259 | pub fn fluent_entity(&self) -> Result { 260 | self.new_entity().map(|e| self.fluent(e)) 261 | } 262 | /// Pair this `DontWoof` with the entity `e` so triples about it can be asserted or retracted by chaining. 263 | pub fn fluent(&self, e: Encoded) -> FluentEntity { 264 | FluentEntity { woof: self, e } 265 | } 266 | /// Return a handle to the "soup" row holding `val`, inserting the row first if it does not exist yet. 267 | pub fn encode(&self, val: V) -> Result::Factory>> { 268 | let rowid: i64 = self._encode(val.type_tag(), &val as &dyn ToSql)?; 269 | Ok(Encoded::from_rowid(rowid)) 270 | } 271 | /// Find the "soup" row matching `(tag, val)` or insert it; returns the rowid either way. 272 | fn _encode(&self, tag: i64, val: &dyn ToSql) -> Result { 273 | let params = rusqlite::params![tag, val]; 274 | 275 | /* some brief testing suggests this is faster than INSERT ON CONFLICT RETURNING */ 276 | 277 | let select = r#"SELECT rowid 278 | FROM "soup" 279 | WHERE t = ? 280 | AND v = ?"#; 281 | let mut select = self.tx.prepare_cached(select)?; 282 | let rowid = match select 283 | .query_row(params, |row| row.get::<_, i64>(0)) 284 | .optional()?
285 | { 286 | Some(rowid) => rowid, 287 | None => { 288 | let insert = r#"INSERT INTO "soup" (t, v) VALUES (?, ?)"#; 289 | let mut insert = self.tx.prepare_cached(insert)?; 290 | let n = insert.execute(params)?; 291 | assert_eq!(n, 1); 292 | self.tx.last_insert_rowid() 293 | } 294 | }; 295 | 296 | Ok(rowid) 297 | } 298 | 299 | /// Insert a single triplet, then apply any attribute index changes queued by the update hook. 300 | pub fn assert( 301 | &self, 302 | e: Encoded, 303 | a: Encoded, 304 | v: Encoded, 305 | ) -> Result<&Self> { 306 | /* triples is WITHOUT ROWID so don't try to read the last rowid after an insert */ 307 | let mut stmt = self 308 | .tx 309 | .prepare_cached(r#"INSERT INTO "triples" (e,a,v) VALUES (?, ?, ?)"#)?; 310 | let n = stmt.execute(&[&e.rowid, &a.rowid, &v.rowid])?; 311 | assert_eq!(n, 1); 312 | 313 | /* This kind of sucks because it's a super rare event but requires accessing a RefCell 314 | * and locking a Mutex. Using an AtomicBool to flag buffer emptiness and allow an early exit 315 | * doesn't improve performance much (~8ms down to ~6ms) and overall this check is less 316 | * than ~1% of an import. So it's not worth worrying about this too much. */ 317 | self._update_attribute_indexes()?; 318 | 319 | Ok(self) 320 | } 321 | 322 | fn _update_attribute_indexes(&self) -> rusqlite::Result<()> { 323 | /* Since Connection is Send, this can fail to lock. And we don't have a way to recover and 324 | * try again if that happens. For now, there should be a big warning about this in the 325 | * API.
TODO XXX FIXME */ 326 | if let Ok(mut swap) = self.changes_swap.try_borrow_mut() { 327 | debug_assert!(swap.is_empty()); 328 | if let Ok(ref mut mutex) = self.changes.try_lock() { 329 | if mutex.is_empty() { 330 | return Ok(()); 331 | } 332 | std::mem::swap::>(mutex.as_mut(), swap.as_mut()); 333 | } else { 334 | debug_assert!(false, "failed to lock changes"); 335 | self.changes_failed.store(true, atomic::Ordering::SeqCst); 336 | return Ok(()); 337 | } 338 | 339 | swap.sort_unstable(); 340 | swap.dedup(); 341 | 342 | let result = self._execute_attribute_index_changes(swap.as_slice()); 343 | 344 | swap.clear(); 345 | 346 | /* If this is Err(_), don't set self.changes_failed, that refers to synchronization 347 | * issues. This is a rusqlite query failure or whatever. */ 348 | result.map(drop) 349 | } else { 350 | debug_assert!(false, "failed to borrow changes_swap"); 351 | self.changes_failed.store(true, atomic::Ordering::SeqCst); 352 | Ok(()) 353 | } 354 | } 355 | 356 | fn _execute_attribute_index_changes(&self, swap: &[Change]) -> rusqlite::Result<()> { 357 | /* Each entry in the changes list is just a rowid. We don't assume from the action whether an attribute has 358 | * actually been created or removed because we may get change notifications for failed 359 | * commands that are rolled back -- such as removing an attribute that is in use. 360 | * 361 | * So we're paranoid and any change notification just means to recheck the state.
*/ 362 | swap.iter().try_for_each(|rowid| { 363 | let mut stmt = self 364 | .tx 365 | .prepare_cached(r#"SELECT count(*) FROM "attributes" WHERE rowid = ?"#)?; 366 | let c: i64 = stmt.query_row(&[rowid], |row| row.get(0))?; 367 | let sql = if 0 < c { 368 | format!( 369 | r#"CREATE INDEX 370 | IF NOT EXISTS "triples-ave-{rowid}" 371 | ON "triples" (v, e) 372 | WHERE a = {rowid}"#, 373 | rowid = rowid 374 | ) 375 | } else { 376 | format!( 377 | r#"DROP INDEX IF EXISTS "triples-ave-{rowid}""#, 378 | rowid = rowid 379 | ) 380 | }; 381 | self.tx.execute(&sql, []).map(drop) 382 | }) 383 | } 384 | 385 | /// Delete a single triplet. Attribute indexes are only rechecked if a row was actually deleted. 386 | pub fn retract( 387 | &self, 388 | e: Encoded, 389 | a: Encoded, 390 | v: Encoded, 391 | ) -> Result<&Self> { 392 | let mut stmt = self.tx.prepare_cached( 393 | r#"DELETE FROM "triples" 394 | WHERE e = ? 395 | AND a = ? 396 | AND v = ?"#, 397 | )?; 398 | let n = stmt.execute(&[&e.rowid, &a.rowid, &v.rowid])?; 399 | 400 | if 0 < n { 401 | self._update_attribute_indexes()?; 402 | } 403 | 404 | Ok(self) 405 | } 406 | 407 | /// Run `PRAGMA optimize;`. May update indexes and promote better queries. 408 | /// 409 | /// The SQLite documentation recommends calling this before closing a connection. 410 | /// 411 | /// See <https://www.sqlite.org/pragma.html#pragma_optimize> 412 | pub fn optimize(&self) -> rusqlite::Result<()> { 413 | self.tx 414 | .execute("SELECT * FROM pragma_optimize()", []) 415 | .map(drop) 416 | } 417 | 418 | pub fn prefetch_attributes(&self, network: &mut Network) -> Result<()> 419 | where 420 | V: TypeTag + ToSql, 421 | { 422 | use crate::network::{Constraint, Field, Match}; 423 | 424 | network 425 | .constraints_mut() 426 | .iter_mut() 427 | .try_for_each(|constraint| match constraint { 428 | &mut Constraint::Eq { lh, rh: Match::Value(ref v) } 429 | if lh.field() == Field::Attribute => 430 | { 431 | let mut stmt = self.tx.prepare_cached( 432 | r#" 433 | SELECT a.rowid 434 | FROM attributes a 435 | JOIN soup s ON a.ident = s.rowid 436 | WHERE s.t = ? AND s.v = ?
437 | LIMIT 1 438 | "#, 439 | )?; 440 | let type_tag = v.type_tag(); 441 | let rh = stmt 442 | .query_row(rusqlite::params![type_tag, v], |row| row.get(0)) 443 | .map(Encoded::from_rowid) 444 | .map(Match::Encoded) 445 | .optional()?; 446 | // If a lookup failed, the query probably won't succeed, but whatever ... 447 | if let Some(rh) = rh { 448 | *constraint = Constraint::Eq { lh, rh }; 449 | } 450 | Ok(()) 451 | } 452 | _ => Result::<(), Error>::Ok(()), 453 | })?; 454 | 455 | Ok(()) 456 | } 457 | 458 | pub fn into_tx(self) -> rusqlite::Transaction<'tx> { 459 | self.tx.unwrap() 460 | } 461 | } 462 | 463 | #[derive(Debug)] 464 | struct HookedTransaction<'tx>(Option>); 465 | 466 | impl<'tx> std::ops::Deref for HookedTransaction<'tx> { 467 | type Target = rusqlite::Transaction<'tx>; 468 | 469 | fn deref(&self) -> &Self::Target { 470 | self.0.as_ref().unwrap() 471 | } 472 | } 473 | 474 | impl<'tx> HookedTransaction<'tx> { 475 | fn new(tx: rusqlite::Transaction<'tx>, hook: F) -> Self 476 | where 477 | F: FnMut(Action, &str, &str, i64) + Send + 'static, 478 | { 479 | tx.update_hook(Some(hook)); 480 | HookedTransaction(Some(tx)) 481 | } 482 | 483 | fn unwrap(mut self) -> rusqlite::Transaction<'tx> { 484 | let tx = self.0.take().unwrap(); 485 | HookedTransaction::_unhook(&tx); 486 | tx 487 | } 488 | 489 | fn _unhook(db: &rusqlite::Connection) { 490 | let no_hook = None::; 491 | db.update_hook(no_hook); 492 | } 493 | } 494 | 495 | impl<'tx> Drop for HookedTransaction<'tx> { 496 | fn drop(&mut self) { 497 | if let Some(tx) = self.0.take() { 498 | HookedTransaction::_unhook(&tx); 499 | } 500 | } 501 | } 502 | 503 | impl<'tx> From> for DontWoof<'tx> { 504 | fn from(tx: rusqlite::Transaction<'tx>) -> Self { 505 | let foreign_keys: i64 = tx 506 | .query_row("pragma foreign_keys", [], |row| row.get(0)) 507 | .unwrap(); 508 | assert!(1 == foreign_keys); 509 | 510 | /* iirc this must be Send because this hook is placed on the Connection which can be shared 511 | * by multiple
threads. So Arc and other Send-able primitives are required instead of 512 | * their !Send counterparts. */ 513 | let changes = Arc::new(Mutex::new(Vec::::default())); 514 | let changes_failed = Arc::new(AtomicBool::new(false)); 515 | 516 | let hook = { 517 | let changes = Arc::clone(&changes); 518 | let changes_failed = Arc::clone(&changes_failed); 519 | move |_action: Action, _database: &str, table: &str, rowid: i64| { 520 | if table == "attributes" { 521 | if let Ok(ref mut mutex) = changes.try_lock() { 522 | mutex.push(rowid); 523 | } else { 524 | changes_failed.store(true, atomic::Ordering::SeqCst); 525 | } 526 | } 527 | } 528 | }; 529 | 530 | DontWoof { 531 | tx: HookedTransaction::new(tx, hook), 532 | changes, 533 | changes_swap: RefCell::new(Vec::::default()), 534 | changes_failed, 535 | } 536 | } 537 | } 538 | 539 | impl<'tx> std::ops::Deref for DontWoof<'tx> { 540 | type Target = rusqlite::Transaction<'tx>; 541 | 542 | fn deref(&self) -> &Self::Target { 543 | &self.tx 544 | } 545 | } 546 | 547 | /// A [`DontWoof`] paired with one entity, so triples about that entity can be asserted or retracted by chaining calls.
548 | pub struct FluentEntity<'w, 'tx> { 549 | woof: &'w DontWoof<'tx>, 550 | e: Encoded, 551 | } 552 | 553 | impl FluentEntity<'_, '_> { 554 | pub fn assert(&self, a: Encoded, v: Encoded) -> Result<&Self> { 555 | self.woof.assert(self.e, a, v)?; 556 | Ok(self) 557 | } 558 | 559 | pub fn retract(&self, a: Encoded, v: Encoded) -> Result<&Self> { 560 | self.woof.retract(self.e, a, v)?; 561 | Ok(self) 562 | } 563 | } 564 | 565 | impl From<&FluentEntity<'_, '_>> for Encoded { 566 | fn from(fl: &FluentEntity) -> Self { 567 | fl.e 568 | } 569 | } 570 | /** A two-variant `Either` type whose `FromBorrowedStr` impl tries the left type first and falls back to the right, returning both errors if neither parses. */ 571 | pub mod either { 572 | pub use Either::{Left, Left as left, Right, Right as right}; 573 | 574 | #[cfg_attr( 575 | feature = "serde", 576 | derive(serde::Serialize, serde::Deserialize), 577 | serde(untagged) 578 | )] 579 | #[derive(Debug, PartialEq)] 580 | pub enum Either { 581 | Left(L), 582 | Right(R), 583 | } 584 | 585 | impl Either { 586 | pub fn map_left LL>(self, f: F) -> Either { 587 | match self { 588 | Either::Left(l) => Either::Left(f(l)), 589 | Either::Right(r) => Either::Right(r), 590 | } 591 | } 592 | 593 | pub fn map_right RR>(self, f: F) -> Either { 594 | match self { 595 | Either::Left(l) => Either::Left(l), 596 | Either::Right(r) => Either::Right(f(r)), 597 | } 598 | } 599 | } 600 | 601 | use super::FromBorrowedStr; 602 | 603 | impl<'a, L, R> FromBorrowedStr<'a> for Either 604 | where 605 | L: FromBorrowedStr<'a>, 606 | R: FromBorrowedStr<'a>, 607 | { 608 | type Err = ( 609 | >::Err, 610 | >::Err, 611 | ); 612 | 613 | fn from_borrowed_str(s: &'a str) -> Result { 614 | L::from_borrowed_str(s).map(Either::Left).or_else(|a_err| { 615 | R::from_borrowed_str(s) 616 | .map(Either::Right) 617 | .map_err(|b_err| (a_err, b_err)) 618 | }) 619 | } 620 | } 621 | } 622 | /** Parse a value out of a `&'a str`; presumably like [`FromStr`] except that the result may borrow from the input (the trait carries the input lifetime `'a`). */ 623 | pub trait FromBorrowedStr<'a>: Sized { 624 | type Err; 625 | fn from_borrowed_str(s: &'a str) -> Result; 626 | } 627 | 628 | /// Anything that implements [`FromStr`] implements FromBorrowedStr 629 | impl<'a, T> FromBorrowedStr<'a> for T 630
| where 631 | T: FromStr, 632 | { 633 | type Err = ::Err; 634 | 635 | fn from_borrowed_str(s: &'a str) -> Result { 636 | s.parse() 637 | } 638 | } 639 | /** Gives `str` a `borrowed_parse()` method that forwards to [`FromBorrowedStr::from_borrowed_str`]. */ 640 | pub trait BorrowedParse<'a> { 641 | fn borrowed_parse(&'a self) -> Result>::Err> 642 | where 643 | F: FromBorrowedStr<'a>; 644 | } 645 | 646 | impl<'a> BorrowedParse<'a> for str { 647 | fn borrowed_parse(&'a self) -> Result>::Err> 648 | where 649 | F: FromBorrowedStr<'a>, 650 | { 651 | F::from_borrowed_str(self) 652 | } 653 | } 654 | 655 | // impl<'a> BorrowedParse<'a> for String { 656 | // fn borrowed_parse(&'a self) -> Result>::Err> 657 | // where 658 | // F: FromBorrowedStr<'a>, 659 | // { 660 | // F::from_borrowed_str(self.as_str()) 661 | // } 662 | // } 663 | 664 | /// Mirrors [`rusqlite::OptionalExtension`] for this crate's [`Result`]: the error is unwrapped to a `rusqlite::Error`, passed through rusqlite's `optional()`, and wrapped again. 665 | pub trait Optional { 666 | fn optional(self) -> Result>; 667 | } 668 | 669 | impl Optional for Result { 670 | fn optional(self) -> Result> { 671 | self.map_err(|Error::Sql(err)| err) 672 | .optional() 673 | .map_err(Error::from) 674 | } 675 | } 676 | 677 | #[cfg(test)] 678 | mod tests { 679 | use super::*; 680 | 681 | pub(crate) fn rusqlite_in_memory() -> Result { 682 | let mut db = rusqlite::Connection::open_in_memory()?; 683 | { 684 | let tx = db.transaction()?; 685 | tx.execute_batch(SCHEMA)?; 686 | tx.commit()?; 687 | } 688 | Ok(db) 689 | } 690 | 691 | #[test] 692 | fn test_decode_new_entity() -> anyhow::Result<()> { 693 | let mut db = rusqlite_in_memory()?; 694 | let woof = DontWoof::new(&mut db)?; 695 | 696 | let e = woof.new_entity()?; 697 | let _ = woof.decode(e)?; 698 | Ok(()) 699 | } 700 | 701 | #[test] 702 | fn test_decode() -> anyhow::Result<()> { 703 | let mut db = rusqlite_in_memory()?; 704 | let woof = DontWoof::new(&mut db)?; 705 | 706 | let v = woof.encode(ValueRef::from("hello world"))?; 707 | assert_eq!(Value::Text("hello world".to_owned()), woof.decode(v)?); 708 | Ok(()) 709 | } 710 | 711 | #[test] 712 | fn test_retract() -> anyhow::Result<()> { 713 | let
mut db = rusqlite_in_memory()?; 714 | let woof = DontWoof::new(&mut db)?; 715 | 716 | let db_id = woof.attribute(woof.encode(AttributeRef::from_static(":db/id"))?)?; 717 | let db_attr = woof.attribute(woof.encode(AttributeRef::from_static(":db/attribute"))?)?; 718 | 719 | let pet_name = woof 720 | .fluent_entity()? 721 | .assert(db_attr, woof.encode(":pet/name".parse::()?)?)? 722 | .into(); 723 | 724 | let animal_name: Encoded = woof 725 | .fluent_entity()? 726 | .assert(db_attr, woof.encode(":animal/name".parse::()?)?)? 727 | .into(); 728 | 729 | let garfield: Encoded = woof 730 | .fluent_entity()? 731 | .assert(pet_name, woof.encode(ValueRef::from("Garfield"))?)? 732 | .assert(animal_name, woof.encode(ValueRef::from("Cat"))?)? 733 | .into(); 734 | 735 | /* fails since garfield has a pet name and an animal name */ 736 | assert!(woof.retract(garfield, db_id, garfield).is_err()); 737 | 738 | /* fails since garfield's :animal/name exists */ 739 | assert!(woof 740 | .fluent(animal_name) 741 | .retract(db_attr, woof.encode(":animal/name".parse::()?)?) 742 | .is_err()); 743 | 744 | use crate::traits::*; 745 | 746 | let mut garfield_query = Network::<_>::default(); 747 | 748 | garfield_query.fluent_triples().link_entity(garfield); 749 | 750 | let garfield_facts = garfield_query.select().to_query(); 751 | 752 | assert_eq!(3, garfield_facts.count(&woof).unwrap()); 753 | 754 | /* Retract garfield :pet/name. 755 | * But :db/id fails because `garfield :animal/name "Cat"` still exists. */ 756 | assert!(woof 757 | .fluent(garfield) 758 | .retract(pet_name, woof.encode(ValueRef::from("Garfield"))?)? 759 | .retract(db_id, garfield) 760 | .is_err()); 761 | 762 | assert_eq!(2, garfield_facts.count(&woof).unwrap()); 763 | 764 | woof.fluent(garfield) 765 | .retract(animal_name, woof.encode(ValueRef::from("Cat"))?)? 766 | .retract(db_id, garfield)?; 767 | 768 | assert_eq!(0, garfield_facts.count(&woof).unwrap()); 769 | 770 | /* Now we can remove :animal/name. 
771 | * Technically, `animal_name :db/id animal_name` still exists but it's not an attribute 772 | * anymore because it has no identifier. */ 773 | woof.fluent(animal_name) 774 | .retract(db_attr, woof.encode(":animal/name".parse::()?)?)?; 775 | 776 | Ok(()) 777 | } 778 | } 779 | --------------------------------------------------------------------------------