├── Tests ├── LinuxMain.swift └── DiomedeTests │ ├── XCTestManifests.swift │ ├── DiomedeTypeSetTests.swift │ ├── DiomedeQuadStorePerformance.swift │ ├── DiomedeCharacteristicSetTests.swift │ └── DiomedeTests.swift ├── Dockerfile ├── LICENSE ├── Package.swift ├── Sources ├── diomede-db-util │ └── main.swift ├── Diomede │ ├── DataEncoding.swift │ └── Diomede.swift ├── DiomedeQuadStore │ ├── Util.swift │ ├── RDFExtensions.swift │ ├── TypeSets.swift │ └── CharacteristicSets.swift └── diomede-cli │ └── main.swift └── README.md /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | @testable import DiomedeTests 3 | 4 | XCTMain([ 5 | testCase(DiomedeQuadStoreTests.allTests), 6 | ]) -------------------------------------------------------------------------------- /Tests/DiomedeTests/XCTestManifests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | 3 | #if !canImport(ObjectiveC) 4 | public func allTests() -> [XCTestCaseEntry] { 5 | return [ 6 | testCase(DiomedeQuadStoreTests.allTests), 7 | ] 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM swift:5.2.4-focal 2 | 3 | RUN apt-get update && apt-get install -y \ 4 | build-essential \ 5 | libserd-dev \ 6 | && rm -rf /var/lib/apt/lists/* 7 | 8 | RUN mkdir /work 9 | WORKDIR /work 10 | 11 | COPY Package.swift . 
12 | RUN swift package update 13 | COPY Tests Tests 14 | COPY Sources Sources 15 | RUN swift build --build-tests 16 | 17 | ENV KINEO_W3C_TEST_PATH /work/rdf-tests 18 | ENV KINEO_W3C_TEST_PATH_12 /work/rdf-tests-12 19 | 20 | CMD ["swift", "test", "--parallel"] 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2020 Gregory Todd Williams 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.7 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 
3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "Diomede", 8 | platforms: [ 9 | .macOS(.v13) 10 | ], 11 | products: [ 12 | // Products define the executables and libraries produced by a package, and make them visible to other packages. 13 | .library( 14 | name: "Diomede", 15 | targets: ["Diomede"]), 16 | .library( 17 | name: "DiomedeQuadStore", 18 | targets: ["DiomedeQuadStore"]), 19 | .executable( 20 | name: "diomede-db-util", 21 | targets: ["diomede-db-util"]), 22 | .executable( 23 | name: "diomede-cli", 24 | targets: ["diomede-cli"]), 25 | ], 26 | dependencies: [ 27 | .package(url: "https://github.com/agisboye/CLMDB.git", from: "0.9.24"), 28 | .package(url: "https://github.com/krzyzanowskim/CryptoSwift.git", .upToNextMinor(from: "1.5.0")), 29 | .package(url: "https://github.com/kasei/swift-sparql-syntax.git", .upToNextMinor(from: "0.2.0")), 30 | ], 31 | targets: [ 32 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 33 | // Targets can depend on other targets in this package, and on products in packages which this package depends on. 
34 | .executableTarget( 35 | name: "diomede-db-util", 36 | dependencies: ["Diomede"] 37 | ), 38 | .executableTarget( 39 | name: "diomede-cli", 40 | dependencies: ["DiomedeQuadStore"] 41 | ), 42 | .target( 43 | name: "Diomede", 44 | dependencies: [ 45 | .product(name: "LMDB", package: "CLMDB"), 46 | .product(name: "SPARQLSyntax", package: "swift-sparql-syntax") 47 | ]), 48 | .target( 49 | name: "DiomedeQuadStore", 50 | dependencies: [ 51 | "Diomede", 52 | "CryptoSwift", 53 | .product(name: "SPARQLSyntax", package: "swift-sparql-syntax") 54 | ]), 55 | .testTarget( 56 | name: "DiomedeTests", 57 | dependencies: ["Diomede", "DiomedeQuadStore"]), 58 | ] 59 | ) 60 | -------------------------------------------------------------------------------- /Sources/diomede-db-util/main.swift: -------------------------------------------------------------------------------- 1 | import Diomede 2 | import Foundation 3 | 4 | let args = Array(CommandLine.arguments.dropFirst()) 5 | let cmd = CommandLine.arguments[0] 6 | 7 | guard args.count >= 1 else { 8 | print("Usage: \(cmd) ENVPATH DATABASE OP") 9 | exit(1) 10 | } 11 | 12 | let path = args[0] 13 | guard let e = Environment(path: path) else { 14 | fatalError() 15 | } 16 | 17 | guard args.count >= 2 else { 18 | print("Usage: \(cmd) ENVPATH DATABASE OP") 19 | print("Databases:") 20 | for d in try e.databases() { 21 | print("- \(d)") 22 | } 23 | exit(1) 24 | } 25 | 26 | 27 | let dbname = args[1] 28 | let op = (args.count > 2) ? 
args[2] : "" 29 | 30 | 31 | if op == "create" { 32 | try e.write { (txn) -> Int in 33 | try e.createDatabase(txn: txn, named: dbname) 34 | return 0 35 | } 36 | } else if op == "drop" { 37 | try e.write { (txn) -> Int in 38 | try e.dropDatabase(txn: txn, named: dbname) 39 | return 0 40 | } 41 | } else { 42 | if let db = e.database(named: dbname) { 43 | if args.count == 1 { 44 | try db.iterate { (k, v) in 45 | guard let key = String(data: k, encoding: .utf8), 46 | let value = String(data: v, encoding: .utf8) else { return } 47 | print("\(key) -> \(value)") 48 | } 49 | } else { 50 | if op == "get" { 51 | guard let k = args[3].data(using: .utf8) else { 52 | print("Invalid key") 53 | exit(1) 54 | } 55 | guard let data = try db.get(key: k), let value = String(data: data, encoding: .utf8) else { 56 | print("Invalid value") 57 | exit(1) 58 | } 59 | print("Value: \(value)") 60 | } else if op == "add" { 61 | guard let k = args[3].data(using: .utf8), 62 | let v = args[4].data(using: .utf8) else { 63 | print("Invalid key-value pair") 64 | exit(1) 65 | } 66 | try db.insert(uniqueKeysWithValues: [(k,v)]) 67 | } else if op == "between" { 68 | guard let l = args[3].data(using: .utf8), 69 | let u = args[4].data(using: .utf8) else { 70 | print("Invalid bounds") 71 | exit(1) 72 | } 73 | 74 | try db.iterate(between: l, and: u) { (k, v) in 75 | guard let key = String(data: k, encoding: .utf8), 76 | let value = String(data: v, encoding: .utf8) else { return } 77 | print("\(key) -> \(value)") 78 | } 79 | } else if op == "dump" { 80 | try db.iterate { (key, value) in 81 | print("\(key._hexValue) -> \(value._hexValue)") 82 | } 83 | } else { 84 | print("OP: \(op)") 85 | } 86 | } 87 | } else { 88 | print("*** No such database: '\(dbname)'") 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /Tests/DiomedeTests/DiomedeTypeSetTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | 
import SPARQLSyntax 3 | @testable import DiomedeQuadStore 4 | 5 | #if os(Linux) 6 | extension DiomedeTypeSetTests { 7 | static var allTests : [(String, (DiomedeTypeSetTests) -> () throws -> Void)] { 8 | return [ 9 | ("testTypeSets", testTypeSets), 10 | ] 11 | } 12 | } 13 | #endif 14 | 15 | class DiomedeTypeSetTests: XCTestCase { 16 | var filename: URL! 17 | var graph: Term! 18 | var store: DiomedeQuadStore! 19 | var subjectBase: Int = 0 20 | 21 | override func setUp() { 22 | super.setUp() 23 | let f = FileManager.default 24 | let dir = f.temporaryDirectory 25 | let filename = "kineo-test-\(UUID().uuidString).db" 26 | print(filename) 27 | let path = dir.appendingPathComponent(filename) 28 | self.filename = path 29 | self.store = DiomedeQuadStore(path: self.filename.path, create: true) 30 | } 31 | 32 | override func tearDown() { 33 | super.tearDown() 34 | #if os(macOS) 35 | let f = FileManager.default 36 | try? f.trashItem(at: self.filename, resultingItemURL: nil) 37 | #endif 38 | } 39 | 40 | func testTypeSets() throws { 41 | if let qs = store { 42 | let ex = TermNamespace(namespace: Namespace(value: "http://example.org/")) 43 | let rdf = TermNamespace(namespace: Namespace.rdf) 44 | let g = ex.graph 45 | 46 | let quads : [Quad] = [ 47 | Quad(subject: ex.s1, predicate: ex.p1, object: Term.trueValue, graph: g), 48 | 49 | Quad(subject: ex.s2, predicate: rdf.type, object: ex.Type1, graph: g), 50 | Quad(subject: ex.s2, predicate: ex.p1, object: Term.trueValue, graph: g), 51 | 52 | Quad(subject: ex.s3a, predicate: rdf.type, object: ex.Type1, graph: g), 53 | Quad(subject: ex.s3a, predicate: ex.p2, object: Term.trueValue, graph: g), 54 | Quad(subject: ex.s3a, predicate: ex.p3, object: Term.trueValue, graph: g), 55 | 56 | Quad(subject: ex.s3b, predicate: rdf.type, object: ex.Type2, graph: g), 57 | Quad(subject: ex.s3b, predicate: ex.p2, object: Term.trueValue, graph: g), 58 | Quad(subject: ex.s3b, predicate: ex.p3, object: Term.trueValue, graph: g), 59 | ] 60 | try 
qs.load(version: 0, quads: quads) 61 | 62 | 63 | try qs.computeCharacteristicSets(withTypeSets: true) 64 | XCTAssertEqual(qs.count, 9) 65 | 66 | let cs = try qs.characteristicSets(for: g, includeTypeSets: true) 67 | 68 | XCTAssertEqual(cs.sets.count, 3) 69 | 70 | let acs1 = try cs.aggregatedCharacteristicSet(matching: bgp(for: [ex.p1]), in: g, store: qs) 71 | XCTAssertEqual(acs1.count, 2) 72 | XCTAssertEqual(CharacteristicSet(acs1, from: qs).types, [Set([ex.Type1]): 1]) 73 | XCTAssertEqual(acs1.predicates.count, 1) // [ex.p1] 74 | 75 | let acs2 = try cs.aggregatedCharacteristicSet(matching: bgp(for: [ex.p1, rdf.type]), in: g, store: qs) 76 | XCTAssertEqual(acs2.count, 1) 77 | XCTAssertEqual(CharacteristicSet(acs2, from: qs).types, [Set([ex.Type1]): 1]) 78 | XCTAssertEqual(acs2.predicates.count, 2) // [ex.p1, rdf.type] 79 | 80 | let acs3 = try cs.aggregatedCharacteristicSet(matching: bgp(for: [rdf.type]), in: g, store: qs) 81 | XCTAssertEqual(acs3.count, 3) 82 | XCTAssertEqual(CharacteristicSet(acs3, from: qs).types, [Set([ex.Type1]): 2, Set([ex.Type2]): 1]) 83 | XCTAssertEqual(acs3.predicates.count, 1) // [rdf.type] 84 | } 85 | } 86 | 87 | private func bgp(for preds: [Term]) -> [TriplePattern] { 88 | let preds = preds.map { TriplePattern(subject: .variable("s", binding: true), predicate: .bound($0), object: .variable("o", binding: true)) } 89 | return preds 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /Sources/Diomede/DataEncoding.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DataEncoding.swift 3 | // Diomede 4 | // 5 | // Created by Gregory Todd Williams on 5/25/20. 
6 | // 7 | 8 | import Foundation 9 | 10 | public protocol DataEncodable { 11 | func asData() throws -> Data 12 | static func fromData(_ data: Data) throws -> Self 13 | } 14 | 15 | extension Data : DataEncodable { 16 | public func asData() -> Data { return self } 17 | public static func fromData(_ data: Data) -> Self { return data } 18 | } 19 | 20 | extension Data { 21 | public var _hexValue: String { 22 | var s = "0x" 23 | for b in self { 24 | s += String(format: "%02x", b) 25 | } 26 | return s 27 | } 28 | 29 | public var _stringValue: String { 30 | if let s = String(data: self, encoding: .utf8) { 31 | return s 32 | } else { 33 | return _hexValue 34 | } 35 | } 36 | } 37 | 38 | public struct QuadID: Hashable, Comparable, DataEncodable { 39 | public var a: UInt64 40 | public var b: UInt64 41 | public var c: UInt64 42 | public var d: UInt64 43 | 44 | public init(_ a: UInt64, _ b: UInt64, _ c: UInt64, _ d: UInt64) { 45 | self.a = a 46 | self.b = b 47 | self.c = c 48 | self.d = d 49 | } 50 | public var values: [UInt64] { 51 | return [a, b, c, d] 52 | } 53 | 54 | public subscript(_ i: Int) -> UInt64 { 55 | switch i { 56 | case 0: 57 | return a 58 | case 1: 59 | return b 60 | case 2: 61 | return c 62 | default: 63 | return d 64 | } 65 | } 66 | 67 | public func asData() -> Data { 68 | let data = [a, b, c, d].map { $0.asData() } 69 | let combined = data.reduce(Data(), { $0 + $1 }) 70 | return combined 71 | } 72 | public static func fromData(_ data: Data) throws -> Self { 73 | guard data.count == 32 else { 74 | throw DiomedeError.encodingError 75 | } 76 | 77 | // load 4 big-endian UInt64 values from the bytes of the Data object 78 | let size = MemoryLayout.size 79 | let values = data.withUnsafeBytes { (bp: UnsafeRawBufferPointer) -> [UInt64] in 80 | return stride(from: 0, to: size*4, by: size) 81 | .map { bp.load(fromByteOffset: $0, as: UInt64.self) } 82 | .map { UInt64(bigEndian: $0) } 83 | } 84 | return QuadID(values[0], values[1], values[2], values[3]) 85 | } 86 | 87 | 
public static func < (lhs: QuadID, rhs: QuadID) -> Bool { 88 | return lhs.values.lexicographicallyPrecedes(rhs.values) 89 | } 90 | } 91 | 92 | extension Array: DataEncodable where Element == Int { 93 | public func asData() -> Data { 94 | let data = self.map { $0.asData() } 95 | let combined = data.reduce(Data(), { $0 + $1 }) 96 | return combined 97 | } 98 | public static func fromData(_ data: Data) -> Self { 99 | let stride = 8 100 | let count = data.count / stride 101 | var i = data.startIndex 102 | var values = [Int]() 103 | for _ in 0.. Data { 114 | var be = self.bigEndian 115 | return Data(bytes: &be, count: 8) 116 | } 117 | 118 | public static func fromData(_ data: Data) -> Self { 119 | let be = data.withUnsafeBytes { (bp: UnsafeRawBufferPointer) -> UInt64 in 120 | return UInt64(bigEndian: bp.load(as: UInt64.self)) 121 | } 122 | return be 123 | } 124 | } 125 | 126 | extension Int : DataEncodable { 127 | public func asData() -> Data { 128 | var be = Int64(self).bigEndian 129 | return Data(bytes: &be, count: 8) 130 | } 131 | public static func fromData(_ data: Data) -> Self { 132 | let be = data.withUnsafeBytes { (bp: UnsafeRawBufferPointer) -> Int64 in 133 | return Int64(bigEndian: bp.load(as: Int64.self)) 134 | } 135 | return Int(be) 136 | } 137 | } 138 | 139 | extension String : DataEncodable { 140 | public func asData() throws -> Data { 141 | guard let data = self.data(using: .utf8) else { 142 | throw DiomedeError.encodingError 143 | } 144 | return data 145 | } 146 | public static func fromData(_ data: Data) throws -> Self { 147 | guard let s = String(data: data, encoding: .utf8) else { 148 | throw DiomedeError.encodingError 149 | } 150 | return s 151 | } 152 | } 153 | 154 | -------------------------------------------------------------------------------- /Tests/DiomedeTests/DiomedeQuadStorePerformance.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | import SPARQLSyntax 3 | @testable import 
DiomedeQuadStore 4 | 5 | #if os(Linux) 6 | extension DiomedeQuadStorePerformanceTests { 7 | static var allTests : [(String, (DiomedeQuadStorePerformanceTests) -> () throws -> Void)] { 8 | return [ 9 | ("testPerformance_emptyLoad_e3", testPerformance_emptyLoad_e3), 10 | ("testPerformance_emptyLoad_e4", testPerformance_emptyLoad_e4), 11 | ("testPerformance_nonEmptyLoad_e3", testPerformance_nonEmptyLoad_e3), 12 | ("testPerformance_nonEmptyLoad_e4", testPerformance_nonEmptyLoad_e4), 13 | ] 14 | } 15 | } 16 | #endif 17 | 18 | enum TestError: Error { 19 | case missingTestData 20 | case diomedeError 21 | } 22 | 23 | class DiomedeQuadStorePerformanceTests: XCTestCase { 24 | var filemanager : FileManager { return FileManager.default } 25 | 26 | func quadGenerator(_ count: Int) -> AnySequence { 27 | let graph = Term(value: "htp://example.org/graph", type: .iri) 28 | var j = 0 29 | let i = AnyIterator { () -> Quad? in 30 | if j >= count { return nil } 31 | j += 1 32 | let s = Term(iri: "http://example.org/s\(j)") 33 | let p = Term(iri: "http://example.org/p") 34 | let o = Term(string: "foo") 35 | let q = Quad(subject: s, predicate: p, object: o, graph: graph) 36 | return q 37 | } 38 | return AnySequence(i.makeIterator()) 39 | } 40 | 41 | func tempStore(in dir: URL) throws -> DiomedeQuadStore { 42 | let filename = "diomede-test-\(UUID().uuidString).db" 43 | let path = dir.appendingPathComponent(filename) 44 | // print(path) 45 | guard let store = DiomedeQuadStore(path: path.path, create: true) else { throw TestError.diomedeError } 46 | return store 47 | } 48 | 49 | func testPerformance_emptyLoad_e3() throws { 50 | let count = 1_000 51 | let dir = filemanager.temporaryDirectory 52 | defer { try? 
filemanager.removeItem(at: dir) } 53 | let initialCount = 0 54 | self.measure { 55 | do { 56 | let store = try tempStore(in: dir) 57 | 58 | XCTAssertEqual(store.count, initialCount) 59 | 60 | let quads = quadGenerator(count) 61 | try store.load(version: 0, quads: quads) 62 | XCTAssertEqual(store.count, initialCount+count) 63 | } catch (_) { 64 | XCTFail() 65 | } 66 | } 67 | } 68 | 69 | func testPerformance_emptyLoad_e4() throws { 70 | let count = 10_000 71 | let dir = filemanager.temporaryDirectory 72 | defer { try? filemanager.removeItem(at: dir) } 73 | let initialCount = 0 74 | self.measure { 75 | do { 76 | let store = try tempStore(in: dir) 77 | 78 | XCTAssertEqual(store.count, initialCount) 79 | 80 | let quads = quadGenerator(count) 81 | try store.load(version: 0, quads: quads) 82 | XCTAssertEqual(store.count, initialCount+count) 83 | } catch (_) { 84 | XCTFail() 85 | } 86 | } 87 | } 88 | 89 | func testPerformance_nonEmptyLoad_e3() throws { 90 | let count = 1_000 91 | let dir = filemanager.temporaryDirectory 92 | defer { try? filemanager.removeItem(at: dir) } 93 | let iri = Term(iri: "http://example.org/x") 94 | let q = Quad(subject: iri, predicate: iri, object: Term.falseValue, graph: iri) 95 | let initialCount = 1 96 | self.measure { 97 | do { 98 | let store = try tempStore(in: dir) 99 | try store.load(version: 0, quads: [q]) 100 | XCTAssertEqual(store.count, initialCount) 101 | 102 | let quads = quadGenerator(count) 103 | try store.load(version: 0, quads: quads) 104 | XCTAssertEqual(store.count, initialCount+count) 105 | } catch (_) { 106 | XCTFail() 107 | } 108 | } 109 | } 110 | 111 | func testPerformance_nonEmptyLoad_e4() throws { 112 | let count = 10_000 113 | let dir = filemanager.temporaryDirectory 114 | defer { try? 
filemanager.removeItem(at: dir) } 115 | let iri = Term(iri: "http://example.org/x") 116 | let q = Quad(subject: iri, predicate: iri, object: Term.falseValue, graph: iri) 117 | let initialCount = 1 118 | self.measure { 119 | do { 120 | let store = try tempStore(in: dir) 121 | try store.load(version: 0, quads: [q]) 122 | XCTAssertEqual(store.count, initialCount) 123 | 124 | let quads = quadGenerator(count) 125 | try store.load(version: 0, quads: quads) 126 | XCTAssertEqual(store.count, initialCount+count) 127 | } catch (_) { 128 | XCTFail() 129 | } 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /Sources/DiomedeQuadStore/Util.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Util.swift 3 | // DiomedeQuadStore 4 | // 5 | // Created by Gregory Todd Williams on 5/26/20. 6 | // 7 | 8 | import Foundation 9 | 10 | class LinkedListNode { 11 | var next: LinkedListNode? 12 | var previous: LinkedListNode? 13 | var key: K 14 | var value: V 15 | init(key: K, value: V, next: LinkedListNode?, previous: LinkedListNode?) { 16 | self.key = key 17 | self.value = value 18 | self.next = next 19 | self.previous = previous 20 | } 21 | } 22 | 23 | class LinkedList : Sequence { 24 | var head: LinkedListNode? 25 | weak var tail: LinkedListNode? 26 | var count: Int 27 | 28 | init() { 29 | count = 0 30 | head = nil 31 | tail = nil 32 | } 33 | 34 | deinit { 35 | if let head = self.head { 36 | var n : LinkedListNode? 
= head 37 | while n != nil { 38 | let next = n?.next 39 | n?.previous = nil 40 | n?.next = nil 41 | n = next 42 | } 43 | } 44 | self.tail = nil 45 | } 46 | 47 | func append(key: K, value: V) -> LinkedListNode { 48 | count += 1 49 | switch (head, tail) { 50 | case (_, .none): 51 | head = LinkedListNode(key: key, value: value, next: nil, previous: nil) 52 | tail = head 53 | case (_, .some(let t)): 54 | let node = LinkedListNode(key: key, value: value, next: nil, previous: t) 55 | t.next = node 56 | tail = node 57 | } 58 | return tail! 59 | } 60 | 61 | func prepend(node: LinkedListNode) { 62 | count += 1 63 | switch (head, tail) { 64 | case (.none, _): 65 | node.next = nil 66 | node.previous = nil 67 | head = node 68 | tail = head 69 | case (.some(let h), _): 70 | node.next = h 71 | node.previous = nil 72 | h.previous = node 73 | head = node 74 | } 75 | } 76 | 77 | func prepend(key: K, value: V) -> LinkedListNode { 78 | count += 1 79 | switch (head, tail) { 80 | case (.none, _): 81 | head = LinkedListNode(key: key, value: value, next: nil, previous: nil) 82 | tail = head 83 | case (.some(let h), _): 84 | let node = LinkedListNode(key: key, value: value, next: h, previous: nil) 85 | h.previous = node 86 | head = node 87 | } 88 | return head! 89 | } 90 | 91 | func removeLast() -> LinkedListNode? { 92 | switch tail { 93 | case .none: 94 | return nil 95 | case .some(let node): 96 | count -= 1 97 | if let p = node.previous { 98 | tail = p 99 | p.next = nil 100 | } else { 101 | head = nil 102 | tail = nil 103 | } 104 | return node 105 | } 106 | } 107 | 108 | func remove(node: LinkedListNode) { 109 | count -= 1 110 | if node === head && node === tail { 111 | count = 0 112 | head = nil 113 | tail = nil 114 | } else if node === head { 115 | let n = node.next! 116 | head = n 117 | n.previous = nil 118 | } else if node === tail { 119 | let p = node.previous! 120 | tail = p 121 | p.next = nil 122 | } else { 123 | let p = node.previous! 124 | let n = node.next! 
125 | p.next = n 126 | n.previous = p 127 | } 128 | } 129 | 130 | func makeIterator() -> AnyIterator> { 131 | var current = head 132 | return AnyIterator { 133 | let v = current 134 | if current != nil { 135 | current = current?.next 136 | } 137 | return v 138 | } 139 | } 140 | } 141 | 142 | 143 | class LRUCache : Sequence { 144 | var dict: [K:LinkedListNode] 145 | public var capacity: Int 146 | var list = LinkedList() 147 | 148 | var hit: Int 149 | var miss: Int 150 | 151 | public init(capacity: Int) { 152 | self.capacity = capacity 153 | self.dict = [K:LinkedListNode]() 154 | self.hit = 0 155 | self.miss = 0 156 | } 157 | 158 | public func removeValue(forKey key: K) -> V? { 159 | if let node = dict[key] { 160 | list.remove(node: node) 161 | dict.removeValue(forKey: key) 162 | return node.value 163 | } else { 164 | return nil 165 | } 166 | } 167 | 168 | public subscript(key: K) -> V? { 169 | get { 170 | if let node = dict[key] { 171 | hit += 1 172 | list.remove(node: node) 173 | list.prepend(node: node) 174 | return node.value 175 | } else { 176 | miss += 1 177 | return nil 178 | } 179 | } 180 | 181 | set(newValue) { 182 | if let node = dict[key] { 183 | list.remove(node: node) 184 | } 185 | if let value = newValue { 186 | let node = list.prepend(key: key, value: value) 187 | dict[key] = node 188 | } else { 189 | dict.removeValue(forKey: key) 190 | } 191 | if list.count > capacity { 192 | if let node = list.removeLast() { 193 | dict.removeValue(forKey: node.key) 194 | } 195 | } 196 | } 197 | } 198 | 199 | public func makeIterator() -> AnyIterator<(K, V)> { 200 | let i = list.makeIterator() 201 | return AnyIterator { 202 | if let node = i.next() { 203 | return (node.key, node.value) 204 | } else { 205 | return nil 206 | } 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /Sources/DiomedeQuadStore/RDFExtensions.swift: -------------------------------------------------------------------------------- 1 | // 2 
| // RDF-Extensions.swift 3 | // Diomede 4 | // 5 | // Created by Gregory Todd Williams on 5/23/20. 6 | // 7 | 8 | import Foundation 9 | #if os(macOS) 10 | import Compression 11 | #endif 12 | import CryptoSwift 13 | import SPARQLSyntax 14 | import Diomede 15 | 16 | extension Data { 17 | #if os(macOS) 18 | func uncompressed(size outputDataSize: Int) throws -> Data { 19 | let compressedSize = self.count 20 | let destinationBuffer = UnsafeMutablePointer.allocate(capacity: outputDataSize) 21 | 22 | var sourceBuffer = Array(repeating: 0, count: compressedSize) 23 | self.copyBytes(to: &sourceBuffer, count: compressedSize) 24 | 25 | let decodedSize = compression_decode_buffer(destinationBuffer, 26 | outputDataSize, 27 | &sourceBuffer, 28 | compressedSize, 29 | nil, 30 | COMPRESSION_ZLIB) 31 | guard decodedSize == outputDataSize else { 32 | destinationBuffer.deallocate() 33 | throw DiomedeError.encodingError 34 | } 35 | return Data(bytesNoCopy: destinationBuffer, count: outputDataSize, deallocator: .free) 36 | } 37 | 38 | func compressed60() -> Data? 
{ 39 | guard self.count > 100 else { return nil } 40 | let inputDataSize = self.count 41 | let byteSize = MemoryLayout.stride 42 | let bufferSize = inputDataSize / byteSize 43 | let destinationBuffer = UnsafeMutablePointer.allocate(capacity: bufferSize) 44 | 45 | var sourceBuffer = Array(repeating: 0, count: bufferSize) 46 | self.copyBytes(to: &sourceBuffer, count: inputDataSize) 47 | 48 | let compressedSize = compression_encode_buffer(destinationBuffer, 49 | inputDataSize, 50 | &sourceBuffer, 51 | inputDataSize, 52 | nil, 53 | COMPRESSION_ZLIB) 54 | let ratio = Double(compressedSize) / Double(inputDataSize) 55 | if ratio < 0.60 { 56 | // print("compression: \(compressedSize) / \(inputDataSize) (\(100.0 * ratio)%)") 57 | return Data(bytesNoCopy: destinationBuffer, count: compressedSize, deallocator: .free) 58 | } 59 | return nil 60 | } 61 | #endif 62 | 63 | func uuid() throws -> UUID { 64 | guard self.count == 16 else { 65 | throw DiomedeError.encodingError 66 | } 67 | 68 | var uu : uuid_t = UUID().uuid 69 | Swift.withUnsafeMutableBytes(of: &uu) { (ptr) -> () in 70 | self.copyBytes(to: ptr, count: 16) 71 | } 72 | return UUID(uuid: uu) 73 | } 74 | } 75 | 76 | extension Term: @retroactive DataEncodable { 77 | public func sha256() throws -> Data { 78 | let d = try self.asData() 79 | let term_key = d.sha256() 80 | return term_key 81 | } 82 | 83 | public func asData() throws -> Data { 84 | let s: String 85 | switch self.type { 86 | case .blank: 87 | if let u = UUID(uuidString: self.value), self.value.count == 36 { 88 | var uu = u.uuid 89 | let du = Data(bytes: &uu, count: 16) 90 | 91 | let d = try "u".asData() 92 | return d + du 93 | } 94 | s = "B\"" + self.value 95 | case .iri: 96 | if self.value.count == 45 && self.value.hasPrefix("urn:uuid:") { 97 | let suffix = String(self.value.dropFirst(9)) 98 | if let u = UUID(uuidString: suffix) { 99 | var uu = u.uuid 100 | let du = Data(bytes: &uu, count: 16) 101 | 102 | let d = try "U".asData() 103 | return d + du 104 | } 105 
| } 106 | s = "I\"" + self.value 107 | case .language(let lang): 108 | s = "L\(lang)\"" + self.value 109 | case .datatype(let dt): 110 | switch dt { 111 | case .string: 112 | // the Compression framework is not cross-platform, so this isn't supported currently 113 | // if let inputData = self.value.data(using: .utf8), let cd = inputData.compressed60() { 114 | // let d = "Z".data(using: .utf8)! 115 | // let s = inputData.count.asData() 116 | // return d + s + cd 117 | // } 118 | 119 | s = "S\"" + self.value 120 | case .integer: 121 | s = "i\"" + self.value 122 | default: 123 | s = "D\(dt.value)\"" + self.value 124 | } 125 | } 126 | return try s.asData() 127 | } 128 | 129 | public static func fromData(_ data: Data) throws -> Term { 130 | guard data.count >= 2 else { 131 | throw DiomedeError.encodingError 132 | } 133 | let c = String(UnicodeScalar(data[0])) 134 | switch c { 135 | // these are encodings which do not have a DOUBLE QUOTE followed by a string value 136 | case "U": 137 | let bytes = Data(data.dropFirst()) 138 | let uu = try bytes.uuid() 139 | return Term(iri: "urn:uuid:\(uu.uuidString.lowercased())") 140 | case "u": 141 | let bytes = Data(data.dropFirst()) 142 | let uu = try bytes.uuid() 143 | return Term(value: uu.uuidString.uppercased(), type: .blank) 144 | // the Compression framework is not cross-platform, so this isn't supported currently 145 | // case "Z": 146 | // let bytes = Data(data.dropFirst()) 147 | // let size = Int.fromData(bytes) 148 | // let buffer = bytes.dropFirst(8) 149 | // let data = try buffer.uncompressed(size: size) 150 | // guard let string = String(data: data, encoding: .utf8) else { 151 | // throw DiomedeError.encodingError 152 | // } 153 | // return Term(string: string) 154 | default: 155 | break 156 | } 157 | 158 | let s = try String.fromData(data) 159 | guard let i = s.firstIndex(of: "\"") else { 160 | throw DiomedeError.encodingError 161 | } 162 | let value = String(s.suffix(from: s.index(after: i))) 163 | switch c { 164 | 
case "B": 165 | return Term(value: value, type: .blank) 166 | case "I": 167 | return Term(value: value, type: .iri) 168 | case "L": 169 | let lang = String(s.dropFirst().prefix(while: { $0 != "\"" })) 170 | return Term(value: value, type: .language(lang)) 171 | case "S": 172 | return Term(value: value, type: .datatype(.string)) 173 | case "i": 174 | return Term(value: value, type: .datatype(.integer)) 175 | case "D": 176 | let dtvalue = String(s.dropFirst(1).prefix(while: { $0 != "\"" })) 177 | let dt = TermDataType(stringLiteral: dtvalue) 178 | return Term(value: value, type: .datatype(dt)) 179 | default: 180 | throw DiomedeError.unknownError 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /Tests/DiomedeTests/DiomedeCharacteristicSetTests.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | import SPARQLSyntax 3 | @testable import DiomedeQuadStore 4 | 5 | #if os(Linux) 6 | extension DiomedeCharacteristicSetTests { 7 | static var allTests : [(String, (DiomedeCharacteristicSetTests) -> () throws -> Void)] { 8 | return [ 9 | ("testCSStarCardinality", testCSStarCardinality), 10 | ] 11 | } 12 | } 13 | #endif 14 | 15 | class DiomedeCharacteristicSetTests: XCTestCase { 16 | var filename: URL! 17 | var graph: Term! 18 | var store: DiomedeQuadStore! 19 | var subjectBase: Int = 0 20 | 21 | override func setUp() { 22 | super.setUp() 23 | let f = FileManager.default 24 | let dir = f.temporaryDirectory 25 | let filename = "kineo-test-\(UUID().uuidString).db" 26 | print(filename) 27 | let path = dir.appendingPathComponent(filename) 28 | self.filename = path 29 | self.store = DiomedeQuadStore(path: self.filename.path, create: true) 30 | } 31 | 32 | override func tearDown() { 33 | super.tearDown() 34 | #if os(macOS) 35 | let f = FileManager.default 36 | try? 
f.trashItem(at: self.filename, resultingItemURL: nil) 37 | #endif 38 | } 39 | 40 | func generateSimpleStarQuads(subjectCount: Int, predicates: Set, graph: Term) -> [Quad] { 41 | var count = 0 42 | var quads = [Quad]() 43 | for _ in 0.. [Quad] { 59 | var count = 0 60 | var quads = [Quad]() 61 | for _ in 0.. 65 | 66 | Characteristic Set: count = 4 67 | 4 68 | 4 69 | 70 | Characteristic Set: count = 2 71 | 2 72 | 2 73 | 74 | Characteristic Set: count = 1 75 | 1 76 | 1 77 | 1 78 | 1 79 | 80 | Characteristic Set: count = 1 81 | 1 82 | 1 83 | 1 84 | 1 85 | 1 86 | 1 87 | 1 88 | 1 89 | 90 | Number of Characteristic Sets: 4 91 | ``` 92 | 93 | ```sh 94 | % diomede sample-database.db quads 95 | ``` 96 | 97 | ``` 98 | . 99 | . 100 | . 101 | _:C368E10B-04CF-4D5B-9751-5384648F219E . 102 | _:7DBAAF97-9750-4325-917F-87B753C393E5 . 103 | . 104 | . 105 | _:6CA4A01D-3C96-4974-B002-39D7F4535564 . 106 | "Auction Event" . 107 | ... 108 | ``` 109 | 110 | 111 | 112 | ## Data Layout 113 | 114 | The LMDB file structure is organized into a number of named databases. 115 | The required databases are: 116 | 117 | * `quads` 118 | 119 | This is the primary table representing quads, mapping a quad ID (8-byte integer) to four term IDs (4 concatenated 8-byte integers). 120 | Term IDs are stored in subject-predicate-object-graph order. 121 | 122 | * `fullIndexes` 123 | 124 | This is a database containing a list of all the (optional) quad-ordering indexes, mapping the index name (permutations of "spog") to an array containing the ordinal of each term position in the index order. 125 | The ordinals are represented as 8-byte integers and must be a permutation of `[0,1,2,3]` (which itself represents the subject-predicate-object-graph order). 126 | An entry in this database implies the existence of a database whose name is the entry's key. 127 | 128 | * `term_to_id` 129 | 130 | This is a mapping from the SHA256 hash of [encoded term values](#term-encoding) to term IDs (8-byte integers). 
131 | 132 | * `id_to_term` 133 | 134 | This is a mapping from term IDs (8-byte integers) to [encoded term values](#term-encoding). 135 | 136 | * `graphs` 137 | 138 | This is a table of the named graphs present in the database, mapping term IDs to empty (0-byte) values. 139 | Its data is redundant, being computable from the unique terms represented by the graph position of each record in the `quads` table. 140 | 141 | * `stats` 142 | 143 | This is a table of metadata useful to the Diomede system, to end-users, or both. 144 | Some keys that are present are: 145 | 146 | * `Diomede-Version` 147 | * `Last-Modified` 148 | * `next_unassigned_term_id` 149 | * `next_unassigned_quad_id` 150 | 151 | Optional databases used for indexing may also be present: 152 | 153 | * Any "full index" databases named with a permutation of "spog" (e.g. `spog` and `pogs`) 154 | 155 | These databases map four term IDs (4 concatenated 8-byte integers) to a quad ID (8-byte integer) in the order implied by the database name (and given explicitly as the value of the corresponding entry in the `fullIndexes` database). 156 | 157 | * `characteristicSets` 158 | 159 | This is a database containing an encoding of the [Characteristic Sets](http://www.csd.uoc.gr/~hy561/papers/storageaccess/optimization/Characteristic%20Sets.pdf) for each named graph in the database. 160 | The keys in the database are a pair (graph term ID, sequence number), encoded as 2 concatenated 8-byte integers. 161 | The values are arrays of 8-byte integers in which the first element is the total cardinality for the Characteristic Set, and the remaining elements are (predicate term ID, total occurrence count, min occurrences, max occurrences) tuples.
162 | 163 | ## Term Encoding 164 | 165 | The encoding of RDF terms (performed in [RDFExtensions.swift](Sources/DiomedeQuadStore/RDFExtensions.swift)) produces a UTF-8 encoded string which is either stored in the database (in `id_to_term`) or hashed with SHA256 and stored (in `term_to_id`). 166 | The encodings depend on the term type, but all identify the type with the first character. 167 | 168 | * IRIs 169 | 170 | * LATIN CAPITAL LETTER I (U+0049) 171 | * QUOTATION MARK (U+0022) 172 | * IRI value 173 | 174 | * Blank Nodes 175 | * LATIN CAPITAL LETTER B (U+0042) 176 | * QUOTATION MARK (U+0022) 177 | * Blank node identifier 178 | 179 | * Language Literals 180 | 181 | * LATIN CAPITAL LETTER L (U+004C) 182 | * Language tag 183 | * QUOTATION MARK (U+0022) 184 | * Literal value 185 | 186 | * `xsd:integer` Literals 187 | 188 | * LATIN SMALL LETTER I (U+0069) 189 | * QUOTATION MARK (U+0022) 190 | * Integer string value 191 | 192 | * `xsd:string` Literals 193 | 194 | * LATIN CAPITAL LETTER S (U+0053) 195 | * QUOTATION MARK (U+0022) 196 | * Literal value 197 | 198 | * Other Datatype Literals 199 | 200 | * LATIN CAPITAL LETTER D (U+0044) 201 | * Datatype IRI value 202 | * QUOTATION MARK (U+0022) 203 | * Literal value 204 | 205 | Note that no canonicalization or unicode normalization is performed. 206 | 207 | ## API and Design Choices 208 | 209 | All integers are 8 bytes and stored as big-endian. 210 | 211 | Effort is made to keep LMDB transactions short-lived. 212 | This means that matching operations are generally performed atomically, materializing an entire list of term or quad IDs. 213 | However, in an attempt to benefit from some degree of pipelining and avoid unnecessary work (e.g. if a limited number of matches is requested), materializing term values is performed in batches, with each batch being processed in its own read transaction. 214 | This assumes that terms are never deleted from the `term_to_id` and `id_to_term` databases.
#if os(Linux)
// Manual test manifest used by LinuxMain.swift, where XCTest cannot discover
// test methods through the Objective-C runtime. Every `test*` method of
// `DiomedeQuadStoreTests` must be listed here or it will not run on Linux.
extension DiomedeQuadStoreTests {
    static var allTests : [(String, (DiomedeQuadStoreTests) -> () throws -> Void)] {
        return [
            ("testSimpleLoadQuery", testSimpleLoadQuery),
            ("testSimpleDelete", testSimpleDelete),
            ("testDuplicateLoad", testDuplicateLoad),
            ("testRepeatedDuplicateLoad", testRepeatedDuplicateLoad),
            ("testCountQuads", testCountQuads),
            ("testUUIDTerm_toData", testUUIDTerm_toData),
            ("testUUIDTerm_fromData", testUUIDTerm_fromData),
            ("testBlankTerm_toData", testBlankTerm_toData),
            ("testBlankTerm_fromData", testBlankTerm_fromData),
            // Previously missing: the method exists in the class but was never
            // registered, so it was not executed via this manifest.
            ("testDropGraph", testDropGraph),
        ]
    }
}
#endif
27 | 28 | override func setUp() { 29 | super.setUp() 30 | let f = FileManager.default 31 | let dir = f.temporaryDirectory 32 | let filename = "kineo-test-\(UUID().uuidString).db" 33 | print(filename) 34 | let path = dir.appendingPathComponent(filename) 35 | self.filename = path 36 | self.store = DiomedeQuadStore(path: self.filename.path, create: true) 37 | self.graph = Term(value: "http://example.org/", type: .iri) 38 | // try? self.store.load(version: 1, quads: testQuads) 39 | } 40 | 41 | override func tearDown() { 42 | super.tearDown() 43 | #if os(macOS) 44 | let f = FileManager.default 45 | try? f.trashItem(at: self.filename, resultingItemURL: nil) 46 | #endif 47 | } 48 | 49 | func testSimpleLoadQuery() throws { 50 | if let qs = store { 51 | try qs.load(version: 0, quads: [ 52 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: Term(iri: "tag:graph")), 53 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "p2"), object: Term(integer: 7), graph: Term(iri: "tag:graph")), 54 | ]) 55 | XCTAssertEqual(qs.count, 2) 56 | 57 | let qp = QuadPattern.all 58 | let matchingQuads = try Array(qs.quads(matching: qp)) 59 | XCTAssertEqual(qs.count, 2) 60 | for q in matchingQuads { 61 | XCTAssertEqual(q.subject, Term(iri: "s")) 62 | let p = q.predicate 63 | switch p.value { 64 | case "p1": 65 | XCTAssertEqual(q.object, Term(string: "o")) 66 | case "p2": 67 | XCTAssertEqual(q.object, Term(integer: 7)) 68 | default: 69 | XCTFail() 70 | } 71 | } 72 | } 73 | } 74 | 75 | func testSimpleDelete() throws { 76 | if let qs = store { 77 | let q1 = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: Term(iri: "tag:graph")) 78 | let q2 = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p2"), object: Term(integer: 7), graph: Term(iri: "tag:graph")) 79 | let q3 = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p3"), object: Term(integer: 8), graph: Term(iri: "tag:graph")) 80 | try qs.load(version: 0, 
quads: [q1, q2, q3]) 81 | XCTAssertEqual(qs.count, 3) 82 | 83 | try qs.delete(quads: [q1, q3]) 84 | XCTAssertEqual(qs.count, 1) 85 | 86 | let qp = QuadPattern.all 87 | let matchingQuads = try Array(qs.quads(matching: qp)) 88 | XCTAssertEqual(qs.count, 1) 89 | let q = matchingQuads[0] 90 | XCTAssertEqual(q, q2) 91 | } 92 | } 93 | 94 | func testDuplicateLoad() throws { 95 | if let qs = store { 96 | let q = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: Term(iri: "tag:graph")) 97 | try qs.load(version: 0, quads: [q, q]) 98 | XCTAssertEqual(qs.count, 1) 99 | } 100 | } 101 | 102 | func testRepeatedDuplicateLoad() throws { 103 | if let qs = store { 104 | XCTAssertEqual(qs.count, 0) 105 | let q = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: Term(iri: "tag:graph")) 106 | try qs.load(version: 0, quads: [q, q]) 107 | try qs.load(version: 0, quads: [q, q]) 108 | XCTAssertEqual(qs.count, 1) 109 | } 110 | } 111 | 112 | func testCountQuads() throws { 113 | if let qs = store { 114 | try qs.load(version: 0, quads: [ 115 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: Term(iri: "tag:graph")), 116 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "p2"), object: Term(integer: 7), graph: Term(iri: "tag:graph")), 117 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "p3"), object: Term(iri: "s"), graph: Term(iri: "tag:graph")), 118 | Quad(subject: Term(iri: "s"), predicate: Term(iri: "tag:graph"), object: Term(iri: "s"), graph: Term(iri: "tag:graph")), 119 | ]) 120 | XCTAssertEqual(qs.count, 4) 121 | 122 | let qp1 = QuadPattern.all 123 | try XCTAssertEqual(qs.countQuads(matching: qp1), 4) 124 | 125 | let sVar = Node.variable("s", binding: true) 126 | let pVar = Node.variable("p", binding: true) 127 | let oVar = Node.variable("o", binding: true) 128 | let graph = Node.bound(Term(iri: "tag:graph")) 129 | 130 | let qp2 = QuadPattern(subject: sVar, 
predicate: pVar, object: oVar, graph: graph) 131 | try XCTAssertEqual(qs.countQuads(matching: qp2), 4) 132 | 133 | // repeated variable ?s in both subject and object positions 134 | let qp3 = QuadPattern(subject: sVar, predicate: pVar, object: sVar, graph: graph) 135 | try XCTAssertEqual(qs.countQuads(matching: qp3), 2) 136 | 137 | // repeated variable ?s in subject, predicate, and object positions 138 | let qp4 = QuadPattern(subject: sVar, predicate: sVar, object: sVar, graph: graph) 139 | try XCTAssertEqual(qs.countQuads(matching: qp4), 0) 140 | 141 | // repeated variable ?s in subject, and object positions, and ?g in predicate, and graph positions 142 | let qp5 = QuadPattern(subject: sVar, predicate: pVar, object: sVar, graph: pVar) 143 | try XCTAssertEqual(qs.countQuads(matching: qp5), 1) 144 | } 145 | } 146 | 147 | func testUUIDTerm_toData() throws { 148 | let t = Term(iri: "urn:uuid:08b7a198-7eaf-4a6a-b0f4-258cb7e299fe") 149 | let d = try t.asData() 150 | let bytes : [UInt8] = [0x55, 0x08, 0xb7, 0xa1, 0x98, 0x7e, 0xaf, 0x4a, 0x6a, 0xb0, 0xf4, 0x25, 0x8c, 0xb7, 0xe2, 0x99, 0xfe] 151 | let expected = Data(bytes: bytes, count: 17) 152 | XCTAssertEqual(d, expected) 153 | } 154 | 155 | func testUUIDTerm_fromData() throws { 156 | let bytes : [UInt8] = [0x55, 0x08, 0xb7, 0xa1, 0x98, 0x7e, 0xaf, 0x4a, 0x6a, 0xb0, 0xf4, 0x25, 0x8c, 0xb7, 0xe2, 0x99, 0xfe] 157 | let data = Data(bytes: bytes, count: 17) 158 | let term = try Term.fromData(data) 159 | let expected = Term(iri: "urn:uuid:08b7a198-7eaf-4a6a-b0f4-258cb7e299fe") 160 | XCTAssertEqual(term, expected) 161 | } 162 | 163 | func testBlankTerm_toData() throws { 164 | let t = Term(value: "08B7A198-7EAF-4A6A-B0F4-258CB7E299FE", type: .blank) 165 | let d = try t.asData() 166 | let bytes : [UInt8] = [0x75, 0x08, 0xb7, 0xa1, 0x98, 0x7e, 0xaf, 0x4a, 0x6a, 0xb0, 0xf4, 0x25, 0x8c, 0xb7, 0xe2, 0x99, 0xfe] 167 | print(d._hexValue) 168 | let expected = Data(bytes: bytes, count: 17) 169 | XCTAssertEqual(d, expected) 170 | } 171 
| 172 | func testBlankTerm_fromData() throws { 173 | let bytes : [UInt8] = [0x75, 0x08, 0xb7, 0xa1, 0x98, 0x7e, 0xaf, 0x4a, 0x6a, 0xb0, 0xf4, 0x25, 0x8c, 0xb7, 0xe2, 0x99, 0xfe] 174 | let data = Data(bytes: bytes, count: 17) 175 | let term = try Term.fromData(data) 176 | let expected = Term(value: "08B7A198-7EAF-4A6A-B0F4-258CB7E299FE", type: .blank) 177 | XCTAssertEqual(term, expected) 178 | } 179 | 180 | func testDropGraph() throws { 181 | if let qs = store { 182 | for g in ["tag:graph1", "tag:graph2"] { 183 | let graph = Term(iri: g) 184 | let q = Quad(subject: Term(iri: "s"), predicate: Term(iri: "p1"), object: Term(string: "o"), graph: graph) 185 | try qs.load(version: 0, quads: [q]) 186 | } 187 | XCTAssertEqual(qs.count, 2) 188 | let graphsPre = Set(qs.graphs().map { $0.value }) 189 | XCTAssertEqual(graphsPre, ["tag:graph1", "tag:graph2"]) 190 | 191 | let g1 = Term(iri: "tag:graph1") 192 | try qs.drop(graph: g1) 193 | 194 | XCTAssertEqual(qs.count, 1) 195 | let graphsPost = Set(qs.graphs().map { $0.value }) 196 | XCTAssertEqual(graphsPost, ["tag:graph2"]) 197 | 198 | XCTAssertNoThrow(try qs.verify()) 199 | } 200 | } 201 | 202 | 203 | // the Compression framework is not cross-platform, so this isn't supported currently 204 | // func testLargeLiteralEncoding() throws { 205 | // let string = "This was primarily a book sale (2005 lots), although many lots of prints and curiosa as well as some drawings were also included. Two owners are named on the title page of the catalogue, and it is uncertain which of the two is supposed to have owned the paintings. Mr. Pieter de Malapert (1740-1806) lived in a house called Plettenberg at Jutphaas near Utrecht, a possession of the Malapert family since the late sixteenth century, after inheriting it from his father, Louis de Malapert, in 1782. He had a law degree from Utrecht University and was canon at Utrecht cathedral. At his death his estate was estimated to be worth fl. 99,286:7:4. 
/// A set of type term IDs within one graph, paired with a count of how many
/// "stars" were recorded for exactly that set (see `addStar()`).
public struct TypeIDSet: Codable {
    public typealias TermID = UInt64
    /// Term ID of the graph this set belongs to.
    public var graph: TermID
    /// Number of stars recorded for this set.
    public var count: Int
    /// Term IDs of the types in this set.
    public var types: Set<TermID>
    // NOTE: whenever new variables are added here, they must be serialized and deserialized
    //       in the TypeIDSet extension below that implements asData()/fromData().

    /// Creates a set for `graph` with the given `types` and an initial `count` (0 by default).
    public init(graph: TermID, types: Set<TermID>, count: Int = 0) {
        self.count = count
        self.graph = graph
        self.types = types
    }

    /// Merges `other` into this value in place: counts are summed and the
    /// type sets are unioned. The graph ID of `self` is kept.
    public mutating func formAggregation(_ other: TypeIDSet) {
        self.count += other.count
        self.types.formUnion(other.types)
    }

    /// Non-mutating counterpart of `formAggregation(_:)`.
    ///
    /// Previously this returned only `self.types`, silently dropping the
    /// types of `other`, so it disagreed with `formAggregation(_:)`. It now
    /// delegates to the mutating form so the two cannot drift apart.
    ///
    /// - Returns: A new set carrying `self.graph`, the summed count and the
    ///   union of both type sets.
    public func aggregate(_ other: TypeIDSet) -> TypeIDSet {
        var combined = self
        combined.formAggregation(other)
        return combined
    }

    /// Records one more star for this set.
    public mutating func addStar() {
        // caller is responsible for ensuring that all added stars have the same types
        self.count += 1
    }

    /// Whether every type in `subset` is also present in this set.
    /// Counts and graph IDs are not compared.
    func isSuperset(of subset: TypeIDSet) -> Bool {
        return types.isSuperset(of: subset.types)
    }
}
57 | self.types.insert(term) 58 | } 59 | } 60 | 61 | public init(types: Set) { 62 | self.count = 0 63 | self.types = types 64 | } 65 | 66 | public init(types: Set, count: Int) { 67 | self.count = count 68 | self.types = types 69 | } 70 | 71 | public mutating func formUnion(_ other: TypeSet) { 72 | self.count += other.count 73 | self.types.formUnion(other.types) 74 | } 75 | 76 | public func union(_ other: TypeSet) -> TypeSet { 77 | let count = self.count + other.count 78 | let types = self.types.union(other.types) 79 | return TypeSet(types: types, count: count) 80 | } 81 | 82 | public func isSuperset(of subset: TypeSet) -> Bool { 83 | return types.isSuperset(of: subset.types) 84 | } 85 | } 86 | 87 | public struct TypeDataSet { 88 | typealias TermID = UInt64 89 | var typeSets: [TypeIDSet] 90 | var store: DiomedeQuadStore 91 | 92 | public var sets: [TypeSet] { 93 | return typeSets.map { 94 | TypeSet($0, from: store) 95 | } 96 | } 97 | 98 | public init(_ store: DiomedeQuadStore, typeSets: [TypeIDSet]) throws { 99 | self.store = store 100 | self.typeSets = typeSets 101 | } 102 | 103 | public init(_ store: DiomedeQuadStore, in graph: Term) throws { 104 | self.store = store 105 | if let bestIndex = try store.bestIndex(matchingBoundPositions: [0, 1, 3]) { 106 | if bestIndex == .gpso { 107 | // we can access triples sorted by subject, meaning we can pipeline the first grouping without keeping all triples in memory 108 | self.typeSets = try TypeDataSet.generateTypeSets_ordered(store: store, using: bestIndex, in: graph) 109 | return 110 | } 111 | } 112 | self.typeSets = try TypeDataSet.generateTypeSets_naive(store: store, in: graph) 113 | } 114 | 115 | static func generateTypeSets_ordered(store: DiomedeQuadStore, using index: DiomedeQuadStore.IndexOrder, in graph: Term) throws -> [TypeIDSet] { 116 | precondition(index == .gpso) 117 | var characteristicSets = [TypeIDSet]() 118 | var lastSubject: TermID? 
= nil 119 | var triples = [[TermID]]() 120 | var css = [Set: TypeIDSet]() 121 | 122 | var qp = QuadPattern.all 123 | qp.graph = .bound(graph) 124 | 125 | guard case .bound(let graph) = qp.graph, let gid = try store.id(for: graph), let typeid = try store.id(for: Term.rdf("type")) else { 126 | throw DiomedeError.indexError 127 | } 128 | 129 | let quadIds = try store.quadIds(usingIndex: index, withPrefix: [gid, typeid]) 130 | for tids in quadIds { 131 | let t = tids[0..<3] 132 | let s = t[0] 133 | if let last = lastSubject, last != s { 134 | let types = Set(triples.map { $0[2] }) 135 | 136 | css[types, default: TypeIDSet(graph: gid, types: types)].addStar() 137 | triples = [] 138 | } 139 | triples.append(Array(t)) 140 | lastSubject = s 141 | } 142 | 143 | // handle the remaining triples 144 | if !triples.isEmpty { 145 | let types = Set(triples.map { $0[2] }) 146 | 147 | css[types, default: TypeIDSet(graph: gid, types: types)].addStar() 148 | } 149 | 150 | characteristicSets.append(contentsOf: css.values) 151 | return characteristicSets 152 | } 153 | 154 | static func generateTypeSets_naive(store: DiomedeQuadStore, in graph: Term) throws -> [TypeIDSet] { 155 | var typeSets = [TypeIDSet]() 156 | var allTypes = [TermID: Set]() 157 | var css = [Set: TypeIDSet]() 158 | 159 | var qp = QuadPattern.all 160 | qp.graph = .bound(graph) 161 | qp.predicate = .bound(Term.rdf("type")) 162 | 163 | for tids in try store.quadIds(matching: qp) { 164 | let t = tids[0..<3] 165 | let s = t[0] 166 | let type = t[2] 167 | allTypes[s, default: []].insert(type) 168 | } 169 | 170 | guard let gid = try store.id(for: graph) else { 171 | throw DiomedeError.indexError 172 | } 173 | 174 | for (_, types) in allTypes { 175 | css[types, default: TypeIDSet(graph: gid, types: types)].addStar() 176 | } 177 | 178 | typeSets.append(contentsOf: css.values) 179 | return typeSets 180 | } 181 | 182 | public func selectivity(of object: Term, given predicate: Node, in graph: Term, store: DiomedeQuadStore) throws 
-> Double { 183 | var pattern = QuadPattern.all 184 | pattern.graph = .bound(graph) 185 | pattern.predicate = predicate 186 | let p_count = try store.countQuads(matching: pattern) 187 | pattern.object = .bound(object) 188 | let op_count = try store.countQuads(matching: pattern) 189 | 190 | let s = Double(op_count) / Double(p_count) 191 | // print("selectivity of \(object) given \(predicate) in \(graph): \(s)") 192 | return s 193 | } 194 | 195 | public var instanceCount: Int { 196 | return typeSets.reduce(0, { $0 + $1.count }) 197 | } 198 | 199 | public func aggregatedTypeSet(matching bgp: [TriplePattern], in graph: Term, store: DiomedeQuadStore) throws -> TypeIDSet { 200 | guard let gid = try store.id(for: graph) else { 201 | throw DiomedeError.indexError 202 | } 203 | 204 | let typeTriples = bgp.filter { (t) in 205 | if case .bound(Term.rdf("type")) = t.predicate { 206 | return true 207 | } 208 | return false 209 | } 210 | let types = typeTriples.map { (t) -> Term? in 211 | let n = t.object 212 | if case .bound(let term) = n { 213 | return term 214 | } else { 215 | return nil 216 | } 217 | }.compactMap { $0 } 218 | 219 | let subset = try self.typeIDSet(matching: Set(types), in: graph, store: store) 220 | let matching = typeSets.filter { $0.isSuperset(of: subset) } 221 | let acs = matching.reduce(TypeIDSet(graph: gid, types: [])) { $0.aggregate($1) } 222 | return acs 223 | } 224 | 225 | public func typeIDSet(matching types: Set, in graph: Term, store: DiomedeQuadStore) throws -> TypeIDSet { 226 | var termIds = [UInt64]() 227 | try store.env.read { (txn) -> Int in 228 | for term in types { 229 | guard let id = try store.id(for: term, txn: txn) else { 230 | throw DiomedeError.nonExistentTermError 231 | } 232 | termIds.append(id) 233 | } 234 | return 0 235 | } 236 | 237 | guard let gid = try store.id(for: graph) else { 238 | throw DiomedeError.indexError 239 | } 240 | 241 | var subset = TypeIDSet(graph: gid, types: Set(termIds)) 242 | subset.addStar() 243 | // the 
counts don't matter in this TypeIDSet because it will only be used 244 | // in a subsequent code to match supersets (which will contain real counts of the data). 245 | return subset 246 | } 247 | 248 | // public func starCardinality(matching bgp: [TriplePattern], in graph: Term, store: DiomedeQuadStore) throws -> Double { 249 | // let subset = try self.typeIDSet(matching: bgp, in: graph, store: store) 250 | // let q = bgp 251 | // 252 | // var card = 0.0 253 | // let matching = typeSets.filter { $0.isSuperset(of: subset) } 254 | //// let subsetPreds = store.termIterator(fromIds: Array(subset.predicates)).map { $0.description }.sorted() 255 | //// print("\(matching.count) characteristic sets match: \(subsetPreds)") 256 | // for set in matching { 257 | //// let cs = TypeSet(set, from: store) 258 | //// print("matching set: \(cs)") 259 | // let distinct = Double(set.count) 260 | // var m = 1.0 261 | // var o = 1.0 262 | // for t in q { 263 | // let pred = t.predicate 264 | // let obj = t.object 265 | // if case .bound(let obj) = obj { 266 | // o = try Swift.min(o, self.selectivity(of: obj, given: pred, in: graph, store: store)) 267 | // } else if case .bound(let pred) = pred { 268 | // guard let pid = try store.id(for: pred) else { 269 | // throw DiomedeError.nonExistentTermError 270 | // } 271 | // let tm = Double(set.predCounts[pid]?.sum ?? 
0) / distinct 272 | //// print("\(tm) <= \(t)") 273 | // m *= tm 274 | // } else { 275 | // // unbound predicate; sum up all the counts 276 | // let allPredCounts = set.predCounts.values.map { Double($0.sum) }.reduce(0.0) { $0 + $1 } 277 | // let tm = allPredCounts / distinct 278 | //// print("\(tm) <= \(t)") 279 | // m *= tm 280 | // } 281 | // } 282 | // let prod = distinct * m * o 283 | //// print("\(distinct) * \(m) * \(o) = \(prod)") 284 | // card += prod 285 | // } 286 | //// print("= \(card)") 287 | // return card 288 | // } 289 | 290 | } 291 | 292 | extension TypeDataSet: Sequence { 293 | public func makeIterator() -> AnyIterator { 294 | return AnyIterator(typeSets.makeIterator()) 295 | } 296 | } 297 | 298 | extension TypeSet: CustomDebugStringConvertible { 299 | public var debugDescription: String { 300 | return "TypeSet(\(count); \(types.sorted()))" 301 | } 302 | } 303 | 304 | extension TypeIDSet: CustomDebugStringConvertible { 305 | public var debugDescription: String { 306 | return "TypeIDSet(\(count); \(types.sorted()))" 307 | } 308 | } 309 | 310 | extension TypeDataSet: CustomDebugStringConvertible { 311 | public var debugDescription: String { 312 | var s = "Type Sets [\n" 313 | for set in typeSets.sorted(by: { (a, b) in return a.count > b.count }) { 314 | s += "\t\(set.debugDescription)\n" 315 | } 316 | s += "]\n" 317 | return s 318 | } 319 | } 320 | 321 | extension DiomedeQuadStore { 322 | public var hasTypeSets: Bool { 323 | let indexName = "typeSets" 324 | 325 | do { 326 | let databases = Set(try env.databases()) 327 | return databases.contains(indexName) 328 | } catch { 329 | return false 330 | } 331 | } 332 | 333 | public var typeSetsAreAccurate: Bool { 334 | guard hasTypeSets else { return false } 335 | let csHeader = "TypeSets-Last-Modified" 336 | let quadsHeader = "Quads-Last-Modified" 337 | if let csDate = self.read(mtimeHeader: csHeader), let quadsDate = self.read(mtimeHeader: quadsHeader) { 338 | return csDate >= quadsDate 339 | } 340 | 
extension TypeIDSet {
    /// Serializes this set as an integer array: the star count first, then
    /// each type ID.
    ///
    /// Type IDs are written in ascending order so the same set always
    /// produces the same bytes; `fromData(_:in:)` rebuilds a `Set`, so the
    /// order does not affect decoding.
    public func asData() throws -> Data {
        // NOTE: this does not contain the graph ID, which is serialized in the key
        var value = [count]
        value.append(contentsOf: types.sorted().map { Int($0) })
        return value.asData()
    }

    /// Rebuilds a `TypeIDSet` from the form written by `asData()`.
    ///
    /// - Parameters:
    ///   - data: The stored value (count followed by type IDs).
    ///   - gid: The graph term ID, which is not part of `data`.
    /// - Throws: `DiomedeError.indexError` when `data` decodes to an empty
    ///   array. Previously this case trapped on `values[0]`.
    public static func fromData(_ data: Data, in gid: TermID) throws -> Self {
        let values = [Int].fromData(data)
        guard let count = values.first else {
            throw DiomedeError.indexError
        }
        let types = Set(values.dropFirst().map { UInt64($0) })
        return TypeIDSet(graph: gid, types: types, count: count)
    }
}
/// Formats a byte count with the largest unit (bytes, kB, MB, GB) for which
/// the value is still at least 1, using 1024-based steps and integer
/// (truncating) division. Values beyond the GB range stay expressed in GB.
func humanReadable(bytes: Int) -> String {
    let units = [" bytes", "kB", "MB", "GB"]
    var unitIndex = 0
    var remaining = bytes
    // Step up one unit at a time until the value fits or units run out.
    while unitIndex < units.count - 1 && remaining >= 1024 {
        remaining /= 1024
        unitIndex += 1
    }
    return "\(remaining)\(units[unitIndex])"
}
/// Prints every type set in `dataset`, largest count first, followed by the
/// sorted list of types in each set.
///
/// - Parameters:
///   - graph: Not used by the body; kept so existing call sites still compile.
///   - dataset: The type sets to print.
///   - depth: Number of spaces to prefix to each printed line.
func printTypeSets(for graph: Term, in dataset: TypeDataSet, depth: Int = 0) {
    // `sorted(by:)` requires a strict weak ordering; the previous `>=`
    // predicate returned true for equal elements, which violates that.
    let sets = dataset.sets.sorted { $0.count > $1.count }
    print("")
    let indent = String(repeating: " ", count: depth)
    for set in sets {
        print("\(indent)Type Set: count = \(set.count)")
        for type in set.types.sorted() {
            print("\(indent) \(type)")
        }
        print("")
    }
}
115 | 116 | quads 117 | Print all the quads in the quadstore. 118 | 119 | indexes 120 | Print the name of all indexes in the quadstore. 121 | 122 | bestIndex POSITION1 [POSITION2...] 123 | Print the name of the index best suited to match quads 124 | with the named positions bound. Position names are one 125 | of: "subject", "predicate", "object", or "graph". 126 | 127 | If no quad indexes are available, an error is reported 128 | and a non-zero value is returned. 129 | 130 | cs GRAPH-IRI 131 | Print the Characteristic Sets for the specified graph. 132 | 133 | prefix [clear | PREFIX IRI] 134 | 135 | If a PREFIX and IRI are given, adds them to the quadstore 136 | as a prefix (namespace, iri) pair. 137 | 138 | If the `clear` sub-command is given, all prefix 139 | pairs are removed. 140 | 141 | Otherwise, prints all prefix pairs using SPARQL syntax. 142 | 143 | """) 144 | 145 | exit(0) 146 | } 147 | 148 | 149 | //struct Stdout: TextOutputStream { 150 | // internal init() {} 151 | // 152 | // internal mutating func write(_ string: String) { 153 | // if string.isEmpty { return } 154 | // var string = string 155 | // _ = string.withUTF8 { utf8 in 156 | // fwrite(utf8.baseAddress!, 1, utf8.count, stdout) 157 | // } 158 | // } 159 | //} 160 | // 161 | //func printQuads(_ iter: AnyIterator) { 162 | // var out = Stdout() 163 | // let seq = AnySequence(iter) 164 | // for q in seq { 165 | // for t in q { 166 | // t.printNTriplesString(to: &out) 167 | // } 168 | // print(" .") 169 | // } 170 | //} 171 | 172 | let path = args[0] 173 | let op = args[1] 174 | 175 | if op == "create" { 176 | guard let _ = DiomedeQuadStore(path: path, create: true) else { 177 | print("Failed to create QuadStore") 178 | exit(1) 179 | } 180 | exit(0) 181 | } else if op == "upgrade" { 182 | guard let _ = DiomedeQuadStore.upgrade(path: path, configuration: nil) else { 183 | print("Failed to upgrade QuadStore") 184 | exit(1) 185 | } 186 | exit(0) 187 | } 188 | 189 | guard let e = Environment(path: path) 
else { 190 | print("failed to open environment: \(path)") 191 | exit(1) 192 | } 193 | 194 | let csDatabaseName = "characteristicSets" 195 | let tsDatabaseName = "typeSets" 196 | 197 | let indexes = e.database(named: DiomedeQuadStore.StaticDatabases.fullIndexes.rawValue)! 198 | var availableQuadIndexes = Set() 199 | try indexes.iterate { (k, v) in 200 | let name = try String.fromData(k) 201 | availableQuadIndexes.insert(name) 202 | } 203 | 204 | if op == "stats" { 205 | guard let qs = DiomedeQuadStore(path: path) else { 206 | print("Failed to construct quadstore") 207 | exit(1) 208 | } 209 | 210 | let stats = e.database(named: DiomedeQuadStore.StaticDatabases.stats.rawValue)! 211 | let graphs = e.database(named: DiomedeQuadStore.StaticDatabases.graphs.rawValue)! 212 | let versionHeaders = [ 213 | "Diomede-Version", 214 | "meta", "Last-Modified", 215 | "Quads-Last-Modified", 216 | "Index-Last-Modified", 217 | "Prefixes-Last-Modified", 218 | "CharacteristicSets-Last-Modified", 219 | ] 220 | let maxLength = versionHeaders.map { $0.count }.max() ?? 10 221 | print("Versions:") 222 | for k in (versionHeaders) { 223 | if let d = try stats.get(key: k) { 224 | let value = try String.fromData(d) 225 | if !value.isEmpty { 226 | print(" \(k.padding(toLength: maxLength+1, withPad: " ", startingAt: 0)): \(value)") 227 | } 228 | } 229 | } 230 | 231 | if let version = try qs.effectiveVersion() { 232 | print("Effective version: \(version)") 233 | } 234 | 235 | for k in ([DiomedeQuadStore.NextIDKey.term, DiomedeQuadStore.NextIDKey.quad]) { 236 | if let d = try stats.get(key: k.rawValue) { 237 | let value = Int.fromData(d) 238 | print("\(k): \(value)") 239 | } 240 | } 241 | let gcount = try graphs.count() 242 | print("Graphs: \(gcount)") 243 | 244 | let quads = e.database(named: "quads")! 
245 | try e.read { (txn) -> Int in 246 | let count = quads.count(txn: txn) 247 | let bytes = quads.size(txn: txn) 248 | print("Quads: \(count) (\(humanReadable(bytes: bytes)))") 249 | return 0 250 | } 251 | 252 | let indexes = e.database(named: DiomedeQuadStore.StaticDatabases.fullIndexes.rawValue)! 253 | let indexCount = try indexes.count() 254 | if indexCount == 0 { 255 | print("No indexes") 256 | } else { 257 | print("Indexes:") 258 | let indexNames = qs.fullIndexes.keys.map { $0.rawValue }.sorted() 259 | if !indexNames.isEmpty { 260 | print(" - Quad Orderings:") 261 | for name in indexNames { 262 | try e.read { (txn) in 263 | guard let index = e.database(txn: txn, named: name) else { 264 | return 1 265 | } 266 | let bytes = index.size(txn: txn) 267 | print(" - \(name) (\(humanReadable(bytes: bytes)))") 268 | return 0 269 | } 270 | } 271 | } 272 | } 273 | 274 | let databases = Set(try e.databases()) 275 | if databases.contains(csDatabaseName) { 276 | if let db = e.database(named: csDatabaseName) { 277 | try e.read { (txn) in 278 | let bytes = db.size(txn: txn) 279 | print(" - Characteristic Sets (\(humanReadable(bytes: bytes)))") 280 | return 0 281 | } 282 | let count = try db.count() 283 | if gcount > 1 { 284 | let avg = count / gcount 285 | print(" - \(count) sets (~\(avg) per graph)") 286 | } else { 287 | print(" - \(count) sets") 288 | } 289 | } 290 | } 291 | 292 | if databases.contains(tsDatabaseName) { 293 | if let db = e.database(named: tsDatabaseName) { 294 | try e.read { (txn) in 295 | let bytes = db.size(txn: txn) 296 | print(" - Type Sets (\(humanReadable(bytes: bytes)))") 297 | return 0 298 | } 299 | let count = try db.count() 300 | if gcount > 1 { 301 | let avg = count / gcount 302 | print(" - \(count) sets (~\(avg) per graph)") 303 | } else { 304 | print(" - \(count) sets") 305 | } 306 | } 307 | } 308 | 309 | //} else if op == "load" || op == "import" { 310 | // let filename = args[2] 311 | // let url = URL(fileURLWithPath: filename) 312 | // guard 
let qs = DiomedeQuadStore(path: path) else { 313 | // fatalError("Failed to construct quadstore") 314 | // } 315 | // 316 | // 317 | // let parser = RDFParserCombined() 318 | // let graph = Term(iri: url.absoluteString) 319 | // var quads = [Quad]() 320 | // try parser.parse(file: url.path, base: graph.value) { (s, p, o) in 321 | // let q = Quad(subject: s, predicate: p, object: o, graph: graph) 322 | // quads.append(q) 323 | // } 324 | // 325 | // let now = UInt64(Date().timeIntervalSince1970) 326 | // try qs.load(version: now, quads: quads) 327 | //} else if op == "match" { 328 | // let line = args[2] 329 | // guard let qs = DiomedeQuadStore(path: path) else { 330 | // fatalError("Failed to construct quadstore") 331 | // } 332 | // let p = NTriplesPatternParser(reader: "") 333 | // guard let qp = p.parseQuadPattern(line: line) else { 334 | // fatalError("Bad quad pattern") 335 | // } 336 | // let i = try qs.quads(matching: qp) 337 | // printQuads(i) 338 | } else if op == "terms" { 339 | let i2t = e.database(named: DiomedeQuadStore.StaticDatabases.id_to_term.rawValue)! 340 | try i2t.unescapingIterate { (k, v) in 341 | let key = Int.fromData(k) 342 | let value = try Term.fromData(v) 343 | 344 | let term_hash = try value.asData().sha256() 345 | let term = value.debugDescription 346 | print("\(key): \(term) (\(term_hash._hexValue))") 347 | } 348 | } else if op == "hashes" { 349 | let t2i = e.database(named: DiomedeQuadStore.StaticDatabases.term_to_id.rawValue)! 
350 | try t2i.iterate { (k, v) in 351 | let key = k 352 | let value = Int.fromData(v) 353 | print("\(key._hexValue): \(value)") 354 | } 355 | } else if let indexOrder = DiomedeQuadStore.IndexOrder(rawValue: op) { 356 | guard availableQuadIndexes.contains(op) else { 357 | print("Index \(op) not in quadstore") 358 | exit(1) 359 | } 360 | guard let qs = DiomedeQuadStore(path: path) else { 361 | print("Failed to construct quadstore") 362 | exit(1) 363 | } 364 | 365 | let i = try qs.quads(using: indexOrder) 366 | for q in i { 367 | print("\(q)") 368 | } 369 | } else if op == "addindex" { 370 | guard args.count > 2 else { 371 | print("Index name argument required") 372 | exit(1) 373 | } 374 | let name = args[2] 375 | guard let qs = DiomedeQuadStore(path: path) else { 376 | print("Failed to construct quadstore") 377 | exit(1) 378 | } 379 | 380 | if name == "cs" { 381 | print("Generating Characteristic Sets index") 382 | try qs.computeCharacteristicSets() 383 | } else if name == "ts" { 384 | print("Generating Type Sets index") 385 | try qs.computeCharacteristicSets(withTypeSets: true) 386 | } else { 387 | guard let indexOrder = DiomedeQuadStore.IndexOrder(rawValue: name) else { 388 | throw DiomedeError.indexError 389 | } 390 | print("Generating \(indexOrder.rawValue) index") 391 | try qs.addFullIndex(order: indexOrder) 392 | } 393 | } else if op == "dropindex" { 394 | guard args.count > 2 else { 395 | print("Index name argument required") 396 | exit(1) 397 | } 398 | let name = args[2] 399 | guard let qs = DiomedeQuadStore(path: path) else { 400 | print("Failed to construct quadstore") 401 | exit(1) 402 | } 403 | if name == "cs" { 404 | print("Dropping Characteristic Sets index") 405 | try qs.dropCharacteristicSets() 406 | try qs.dropTypeSets() 407 | } else if name == "ts" { 408 | print("Dropping Type Sets index") 409 | try qs.dropTypeSets() 410 | } else { 411 | if availableQuadIndexes.count == 1 { 412 | print("WARNING: Insert performance will degrade as a result of 
removing the only remaining quad index.") 413 | } 414 | 415 | guard let indexOrder = DiomedeQuadStore.IndexOrder(rawValue: name) else { 416 | throw DiomedeError.indexError 417 | } 418 | print("Dropping \(indexOrder.rawValue) index") 419 | try qs.dropFullIndex(order: indexOrder) 420 | } 421 | } else if op == "triples" { 422 | let line = args[2] 423 | guard let qs = DiomedeQuadStore(path: path) else { 424 | print("Failed to construct quadstore") 425 | exit(1) 426 | } 427 | let graph = Term(iri: line) 428 | var qp = QuadPattern.all 429 | qp.graph = .bound(graph) 430 | 431 | for q in try qs.quads(matching: qp) { 432 | let t = q.triple 433 | print(t) 434 | } 435 | } else if op == "quads" { 436 | guard let qs = DiomedeQuadStore(path: path) else { 437 | print("Failed to construct quadstore") 438 | exit(1) 439 | } 440 | let qp = QuadPattern.all 441 | let i = try qs.quads(matching: qp) 442 | for q in i { 443 | print(q.fullDescription) 444 | } 445 | } else if op == "quadids" { 446 | let quads = e.database(named: "quads")! 
447 | try quads.iterate { (_, qidsData) in 448 | var tids = [Int]() 449 | let strideBy = qidsData.count / 4 450 | for i in stride(from: 0, to: qidsData.count, by: strideBy) { 451 | let data = qidsData[i..<(i+strideBy)] 452 | let tid = Int.fromData(data) 453 | tids.append(tid) 454 | } 455 | print(tids) 456 | } 457 | } else if op == "graphterms" { 458 | let line = args[2] 459 | guard let qs = DiomedeQuadStore(path: path) else { 460 | print("Failed to construct quadstore") 461 | exit(1) 462 | } 463 | let term = Term(iri: line) 464 | for o in qs.graphTerms(in: term) { 465 | print(o) 466 | } 467 | } else if op == "graphs" { 468 | guard let qs = DiomedeQuadStore(path: path) else { 469 | print("Failed to construct quadstore") 470 | exit(1) 471 | } 472 | for g in try qs.namedGraphs() { 473 | print(g.value) 474 | } 475 | } else if op == "indexes" { 476 | for name in availableQuadIndexes { 477 | print(name) 478 | } 479 | let databases = Set(try e.databases()) 480 | if databases.contains(csDatabaseName) { 481 | print("cs") 482 | } 483 | if databases.contains(tsDatabaseName) { 484 | print("ts") 485 | } 486 | } else if op == "bestIndex" { 487 | guard let qs = DiomedeQuadStore(path: path) else { 488 | print("Failed to construct quadstore") 489 | exit(1) 490 | } 491 | // -- bestIndex s p g 492 | let positions = args[2...].map { $0.lowercased().prefix(1) }.joined() 493 | var bound = Set() 494 | for p in args[2...] 
{ 495 | switch p.lowercased() { 496 | case "s": 497 | bound.insert(0) 498 | case "p": 499 | bound.insert(1) 500 | case "o": 501 | bound.insert(2) 502 | case "g": 503 | bound.insert(3) 504 | default: 505 | break 506 | } 507 | } 508 | 509 | try e.read { (txn) -> Int in 510 | if let index = try qs._private_bestIndex(matchingBoundPositions: bound, txn: txn) { 511 | print("\(index.rawValue)") 512 | } else { 513 | print("No index available for <\(positions)>") 514 | exit(1) 515 | } 516 | return 0 517 | } 518 | 519 | } else if op == "verify" { 520 | guard let qs = DiomedeQuadStore(path: path) else { 521 | print("Failed to construct quadstore") 522 | exit(1) 523 | } 524 | try qs.verify() 525 | } else if op == "prefix" { 526 | guard let qs = DiomedeQuadStore(path: path) else { 527 | print("Failed to construct quadstore") 528 | exit(1) 529 | } 530 | 531 | if args.count == 3 && args[2] == "clear" { 532 | try qs.clearPrefixes() 533 | } else if args.count == 4 { 534 | let name = args[2] 535 | let iri = Term(iri: args[3]) 536 | try qs.addPrefix(name, for: iri) 537 | } else { 538 | let prefixes = try qs.prefixes() 539 | for (name, iri) in prefixes { 540 | let prefix = "\(name):".padding(toLength: max(16, name.count), withPad: " ", startingAt: 0) 541 | print("PREFIX \(prefix) \(iri)") 542 | } 543 | } 544 | } else if op == "cs" { 545 | guard let qs = DiomedeQuadStore(path: path) else { 546 | print("Failed to construct quadstore") 547 | exit(1) 548 | } 549 | 550 | let databases = Set(try e.databases()) 551 | if !databases.contains(csDatabaseName) { 552 | print("No Characteristic Sets index found") 553 | exit(1) 554 | } 555 | 556 | do { 557 | if qs.hasCharacteristicSets { 558 | print("Characteristic Sets are accurate: \(qs.characteristicSetsAreAccurate)") 559 | } 560 | if args.count <= 2 { 561 | var count = 0 562 | for graph in qs.graphs() { 563 | print("Graph: \(graph)") 564 | let dataset = try qs.characteristicSets(for: graph, includeTypeSets: qs.hasTypeSets) 565 | count += 
dataset.sets.count 566 | printCharacteristicSets(for: graph, in: dataset, depth: 1) 567 | } 568 | print("Total Number of Characteristic Sets: \(count) ") 569 | } else { 570 | let line = args[2] 571 | let graph = Term(iri: line) 572 | do { 573 | let dataset = try qs.characteristicSets(for: graph, includeTypeSets: qs.hasTypeSets) 574 | 575 | print("Graph: \(graph)") 576 | printCharacteristicSets(for: graph, in: dataset, depth: 1) 577 | print("Number of Characteristic Sets: \(dataset.sets.count) ") 578 | } catch DiomedeError.nonExistentTermError { 579 | print("No characteristic set found for graph \(graph)") 580 | } 581 | } 582 | } catch DiomedeError.indexError { 583 | print("No characteristic sets index found") 584 | } 585 | } else if op == "ts" { 586 | guard let qs = DiomedeQuadStore(path: path) else { 587 | print("Failed to construct quadstore") 588 | exit(1) 589 | } 590 | 591 | let databases = Set(try e.databases()) 592 | if !databases.contains(tsDatabaseName) { 593 | print("No Type Sets index found") 594 | exit(1) 595 | } 596 | 597 | do { 598 | if qs.hasTypeSets { 599 | print("Type Sets are accurate: \(qs.typeSetsAreAccurate)") 600 | } 601 | if args.count <= 2 { 602 | var count = 0 603 | for graph in qs.graphs() { 604 | print("Graph: \(graph)") 605 | let dataset = try qs.typeSets(for: graph) 606 | count += dataset.sets.count 607 | printTypeSets(for: graph, in: dataset, depth: 1) 608 | } 609 | print("Total Number of Type Sets: \(count) ") 610 | } else { 611 | let line = args[2] 612 | let graph = Term(iri: line) 613 | do { 614 | let dataset = try qs.typeSets(for: graph) 615 | 616 | print("Graph: \(graph)") 617 | printTypeSets(for: graph, in: dataset, depth: 1) 618 | print("Number of Type Sets: \(dataset.sets.count) ") 619 | } catch DiomedeError.nonExistentTermError { 620 | print("No type set found for graph \(graph)") 621 | } 622 | } 623 | } catch DiomedeError.indexError { 624 | print("No characteristic sets index found") 625 | } 626 | } else if op == "pred-card" { 
627 | guard args.count > 3 else { 628 | print("Index name argument required") 629 | exit(1) 630 | } 631 | let graph = Term(iri: args[2]) 632 | let pred = Term(iri: args[3]) 633 | 634 | guard let qs = DiomedeQuadStore(path: path) else { 635 | print("Failed to construct quadstore") 636 | exit(1) 637 | } 638 | 639 | let databases = Set(try e.databases()) 640 | if !databases.contains(csDatabaseName) { 641 | print("No Characteristic Sets index found") 642 | exit(1) 643 | } 644 | 645 | do { 646 | var t1 = TriplePattern.all 647 | var q1 = QuadPattern.all 648 | t1.predicate = .bound(pred) 649 | 650 | q1.predicate = .bound(pred) 651 | q1.graph = .bound(graph) 652 | 653 | // try time("Estimated") { 654 | var estimatedCardinality = 0.0 655 | // for graph in qs.graphs() { 656 | let dataset = try qs.characteristicSets(for: graph) 657 | let c1 = try dataset.starCardinality(matching: [t1], in: graph, store: qs) 658 | estimatedCardinality += c1 659 | // } 660 | print("Estimated: \(estimatedCardinality)") 661 | // } 662 | 663 | // try time("Actual") { 664 | let actualCardinality = try qs.countQuads(matching: q1) 665 | print("Actual : \(actualCardinality)") 666 | // } 667 | } catch DiomedeError.indexError { 668 | print("No characteristic sets index found") 669 | } 670 | } 671 | 672 | -------------------------------------------------------------------------------- /Sources/DiomedeQuadStore/CharacteristicSets.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CharacteristicSets.swift 3 | // DiomedeQuadStore 4 | // 5 | // Created by Gregory Todd Williams on 5/27/20. 
6 | // 7 | 8 | import Foundation 9 | import Diomede 10 | import SPARQLSyntax 11 | 12 | struct JoinSelectivities { 13 | var ss: Double 14 | var sp: Double 15 | var so: Double 16 | var pp: Double 17 | var po: Double 18 | var oo: Double 19 | } 20 | 21 | /// Statistics related to a predicate within the context of a Characteristic Set 22 | public struct PredicateCount: Codable { 23 | /// The total number of triples using this predicate within this Characteristic Set 24 | public var sum: Int 25 | 26 | /// The minimum number of triples with this predicate for any subject within this Characteristic Set 27 | public var min: Int 28 | 29 | /// The maximum number of triples with this predicate for any subject within this Characteristic Set 30 | public var max: Int 31 | 32 | // NOTE: whenever new variables are added here, they must be serialized and deserialized 33 | // in the CharacteristicIDSet extension below that implements asData()/fromData(). 34 | 35 | 36 | /// Update statistics for this predicate based on the observation of a CS star, with a specified number of triples using this predicate 37 | /// - Parameter count: The number of triples sharing a subject with this predicate observed in the data 38 | public mutating func addMultiplicity(_ count: Int) { 39 | self.sum += count 40 | self.min = Swift.min(self.min, count) 41 | self.max = Swift.max(self.max, count) 42 | } 43 | 44 | public mutating func formUnion(_ other: PredicateCount) { 45 | self.sum += other.sum 46 | self.min = Swift.min(self.min, other.min) 47 | self.max = Swift.max(self.max, other.max) 48 | } 49 | 50 | public func union(_ other: PredicateCount) -> PredicateCount { 51 | var u = PredicateCount(sum: 0, min: 0, max: 0) 52 | 53 | u.sum += self.sum 54 | u.sum += other.sum 55 | 56 | return u 57 | } 58 | } 59 | 60 | public struct CharacteristicIDSet: Codable { 61 | public typealias TermID = UInt64 62 | public var graph: TermID 63 | public var count: Int 64 | public var predCounts: [TermID: PredicateCount] 65 | 
public var types: [Set: Int] 66 | // NOTE: whenever new variables are added here, they must be serialized and deserialized 67 | // in the CharacteristicIDSet extension below that implements asData()/fromData(). 68 | 69 | public init(graph: TermID) { 70 | self.count = 0 71 | self.graph = graph 72 | self.predCounts = [:] 73 | self.types = [:] 74 | } 75 | 76 | public init(graph: TermID, predicates: Set, count: Int, predCounts: [TermID: PredicateCount], types: [Set: Int]) { 77 | self.count = count 78 | self.graph = graph 79 | self.predCounts = predCounts 80 | self.types = types 81 | } 82 | 83 | public mutating func formAggregation(_ other: CharacteristicIDSet) { 84 | self.count += other.count 85 | let sharedPredicates = self.predicates.intersection(other.predicates) 86 | var updatedPredCounts = [TermID : PredicateCount]() 87 | for tid in sharedPredicates { 88 | guard let thisPredCounts = self.predCounts[tid], 89 | let otherPredCounts = other.predCounts[tid] else { continue } 90 | updatedPredCounts[tid] = thisPredCounts.union(otherPredCounts) 91 | } 92 | self.predCounts = updatedPredCounts 93 | self.types.merge(other.types) { $0 + $1 } 94 | } 95 | 96 | public func aggregate(_ other: CharacteristicIDSet) -> CharacteristicIDSet { 97 | let count = self.count + other.count 98 | 99 | let sharedPredicates = self.predicates.intersection(other.predicates) 100 | var updatedPredCounts = [TermID : PredicateCount]() 101 | for tid in sharedPredicates { 102 | guard let thisPredCounts = self.predCounts[tid], 103 | let otherPredCounts = other.predCounts[tid] else { continue } 104 | updatedPredCounts[tid] = thisPredCounts.union(otherPredCounts) 105 | } 106 | 107 | let allTypes = types.merging(other.types) { $0 + $1 } 108 | return CharacteristicIDSet(graph: self.graph, predicates: sharedPredicates, count: count, predCounts: updatedPredCounts, types: allTypes) 109 | } 110 | 111 | public mutating func addStar(_ quadids: [[TermID]], withTypePredicateID typeID: TermID) { 112 | // caller is 
responsible for ensuring that all added stars have the same predicates 113 | self.count += 1 114 | let grouped = Dictionary(grouping: quadids, by: { $0[1] }) 115 | for (pid, quadids) in grouped { 116 | let count = quadids.count 117 | predCounts[pid, default: PredicateCount(sum: 0, min: Int.max, max: Int.min)].addMultiplicity(count) 118 | if pid == typeID { 119 | let starTypes = Set(quadids.map { $0[2] }) 120 | types[starTypes, default: 0] += 1 121 | } 122 | } 123 | } 124 | 125 | var predicates: Set { 126 | return Set(predCounts.keys) 127 | } 128 | 129 | func isSuperset(of subset: CharacteristicIDSet) -> Bool { 130 | return predicates.isSuperset(of: subset.predicates) 131 | } 132 | } 133 | 134 | public struct CharacteristicSet: Codable { 135 | public var count: Int 136 | public var predCounts: [Term: PredicateCount] 137 | public var types: [Set: Int] 138 | 139 | public init(_ cs: CharacteristicIDSet, from store: DiomedeQuadStore) { 140 | self.count = cs.count 141 | self.predCounts = [:] 142 | self.types = [:] 143 | for (tid, predcount) in cs.predCounts { 144 | let terms = store.termIterator(fromIds: [tid]) 145 | let term = terms.next()! 
146 | self.predCounts[term] = predcount 147 | } 148 | for (tids, count) in cs.types { 149 | let terms = Set(store.termIterator(fromIds: Array(tids))) 150 | self.types[terms] = count 151 | } 152 | } 153 | 154 | public init(predicates: Set) { 155 | self.count = 0 156 | self.predCounts = Dictionary(uniqueKeysWithValues: predicates.map { ($0, PredicateCount(sum: 1, min: 1, max: 1)) }) 157 | self.types = [:] 158 | } 159 | 160 | public init(predicates: Set, count: Int, predCounts: [Term: PredicateCount], types: [Set: Int]) { 161 | self.count = count 162 | self.predCounts = predCounts 163 | self.types = types 164 | } 165 | 166 | public mutating func formAggregation(_ other: CharacteristicSet) { 167 | self.count += other.count 168 | let sharedPredicates = self.predicates.intersection(other.predicates) 169 | var updatedPredCounts = [Term : PredicateCount]() 170 | for tid in sharedPredicates { 171 | guard let thisPredCounts = self.predCounts[tid], 172 | let otherPredCounts = other.predCounts[tid] else { continue } 173 | updatedPredCounts[tid] = thisPredCounts.union(otherPredCounts) 174 | } 175 | self.predCounts = updatedPredCounts 176 | for (tids, count) in other.types { 177 | self.types[tids, default: 0] += count 178 | } 179 | } 180 | 181 | public func aggregate(_ other: CharacteristicSet) -> CharacteristicSet { 182 | let count = self.count + other.count 183 | 184 | let sharedPredicates = self.predicates.intersection(other.predicates) 185 | var updatedPredCounts = [Term : PredicateCount]() 186 | for tid in sharedPredicates { 187 | guard let thisPredCounts = self.predCounts[tid], 188 | let otherPredCounts = other.predCounts[tid] else { continue } 189 | updatedPredCounts[tid] = thisPredCounts.union(otherPredCounts) 190 | } 191 | 192 | let allTypes = types.merging(other.types) { $0 + $1 } 193 | return CharacteristicSet(predicates: sharedPredicates, count: count, predCounts: updatedPredCounts, types: allTypes) 194 | } 195 | 196 | public var predicates: Set { 197 | return 
Set(predCounts.keys) 198 | } 199 | 200 | public func isSuperset(of subset: CharacteristicSet) -> Bool { 201 | return predicates.isSuperset(of: subset.predicates) 202 | } 203 | } 204 | 205 | public struct CharacteristicDataSet { 206 | typealias TermID = UInt64 207 | var characteristicSets: [CharacteristicIDSet] 208 | var store: DiomedeQuadStore 209 | 210 | public var sets: [CharacteristicSet] { 211 | return characteristicSets.map { 212 | CharacteristicSet($0, from: store) 213 | } 214 | } 215 | 216 | public init(_ store: DiomedeQuadStore, characteristicSets: [CharacteristicIDSet]) throws { 217 | self.store = store 218 | self.characteristicSets = characteristicSets 219 | } 220 | 221 | public init(_ store: DiomedeQuadStore, in graph: Term) throws { 222 | self.store = store 223 | if let bestIndex = try store.bestIndex(matchingBoundPositions: [0, 3]) { 224 | let order = bestIndex.order() 225 | if order[0] == 3 && order[1] == 0 { 226 | // we can access triples sorted by subject, meaning we can pipeline the first grouping without keeping all triples in memory 227 | self.characteristicSets = try CharacteristicDataSet.generateCharacteristicSets_ordered(store: store, using: bestIndex, in: graph) 228 | return 229 | } 230 | } 231 | self.characteristicSets = try CharacteristicDataSet.generateCharacteristicSets_naive(store: store, in: graph) 232 | } 233 | 234 | static func generateCharacteristicSets_ordered(store: DiomedeQuadStore, using index: DiomedeQuadStore.IndexOrder, in graph: Term) throws -> [CharacteristicIDSet] { 235 | var characteristicSets = [CharacteristicIDSet]() 236 | var lastSubject: TermID? = nil 237 | var triples = [[TermID]]() 238 | var css = [Set: CharacteristicIDSet]() 239 | 240 | var qp = QuadPattern.all 241 | qp.graph = .bound(graph) 242 | 243 | guard case .bound(let graph) = qp.graph, let gid = try store.id(for: graph) else { 244 | throw DiomedeError.indexError 245 | } 246 | 247 | let typeid = (try? store.id(for: Term.rdf("type"))) ?? 
0 248 | 249 | let quadIds = try store.quadIds(usingIndex: index, withPrefix: [gid]) 250 | for tids in quadIds { 251 | let t = tids[0..<3] 252 | let s = t[0] 253 | if let last = lastSubject, last != s { 254 | let predicates = triples.map { $0[1] } 255 | let set = Set(predicates) 256 | 257 | css[set, default: CharacteristicIDSet(graph: gid)].addStar(triples, withTypePredicateID: typeid) 258 | triples = [] 259 | } 260 | triples.append(Array(t)) 261 | lastSubject = s 262 | } 263 | 264 | // handle the remaining triples 265 | if !triples.isEmpty { 266 | let predicates = triples.map { $0[1] } 267 | let set = Set(predicates) 268 | 269 | css[set, default: CharacteristicIDSet(graph: gid)].addStar(triples, withTypePredicateID: typeid) 270 | } 271 | 272 | characteristicSets.append(contentsOf: css.values) 273 | return characteristicSets 274 | } 275 | 276 | static func generateCharacteristicSets_naive(store: DiomedeQuadStore, in graph: Term) throws -> [CharacteristicIDSet] { 277 | var characteristicSets = [CharacteristicIDSet]() 278 | var triples = [TermID: [[TermID]]]() 279 | var css = [Set: CharacteristicIDSet]() 280 | 281 | var qp = QuadPattern.all 282 | qp.graph = .bound(graph) 283 | 284 | for tids in try store.quadIds(matching: qp) { 285 | let t = tids[0..<3] 286 | let s = t[0] 287 | triples[s, default: []].append(Array(t)) 288 | } 289 | 290 | guard let gid = try store.id(for: graph) else { 291 | throw DiomedeError.indexError 292 | } 293 | 294 | let typeid = (try? store.id(for: Term.rdf("type"))) ?? 
0 295 | 296 | for (_, triples) in triples { 297 | let predicates = triples.map { $0[1] } 298 | let set = Set(predicates) 299 | css[set, default: CharacteristicIDSet(graph: gid)].addStar(triples, withTypePredicateID: typeid) 300 | } 301 | 302 | characteristicSets.append(contentsOf: css.values) 303 | return characteristicSets 304 | } 305 | 306 | public func selectivity(of object: Term, given predicate: Node, in graph: Term, store: DiomedeQuadStore) throws -> Double { 307 | var pattern = QuadPattern.all 308 | pattern.graph = .bound(graph) 309 | pattern.predicate = predicate 310 | let p_count = try store.countQuads(matching: pattern) 311 | pattern.object = .bound(object) 312 | let op_count = try store.countQuads(matching: pattern) 313 | 314 | let s = Double(op_count) / Double(p_count) 315 | // print("selectivity of \(object) given \(predicate) in \(graph): \(s)") 316 | return s 317 | } 318 | 319 | public var instanceCount: Int { 320 | return characteristicSets.reduce(0, { $0 + $1.count }) 321 | } 322 | 323 | public func aggregatedCharacteristicSet(matching bgp: [TriplePattern], in graph: Term, store: DiomedeQuadStore) throws -> CharacteristicIDSet { 324 | guard let gid = try store.id(for: graph) else { 325 | throw DiomedeError.indexError 326 | } 327 | let subset = try self.characteristicIDSet(matching: bgp, in: graph, store: store) 328 | let matching = characteristicSets.filter { $0.isSuperset(of: subset) } 329 | guard let first = matching.first else { 330 | return CharacteristicIDSet(graph: gid) 331 | } 332 | 333 | let acs = matching.dropFirst().reduce(first) { $0.aggregate($1) } 334 | return acs 335 | } 336 | 337 | public func characteristicIDSet(matching bgp: [TriplePattern], in graph: Term, store: DiomedeQuadStore) throws -> CharacteristicIDSet { 338 | let q = bgp 339 | let sq = q.map { $0.predicate }.compactMap { (node) -> Term? 
in 340 | if case .bound(let term) = node { 341 | return term 342 | } else { 343 | return nil 344 | } 345 | } 346 | 347 | var termIds = [UInt64]() 348 | try store.env.read { (txn) -> Int in 349 | for term in sq { 350 | guard let id = try store.id(for: term, txn: txn) else { 351 | throw DiomedeError.nonExistentTermError 352 | } 353 | termIds.append(id) 354 | } 355 | return 0 356 | } 357 | 358 | guard let gid = try store.id(for: graph) else { 359 | throw DiomedeError.indexError 360 | } 361 | 362 | var subset = CharacteristicIDSet(graph: gid) 363 | subset.addStar(termIds.map { [0, $0, 0, 0] }, withTypePredicateID: 0) 364 | // the counts don't matter in this CharacteristicIDSet because it will only be used 365 | // in a subsequent code to match supersets (which will contain real counts of the data). 366 | return subset 367 | } 368 | 369 | public func starCardinality(matching bgp: [TriplePattern], in graph: Term, store: DiomedeQuadStore) throws -> Double { 370 | let subset = try self.characteristicIDSet(matching: bgp, in: graph, store: store) 371 | let q = bgp 372 | 373 | var card = 0.0 374 | let matching = characteristicSets.filter { $0.isSuperset(of: subset) } 375 | // let subsetPreds = store.termIterator(fromIds: Array(subset.predicates)).map { $0.description }.sorted() 376 | // print("\(matching.count) characteristic sets match: \(subsetPreds)") 377 | for set in matching { 378 | // let cs = CharacteristicSet(set, from: store) 379 | // print("matching set: \(cs)") 380 | let distinct = Double(set.count) 381 | var m = 1.0 382 | var o = 1.0 383 | for t in q { 384 | let pred = t.predicate 385 | let obj = t.object 386 | if case .bound(let obj) = obj { 387 | o = try Swift.min(o, self.selectivity(of: obj, given: pred, in: graph, store: store)) 388 | } else if case .bound(let pred) = pred { 389 | guard let pid = try store.id(for: pred) else { 390 | throw DiomedeError.nonExistentTermError 391 | } 392 | let tm = Double(set.predCounts[pid]?.sum ?? 
0) / distinct 393 | // print("\(tm) <= \(t)") 394 | m *= tm 395 | } else { 396 | // unbound predicate; sum up all the counts 397 | let allPredCounts = set.predCounts.values.map { Double($0.sum) }.reduce(0.0) { $0 + $1 } 398 | let tm = allPredCounts / distinct 399 | // print("\(tm) <= \(t)") 400 | m *= tm 401 | } 402 | } 403 | let prod = distinct * m * o 404 | // print("\(distinct) * \(m) * \(o) = \(prod)") 405 | card += prod 406 | } 407 | // print("= \(card)") 408 | return card 409 | } 410 | 411 | } 412 | 413 | extension CharacteristicDataSet: Sequence { 414 | public func makeIterator() -> AnyIterator { 415 | return AnyIterator(characteristicSets.makeIterator()) 416 | } 417 | } 418 | 419 | extension CharacteristicSet: CustomDebugStringConvertible { 420 | public var debugDescription: String { 421 | return "CharacteristicSet(\(count); \(predicates.sorted()))" 422 | } 423 | } 424 | 425 | extension CharacteristicIDSet: CustomDebugStringConvertible { 426 | public var debugDescription: String { 427 | return "CharacteristicIDSet(\(count); \(types); \(predicates.sorted()))" 428 | } 429 | } 430 | 431 | extension CharacteristicDataSet: CustomDebugStringConvertible { 432 | public var debugDescription: String { 433 | var s = "Characteristic Sets [\n" 434 | for set in characteristicSets.sorted(by: { (a, b) in return a.count > b.count }) { 435 | s += "\t\(set.debugDescription)\n" 436 | } 437 | s += "]\n" 438 | return s 439 | } 440 | } 441 | 442 | extension DiomedeQuadStore { 443 | public var hasCharacteristicSets: Bool { 444 | let indexName = "characteristicSets" 445 | 446 | do { 447 | let databases = Set(try env.databases()) 448 | return databases.contains(indexName) 449 | } catch { 450 | return false 451 | } 452 | } 453 | 454 | public var characteristicSetsAreAccurate: Bool { 455 | guard hasCharacteristicSets else { return false } 456 | let csHeader = "CharacteristicSets-Last-Modified" 457 | let quadsHeader = "Quads-Last-Modified" 458 | if let csDate = self.read(mtimeHeader: 
csHeader), let quadsDate = self.read(mtimeHeader: quadsHeader) { 459 | return csDate >= quadsDate 460 | } 461 | return false 462 | } 463 | 464 | public func characteristicSets(for graph: Term, includeTypeSets: Bool = false) throws -> CharacteristicDataSet { 465 | let csIndexName = "characteristicSets" 466 | let typeIndexName = "typeSets" 467 | 468 | guard let index = self.env.database(named: csIndexName) else { 469 | throw DiomedeError.indexError 470 | } 471 | guard let gid = try self.id(for: graph) else { 472 | throw DiomedeError.nonExistentTermError 473 | } 474 | 475 | let lower = [Int(gid), 0].asData() 476 | let upper = [Int(gid+1), 0].asData() 477 | 478 | var setsWithBounds = [(CharacteristicIDSet, Data, Data)]() 479 | try index.iterate(between: lower, and: upper) { (k, v) in 480 | let key = [Int].fromData(k) 481 | guard key[0] == gid else { 482 | return 483 | } 484 | let cs = try CharacteristicIDSet.fromData(v, in: gid) 485 | let i = key[1] 486 | let lower = [Int(gid), i, 0].asData() 487 | let upper = [Int(gid), i+1, 0].asData() 488 | setsWithBounds.append((cs, lower, upper)) // these are the bounds to look up corresponding Type Set records for this CS 489 | } 490 | 491 | if includeTypeSets { 492 | guard let typeIndex = self.env.database(named: typeIndexName) else { 493 | throw DiomedeError.indexError 494 | } 495 | var sets = [CharacteristicIDSet]() 496 | for (cs, lower, upper) in setsWithBounds { 497 | var cs = cs 498 | try typeIndex.iterate(between: lower, and: upper) { (k, v) in 499 | let key = [Int].fromData(k) 500 | guard key[0] == gid else { 501 | return 502 | } 503 | let ts = try TypeIDSet.fromData(v, in: gid) 504 | cs.types[ts.types] = ts.count 505 | } 506 | sets.append(cs) 507 | } 508 | return try CharacteristicDataSet(self, characteristicSets: sets) 509 | } else { 510 | let sets = setsWithBounds.map { $0.0 } 511 | return try CharacteristicDataSet(self, characteristicSets: sets) 512 | } 513 | } 514 | 515 | public func dropCharacteristicSets() throws { 
516 | let indexName = "characteristicSets" 517 | let databases = Set(try env.databases()) 518 | if databases.contains(indexName) { 519 | // print("dropping \(indexName)...") 520 | if let index = self.env.database(named: indexName) { 521 | try index.drop() 522 | try self.touch(mtimeHeaders: ["CharacteristicSets-Last-Modified"]) // update the last-modified timestamp 523 | } 524 | } else { 525 | // print("no-op") 526 | } 527 | } 528 | 529 | public func computeCharacteristicSets(withTypeSets: Bool = false) throws { 530 | let csIndexName = "characteristicSets" 531 | let databases = Set(try env.databases()) 532 | 533 | let typeIndexName = "typeSets" 534 | 535 | if databases.contains(csIndexName) { 536 | guard let index = self.env.database(named: csIndexName) else { 537 | throw DiomedeError.indexError 538 | } 539 | try index.clear() 540 | 541 | if databases.contains(typeIndexName) { 542 | guard let index = self.env.database(named: typeIndexName) else { 543 | throw DiomedeError.indexError 544 | } 545 | try index.clear() 546 | } else if withTypeSets { 547 | try self.write { (txn) -> Int in 548 | try self.env.createDatabase(txn: txn, named: typeIndexName) 549 | return 0 550 | } 551 | } 552 | } else { 553 | try self.write { (txn) -> Int in 554 | try self.env.createDatabase(txn: txn, named: csIndexName) 555 | if withTypeSets { 556 | try self.env.createDatabase(txn: txn, named: typeIndexName) 557 | } 558 | return 0 559 | } 560 | } 561 | 562 | for graph in self.graphs() { 563 | let sets = try CharacteristicDataSet(self, in: graph) 564 | guard let gid = try self.id(for: graph) else { 565 | throw DiomedeError.nonExistentTermError 566 | } 567 | 568 | var csPairs = [(Data, Data)]() 569 | var typePairs = [(Data, Data)]() 570 | for (i, cs) in sets.enumerated() { 571 | let key = [Int(gid), i] 572 | let keyData = key.asData() 573 | let valueData = try cs.asData() 574 | csPairs.append((keyData, valueData)) 575 | if withTypeSets { 576 | let typeCombinations = cs.types.keys 577 | for (j, 
tc) in typeCombinations.enumerated() { 578 | let count = cs.types[tc]! 579 | let subkey = key + [j] 580 | let subKeyData = subkey.asData() 581 | 582 | let tids = TypeIDSet(graph: gid, types: tc, count: count) 583 | let typesData = try tids.asData() 584 | typePairs.append((subKeyData, typesData)) 585 | 586 | 587 | } 588 | } 589 | } 590 | 591 | try self.write(mtimeHeaders: ["CharacteristicSets-Last-Modified"]) { (txn) -> Int in 592 | let csIndex = self.env.database(txn: txn, named: csIndexName)! 593 | try csIndex.insert(txn: txn, uniqueKeysWithValues: csPairs) 594 | if withTypeSets { 595 | let typeIndex = self.env.database(txn: txn, named: typeIndexName)! 596 | try typeIndex.insert(txn: txn, uniqueKeysWithValues: typePairs) 597 | } 598 | return 0 599 | } 600 | } 601 | } 602 | } 603 | 604 | extension CharacteristicIDSet { 605 | public func asData() throws -> Data { 606 | // NOTE: this does not contain the graph ID, which is serialized in the key 607 | var value = [count] 608 | for (pred, count) in predCounts { 609 | value.append(contentsOf: [Int(pred), count.sum, count.min, count.max]) 610 | } 611 | let valueData = value.asData() 612 | return valueData 613 | } 614 | 615 | public static func fromData(_ data: Data, in gid: TermID) throws -> Self { 616 | var values = [Int].fromData(data) 617 | let count = values[0] 618 | values.removeFirst() 619 | let pairs = stride(from: 0, to: values.endIndex, by: 4).map { 620 | (UInt64(values[$0]), PredicateCount(sum: values[$0.advanced(by: 1)], min: values[$0.advanced(by: 2)], max: values[$0.advanced(by: 3)])) 621 | } 622 | let predCounts = Dictionary(uniqueKeysWithValues: pairs) 623 | let preds = Set(predCounts.keys) 624 | let cs = CharacteristicIDSet(graph: gid, predicates: preds, count: count, predCounts: predCounts, types: [:]) 625 | return cs 626 | } 627 | } 628 | -------------------------------------------------------------------------------- /Sources/Diomede/Diomede.swift: 
--------------------------------------------------------------------------------
import Foundation
import LMDB

/// Errors thrown by the Diomede LMDB wrapper.
public enum DiomedeError: Error {
    case unknownError
    case encodingError
    case databaseOpenError
    case cursorOpenError(Int32)
    case cursorError
    case insertError(String)
    case mapFullError
    case getError
    case deleteError
    case indexError
    case nonExistentTermError
    case transactionError(Int32)
}

/// Settings applied to an LMDB environment when it is opened.
public struct DiomedeConfiguration {
    public static var `default` = DiomedeConfiguration(
        mapSize: 4_096_000_000,
        maxDatabases: 256,
        flags: MDB_NOSYNC, // | MDB_NOTLS
        mode: 0o0640
    )
    
    public var mapSize: Int
    public var maxDatabases: UInt32
    public var flags: Int32
    public var mode: mdb_mode_t
    public init(mapSize: Int, maxDatabases: UInt32, flags: Int32, mode: mdb_mode_t) {
        self.mapSize = mapSize
        self.maxDatabases = maxDatabases
        self.flags = flags
        self.mode = mode
    }
}

/// A wrapper around an LMDB environment handle.
public class Environment {
    var env : OpaquePointer?
    
    /// Creates and opens the LMDB environment at `path`.
    ///
    /// Returns `nil` if the environment cannot be created, configured, or opened.
    /// After opening, `mdb_reader_check` is called and a non-zero result is printed.
    public init?(path: String, configuration _cfg: DiomedeConfiguration? = nil) {
        let cfg = _cfg ?? DiomedeConfiguration.default
        
        env = nil
        if (mdb_env_create(&env) != 0) {
            return nil
        }
        
        if (mdb_env_set_mapsize(env, cfg.mapSize) != 0) {
            mdb_env_close(env)
            env = nil
            return nil
        }
        
        if (mdb_env_set_maxdbs(env, cfg.maxDatabases) != 0) {
            mdb_env_close(env)
            env = nil
            return nil
        }
        
        let open_rc = mdb_env_open(env, path, UInt32(cfg.flags), cfg.mode)
        if (open_rc != 0) {
            print("*** \(String(cString: mdb_strerror(open_rc)))")
            mdb_env_close(env)
            env = nil
            return nil
        }
        
        var cleared: Int32 = 0
        let rc = mdb_reader_check(env, &cleared)
        if rc == 0 {
//            print("mdb_reader_check cleared \(cleared) slots")
        } else {
            print("mdb_reader_check returned \(rc)")
        }
    }
    
    deinit {
        if env != nil {
            mdb_env_close(env)
        }
    }
    
    /// Runs `handler` inside a read-only transaction.
    public func read(handler: (OpaquePointer) throws -> Int) throws {
        try run(flags: UInt32(MDB_RDONLY), handler: handler)
    }
    
    /// Runs `handler` inside a read-write transaction.
    public func write(handler: (OpaquePointer) throws -> Int) throws {
        try run(flags: 0, handler: handler)
    }
    
    /// Begins a transaction with `flags`, passes it to `handler`, and then
    /// commits when the handler returns `0`, or aborts when it returns any
    /// other value or throws (the thrown error is propagated).
    ///
    /// NOTE(review): a failing `mdb_txn_begin` is only printed (the handler is
    /// never called and no error is thrown), and the result of `mdb_txn_commit`
    /// is ignored. Because this function is `rethrows` it cannot raise its own
    /// errors without a signature change; consider throwing
    /// `DiomedeError.transactionError` from `read`/`write` instead.
    func run(flags: UInt32, handler: (OpaquePointer) throws -> Int) rethrows {
        var txn : OpaquePointer? = nil
        let rc = mdb_txn_begin(env, nil, flags, &txn)
        if (rc == 0) {
//            let txid = mdb_txn_id(txn)
//            print("BEGIN \(txid)")
            if let txn = txn {
                do {
                    let r = try handler(txn)
                    if (r == 0) {
                        let read_only = (UInt32(MDB_RDONLY) & flags) != 0
                        if read_only {
                            mdb_txn_commit(txn)
//                            print("ROLLBACK \(txid)")
                        } else {
                            mdb_txn_commit(txn)
//                            print("COMMIT \(txid)")
                        }
                    } else {
                        mdb_txn_abort(txn)
//                        print("ROLLBACK \(txid)")
                    }
                } catch let e {
                    mdb_txn_abort(txn)
                    throw e
                }
            } else {
                fatalError()
            }
        } else {
            print("mdb_txn_begin returned \(rc)")
        }
    }
    
    /// Returns the keys of the unnamed (main) database that decode as UTF-8,
    /// i.e. the names of the databases stored in this environment.
    public func databases() throws -> [String] {
        var names = [String]()
        try self.read { (txn) throws -> Int in
//            print("databases() read")
            var dbi: MDB_dbi = 0
            let r = withUnsafeMutablePointer(to: &dbi) { (dbip) -> Int in
                if (mdb_dbi_open(txn, nil, 0, dbip) != 0) {
                    print("Failed to open database")
                    return 1
                }
                return 0
            }
            if (r != 0) {
                throw DiomedeError.databaseOpenError
            }
            
            var key = MDB_val(mv_size: 0, mv_data: nil)
            var data = MDB_val(mv_size: 0, mv_data: nil)
            
            var cursor: OpaquePointer?
            let rc = mdb_cursor_open(txn, dbi, &cursor)
            guard (rc == 0) else {
                throw DiomedeError.cursorOpenError(rc)
            }
            defer { mdb_cursor_close(cursor) }
            
            var op = MDB_FIRST
            while (mdb_cursor_get(cursor, &key, &data, op) == 0) {
                op = MDB_NEXT
                let data = Data(bytes: key.mv_data, count: key.mv_size)
                if let name = String(data: data, encoding: .utf8) {
                    names.append(name)
                }
            }
            return 0
        }
        return names
    }
    
    /// Creates the named database (if needed) in its own write transaction.
    func createDatabase(named name: String) throws {
        try self.write { (txn) -> Int in
            var dbi: MDB_dbi = 0
            if (mdb_dbi_open(txn, name, UInt32(MDB_CREATE), &dbi) != 0) {
                throw DiomedeError.databaseOpenError
            }
            return 0
        }
    }
    
    /// Creates the named database (if needed) within the supplied transaction.
    public func createDatabase(txn: OpaquePointer, named name: String) throws {
        var dbi: MDB_dbi = 0
        if (mdb_dbi_open(txn, name, UInt32(MDB_CREATE), &dbi) != 0) {
            throw DiomedeError.databaseOpenError
        }
    }
    
    /// Creates the named database within `txn` and bulk-loads `keysAndValues`
    /// into it using `Database.bulkInsert`, which appends records with
    /// `MDB_APPEND` and therefore expects keys in sorted order.
    public func createDatabase<S, K: DataEncodable, V: DataEncodable>(txn: OpaquePointer, named name: String, withSortedKeysAndValues keysAndValues: S) throws where S : Sequence, S.Element == (K, V) {
        var dbi: MDB_dbi = 0
        if (mdb_dbi_open(txn, name, UInt32(MDB_CREATE), &dbi) != 0) {
            throw DiomedeError.databaseOpenError
        }
        let d = Database(environment: self, name: name, dbi: dbi)
        try d.bulkInsert(txn: txn, uniqueKeysWithValues: keysAndValues)
    }
    
    /// Deletes the named database from the environment within `txn`.
    ///
    /// - Throws: `DiomedeError.databaseOpenError` if the database cannot be
    ///   opened, or `DiomedeError.deleteError` if `mdb_drop` fails.
    public func dropDatabase(txn: OpaquePointer, named name: String) throws {
        // Open the handle in the caller's transaction rather than starting a
        // second (read) transaction while `txn` is still active.
        guard let db = self.database(txn: txn, named: name) else {
            throw DiomedeError.databaseOpenError
        }
        let rc = mdb_drop(txn, db.dbi, 1)
        guard rc == 0 else {
            throw DiomedeError.deleteError
        }
    }
    
    /// Opens the named database using a new read transaction; `nil` on failure.
    public func database(named name: String) -> Database? {
        let d = Database(environment: self, name: name)
        return d
    }
    
    /// Opens the named database within the supplied transaction; `nil` on failure.
    public func database(txn: OpaquePointer, named name: String) -> Database? {
        let d = Database(txn: txn, environment: self, name: name)
        return d
    }
    
//    public class _Cursor: IteratorProtocol {
//        public typealias Element = (OpaquePointer, Data, Data)
//        var txn : OpaquePointer
//        var cursor: OpaquePointer
//        var key : MDB_val
//        var data : MDB_val
//        var upperBound : Data?
//        var inclusive : Bool
//        var rc: Int32
//
//        init?(txn : OpaquePointer, cursor: OpaquePointer, lowerBound: DataEncodable? = nil, upperBound: DataEncodable? = nil, inclusive: Bool = false) {
//            self.txn = txn
//            self.cursor = cursor
//            self.key = MDB_val(mv_size: 0, mv_data: nil)
//            self.data = MDB_val(mv_size: 0, mv_data: nil)
//            self.upperBound = nil
//            self.inclusive = inclusive
//            self.rc = 0
//
//            var lower: Data? = nil
//            do {
//                self.upperBound = try upperBound?.asData()
//                lower = try lowerBound?.asData()
//
//                if let lower = lower {
//
//                    lower.withUnsafeBytes { (lowerPtr) in
//                        self.key = MDB_val(mv_size: lower.count, mv_data: UnsafeMutableRawPointer(mutating: lowerPtr.baseAddress))
//                    }
//                    self.rc = mdb_cursor_get(cursor, &self.key, &self.data, MDB_SET_RANGE)
//                } else {
//                    self.rc = mdb_cursor_get(cursor, &self.key, &self.data, MDB_FIRST)
//                }
//            } catch {
//                return nil
//            }
//        }
//
//        deinit {
//            let txid = mdb_txn_id(txn)
//            mdb_cursor_close(cursor)
//            mdb_txn_commit(txn)
//            print("COMMIT \(txid)")
//
//            let env = mdb_txn_env(txn)
//            var cleared: Int32 = 0
//            let rc = mdb_reader_check(env, &cleared)
//            if rc == 0 {
//                print("mdb_reader_check cleared \(cleared) slots")
//            } else {
//                print("mdb_reader_check returned \(rc)")
//            }
//        }
//
//        public func next() -> (OpaquePointer, Data, Data)? {
//            guard self.rc == 0 else {
//                return nil
//            }
//            let keyData = Data(bytes: key.mv_data, count: key.mv_size)
//            let valueData = Data(bytes: data.mv_data, count: data.mv_size)
//            let pair = (txn, keyData, valueData)
//            defer {
//                self.rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)
//            }
//            if let upper = self.upperBound {
//                let stop = upper.withUnsafeBytes { (upperPtr) -> Bool in
//                    var upperBound = MDB_val(mv_size: upper.count, mv_data: UnsafeMutableRawPointer(mutating: upperPtr.baseAddress))
//                    let cmp = mdb_cmp(txn, mdb_cursor_dbi(self.cursor), &self.key, &upperBound)
//                    if (cmp > 0) {
//                        return true
//                    }
//                    return false
//                }
//                if stop {
//                    return nil
//                }
//                return pair
//            } else {
//                return pair
//            }
//        }
//    }
    
    /// A handle to a single named LMDB database inside an `Environment`.
    public class Database: CustomStringConvertible {
        public typealias Iterator = AnyIterator<(Data, Data)>
        
        var env: Environment
        var dbi: MDB_dbi = 0
        var name: String
        
        public var description: String {
            return "Database(\(self.dbi))"
        }
        
        /// Wraps an already-open database handle.
        init(environment: Environment, name: String, dbi: MDB_dbi) {
            self.env = environment
            self.name = name
            self.dbi = dbi
        }
        
        /// Opens the named database within `txn`. Fails (returns `nil`) if
        /// `mdb_dbi_open` reports an error or yields handle `0`.
        init?(txn: OpaquePointer, environment: Environment, name: String) {
            self.env = environment
            self.name = name
            do {
                var dbi: MDB_dbi = 0
                let r = withUnsafeMutablePointer(to: &dbi) { (dbip) -> Int in
                    let rc = mdb_dbi_open(txn, name, 0, dbip)
                    if (rc != 0) {
                        print("mdb_dbi_open returned [\(rc)] for database \(name)")
                        return 1
                    }
                    return 0
                }
                if (r != 0) {
                    print("*** databaseOpenError")
                    throw DiomedeError.databaseOpenError
                }
//                print("loaded dbi \(dbi)")
                self.dbi = dbi
            } catch {
                return nil
            }
            guard dbi != 0 else {
                return nil
            }
        }
        
        /// Opens the named database using a new read transaction. Fails
        /// (returns `nil`) if `mdb_dbi_open` reports an error or yields handle `0`.
        init?(environment: Environment, name: String) {
            self.env = environment
            self.name = name
            do {
                try env.read { (txn) throws -> Int in
                    var dbi: MDB_dbi = 0
                    let r = withUnsafeMutablePointer(to: &dbi) { (dbip) -> Int in
                        let rc = mdb_dbi_open(txn, name, 0, dbip)
                        if (rc != 0) {
                            print("mdb_dbi_open returned [\(rc)] for database \(name)")
                            return 1
                        }
                        return 0
                    }
                    if (r != 0) {
                        print("*** databaseOpenError")
                        throw DiomedeError.databaseOpenError
                    }
//                    print("loaded dbi \(dbi)")
                    self.dbi = dbi
                    return 0
                }
            } catch {
                return nil
            }
            guard dbi != 0 else {
                return nil
            }
        }
        
        /// Calls `handler` with every key/value pair in cursor order, within `txn`.
        ///
        /// The `Data` values wrap the cursor's memory without copying
        /// (`bytesNoCopy` with no deallocator), so they must not be retained
        /// beyond the handler call.
        public func unescapingIterator(txn: OpaquePointer, handler: (Data, Data) throws -> ()) throws {
            var cursor: OpaquePointer?
            var rc = mdb_cursor_open(txn, self.dbi, &cursor)
            guard (rc == 0) else {
                throw DiomedeError.cursorOpenError(rc)
            }
            defer { mdb_cursor_close(cursor) }
            
            var key = MDB_val(mv_size: 0, mv_data: nil)
            var data = MDB_val(mv_size: 0, mv_data: nil)
            rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST)
            while (rc == 0) {
                let keyData = Data(bytesNoCopy: key.mv_data, count: key.mv_size, deallocator: .none)
                let valueData = Data(bytesNoCopy: data.mv_data, count: data.mv_size, deallocator: .none)
                defer {
                    rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)
                }
                try handler(keyData, valueData)
            }
        }
        
        /// Same as `unescapingIterator(txn:handler:)`, using a new read transaction.
        public func unescapingIterator(handler: (Data, Data) throws -> ()) throws {
            try self.env.read { (txn) throws -> Int in
                try self.unescapingIterator(txn: txn, handler: handler)
                return 0
            }
        }
        
        /// Applies `handler` to every key/value pair (copied out of the database)
        /// inside a read transaction and returns an iterator over the collected
        /// results. All results are materialized before this method returns.
        public func iterator<T>(handler: @escaping (OpaquePointer, Data, Data) -> T) throws -> AnyIterator<T> {
            var results = [T]()
            try self.env.read { (txn) throws -> Int in
                var cursor: OpaquePointer?
                var rc = mdb_cursor_open(txn, self.dbi, &cursor)
                guard (rc == 0) else {
                    throw DiomedeError.cursorOpenError(rc)
                }
                defer { mdb_cursor_close(cursor) }
                
                var key = MDB_val(mv_size: 0, mv_data: nil)
                var data = MDB_val(mv_size: 0, mv_data: nil)
                rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST)
                while (rc == 0) {
                    let keyData = Data(bytes: key.mv_data, count: key.mv_size)
                    let valueData = Data(bytes: data.mv_data, count: data.mv_size)
                    defer {
                        rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)
                    }
                    let element = handler(txn, keyData, valueData)
                    results.append(element)
                }
                return 0
            }
            
            return AnyIterator(results.makeIterator())
        }
        
        /// Counts the records whose key lies between `lower` and `upper`.
        ///
        /// - Parameters:
        ///   - lower: Inclusive lower bound; the scan starts at the first key >= `lower`.
        ///   - upper: Upper bound, compared with `mdb_cmp`.
        ///   - inclusive: Whether a key equal to `upper` is counted.
        /// - Throws: `DiomedeError.cursorOpenError` if a cursor cannot be opened.
        public func count(between lower: Data, and upper: Data, inclusive: Bool = false) throws -> Int {
            var count = 0
            try self.env.read { (txn) throws -> Int in
                var cursor: OpaquePointer?
                let openRC = mdb_cursor_open(txn, self.dbi, &cursor)
                guard (openRC == 0) else {
                    throw DiomedeError.cursorOpenError(openRC)
                }
                defer { mdb_cursor_close(cursor) }
                
                // The bound pointers are only valid inside the withUnsafeBytes
                // closures, so the whole scan has to happen within them.
                lower.withUnsafeBytes { (lowerPtr) in
                    upper.withUnsafeBytes { (upperPtr) in
                        var key = MDB_val(mv_size: lower.count, mv_data: UnsafeMutableRawPointer(mutating: lowerPtr.baseAddress))
                        var data = MDB_val(mv_size: 0, mv_data: nil)
                        var upperBound = MDB_val(mv_size: upper.count, mv_data: UnsafeMutableRawPointer(mutating: upperPtr.baseAddress))
                        
                        var op = MDB_SET_RANGE
                        while (mdb_cursor_get(cursor, &key, &data, op) == 0) {
                            op = MDB_NEXT
                            let cmp = mdb_cmp(txn, self.dbi, &key, &upperBound)
                            let pastUpperBound = inclusive ? (cmp > 0) : (cmp >= 0)
                            if pastUpperBound {
                                break
                            }
                            count += 1
                        }
                    }
                }
                return 0
            }
            return count
        }
        
        /// Applies `handler` to every key/value pair whose key lies between
        /// `lower` and `upper` and returns an iterator over the collected results.
        ///
        /// - Parameters:
        ///   - lower: Inclusive lower bound; the scan starts at the first key >= `lower`.
        ///   - upper: Upper bound, compared with `mdb_cmp`.
        ///   - inclusive: Whether a key equal to `upper` is included.
        ///   - handler: Called with the transaction and copies of the key and value.
        /// - Throws: `DiomedeError.cursorOpenError` if a cursor cannot be opened.
        public func iterator<T>(between lower: Data, and upper: Data, inclusive: Bool = false, handler: @escaping (OpaquePointer, Data, Data) -> T) throws -> AnyIterator<T> {
            var results = [T]()
            try self.env.read { (txn) throws -> Int in
                var cursor: OpaquePointer?
                let openRC = mdb_cursor_open(txn, self.dbi, &cursor)
                guard (openRC == 0) else {
                    throw DiomedeError.cursorOpenError(openRC)
                }
                defer { mdb_cursor_close(cursor) }
                
                // The bound pointers are only valid inside the withUnsafeBytes
                // closures, so the whole scan has to happen within them.
                lower.withUnsafeBytes { (lowerPtr) in
                    upper.withUnsafeBytes { (upperPtr) in
                        var key = MDB_val(mv_size: lower.count, mv_data: UnsafeMutableRawPointer(mutating: lowerPtr.baseAddress))
                        var data = MDB_val(mv_size: 0, mv_data: nil)
                        var upperBound = MDB_val(mv_size: upper.count, mv_data: UnsafeMutableRawPointer(mutating: upperPtr.baseAddress))
                        
                        var op = MDB_SET_RANGE
                        while (mdb_cursor_get(cursor, &key, &data, op) == 0) {
                            op = MDB_NEXT
                            let cmp = mdb_cmp(txn, self.dbi, &key, &upperBound)
                            let pastUpperBound = inclusive ? (cmp > 0) : (cmp >= 0)
                            if pastUpperBound {
                                break
                            }
                            
                            let keyData = Data(bytes: key.mv_data, count: key.mv_size)
                            let valueData = Data(bytes: data.mv_data, count: data.mv_size)
                            results.append(handler(txn, keyData, valueData))
                        }
                    }
                }
//                print("range get STOP [\(rc)]")
                return 0
            }
            return AnyIterator(results.makeIterator())
        }
        
        /// Calls `handler` with a copy of every key/value pair. The pairs are
        /// collected in a read transaction first, then handed to `handler`.
        public func iterate(handler: (Data, Data) throws -> ()) throws {
            let i = try self.iterator { ($1, $2) }
            let c = AnySequence { return i }
            for (k, v) in c {
                try handler(k, v)
            }
        }
        
        /// Forwards to `unescapingIterator(handler:)`.
        public func unescapingIterate(handler: (Data, Data) throws -> ()) throws {
            try self.unescapingIterator {
                try handler($0, $1)
            }
        }
        
        /// Forwards to `unescapingIterator(txn:handler:)`.
        public func unescapingIterate(txn: OpaquePointer, handler: (Data, Data) throws -> ()) throws {
            try self.unescapingIterator(txn: txn) {
                try handler($0, $1)
            }
        }
        
        /// Calls `handler` with a copy of every key/value pair, within `txn`.
        public func iterate(txn: OpaquePointer, handler: (Data, Data) throws -> ()) throws {
//            print("iterating on database \(name)")
            var key = MDB_val(mv_size: 0, mv_data: nil)
            var data = MDB_val(mv_size: 0, mv_data: nil)
            
            var cursor: OpaquePointer?
            var rc = mdb_cursor_open(txn, dbi, &cursor)
            guard (rc == 0) else {
                print("failed to open cursor on database \(dbi)")
                throw DiomedeError.cursorOpenError(rc)
            }
            defer { mdb_cursor_close(cursor) }
            
            rc = mdb_cursor_get(cursor, &key, &data, MDB_FIRST)
            if rc == 0 {
                repeat {
                    let keyData = Data(bytes: key.mv_data, count: key.mv_size)
                    let valueData = Data(bytes: data.mv_data, count: data.mv_size)
                    try handler(keyData, valueData)
                    rc = mdb_cursor_get(cursor, &key, &data, MDB_NEXT)
                } while (rc == 0)
//                print("mdb_cursor_get: \(rc)")
            } else {
//                print("mdb_cursor_get: \(rc)")
            }
        }
        
        /// Calls `handler` with a copy of every key/value pair whose key is
        /// >= `lower` and < `upper`. The pairs are collected in a read
        /// transaction first, then handed to `handler`.
        public func iterate(between lower: Data, and upper: Data, handler: (Data, Data) throws -> ()) throws {
//            print("pipelined iteration")
            let i = try self.iterator(between: lower, and: upper, inclusive: false) { ($1, $2) }
            let c = AnySequence { return i }
            for (k, v) in c {
                try handler(k, v)
            }
        }
        
//        public func iterate(between lower: Data, and upper: Data, handler: (Data, Data) throws -> ()) throws {
////            print("iterating on database \(name)")
//            try lower.withUnsafeBytes { (lowerPtr) in
//                try upper.withUnsafeBytes { (upperPtr) in
//                    var key = MDB_val(mv_size: lower.count, mv_data: UnsafeMutableRawPointer(mutating: lowerPtr.baseAddress))
//                    var data = MDB_val(mv_size: 0, mv_data: nil)
//
//                    var upperBound = MDB_val(mv_size: upper.count, mv_data: UnsafeMutableRawPointer(mutating: upperPtr.baseAddress))
//
//                    try env.read { (txn) -> Int in
//                        var cursor: OpaquePointer?
//                        let rc = mdb_cursor_open(txn, dbi, &cursor)
//                        guard (rc == 0) else {
//                            throw DiomedeError.cursorOpenError(rc)
//                        }
//                        defer { mdb_cursor_close(cursor) }
//
//                        var op = MDB_SET_RANGE
//                        while (mdb_cursor_get(cursor, &key, &data, op) == 0) {
//                            op = MDB_NEXT
//                            let keyData = Data(bytes: key.mv_data, count: key.mv_size)
//                            let valueData = Data(bytes: data.mv_data, count: data.mv_size)
//
//                            let cmp = mdb_cmp(txn, dbi, &key, &upperBound)
//                            if (cmp > 0) {
//                                break
//                            }
//
//                            try handler(keyData, valueData)
//                        }
//                        return 0
//                    }
//                }
//            }
//        }
        
        /// Returns the number of entries reported by `mdb_stat` within `txn`.
        public func count(txn: OpaquePointer) -> Int {
            var stat = MDB_stat(ms_psize: 0, ms_depth: 0, ms_branch_pages: 0, ms_leaf_pages: 0, ms_overflow_pages: 0, ms_entries: 0)
            mdb_stat(txn, dbi, &stat)
            return stat.ms_entries
        }
        
        /// Returns (leaf pages + branch pages) * page size as reported by
        /// `mdb_stat`; overflow pages are not included. Returns `0` if
        /// `mdb_stat` fails.
        public func size(txn: OpaquePointer) -> Int {
            var stat = MDB_stat(ms_psize: 0, ms_depth: 0, ms_branch_pages: 0, ms_leaf_pages: 0, ms_overflow_pages: 0, ms_entries: 0)
            let rc = mdb_stat(txn, dbi, &stat)
            if rc != 0 {
                print("*** mdb_stat call returned \(rc)")
                return 0
            }
            let leaf_count = stat.ms_leaf_pages
            let internal_count = stat.ms_branch_pages
            let pages = leaf_count + internal_count
            let size = pages * Int(stat.ms_psize)
            return size
        }
        
        /// Returns the number of entries reported by `mdb_stat`, using a new
        /// read transaction.
        public func count() throws -> Int {
            var count = 0
            try self.env.read { (txn) -> Int in
                var stat = MDB_stat(ms_psize: 0, ms_depth: 0, ms_branch_pages: 0, ms_leaf_pages: 0, ms_overflow_pages: 0, ms_entries: 0)
                mdb_stat(txn, dbi, &stat)
                count = stat.ms_entries
                return 0
            }
            return count
        }
        
        /// Calls `handler`, within `txn`, with a copy of every key/value pair
        /// whose key lies between `lower` (inclusive) and `upper` (inclusive
        /// only when `inclusive` is `true`).
        public func iterate(txn: OpaquePointer, between lower: Data, and upper: Data, inclusive: Bool, handler: (Data, Data) throws -> ()) throws {
            try lower.withUnsafeBytes { (lowerPtr) in
                try upper.withUnsafeBytes { (upperPtr) in
                    var key = MDB_val(mv_size: lower.count, mv_data: UnsafeMutableRawPointer(mutating: lowerPtr.baseAddress))
                    var data = MDB_val(mv_size: 0, mv_data: nil)
                    
                    var upperBound = MDB_val(mv_size: upper.count, mv_data: UnsafeMutableRawPointer(mutating: upperPtr.baseAddress))
                    
                    var cursor: OpaquePointer?
                    let rc = mdb_cursor_open(txn, dbi, &cursor)
                    guard (rc == 0) else {
                        throw DiomedeError.cursorOpenError(rc)
                    }
                    defer { mdb_cursor_close(cursor) }
                    
                    var op = MDB_SET_RANGE
                    while (mdb_cursor_get(cursor, &key, &data, op) == 0) {
                        op = MDB_NEXT
                        let keyData = Data(bytes: key.mv_data, count: key.mv_size)
                        let valueData = Data(bytes: data.mv_data, count: data.mv_size)
                        
                        let cmp = mdb_cmp(txn, dbi, &key, &upperBound)
                        if inclusive {
                            if (cmp > 0) {
                                break
                            }
                            try handler(keyData, valueData)
                        } else {
                            if (cmp >= 0) {
                                break
                            }
                            try handler(keyData, valueData)
                        }
                        
                    }
                }
            }
        }
        
        /// Returns `true` if `mdb_get` finds `k`, using a new read transaction.
        /// Any non-zero `mdb_get` result is reported as `false`.
        public func contains(key k: DataEncodable) throws -> Bool {
            var exists = false
            try env.read { (txn) throws -> Int in
                let kData = try k.asData()
                try kData.withUnsafeBytes { (kPtr) throws in
                    var value = MDB_val(mv_size: 0, mv_data: nil)
                    var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                    let rc = mdb_get(txn, dbi, &key, &value)
                    if (rc == 0) {
                        exists = true
                    }
                }
                return 0
            }
            return exists
        }
        
        /// Returns `true` if `mdb_get` finds `k` within `txn`.
        /// Any non-zero `mdb_get` result is reported as `false`.
        public func contains(txn: OpaquePointer, key k: DataEncodable) throws -> Bool {
            var exists = false
            let kData = try k.asData()
            try kData.withUnsafeBytes { (kPtr) throws in
                var value = MDB_val(mv_size: 0, mv_data: nil)
                var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                let rc = mdb_get(txn, dbi, &key, &value)
                if (rc == 0) {
                    exists = true
                }
            }
            return exists
        }
        
        /// Deletes `k` in a new write transaction.
        /// - Throws: `DiomedeError.deleteError` if `mdb_del` fails.
        public func delete(key k: DataEncodable) throws {
            try env.write { (txn) throws -> Int in
                let kData = try k.asData()
                try kData.withUnsafeBytes { (kPtr) throws in
                    var value = MDB_val(mv_size: 0, mv_data: nil)
                    var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                    let rc = mdb_del(txn, dbi, &key, &value)
                    if (rc != 0) {
                        throw DiomedeError.deleteError
                    }
                }
                return 0
            }
        }
        
        /// Deletes `k` within `txn`.
        /// - Throws: `DiomedeError.deleteError` if `mdb_del` fails.
        public func delete(txn: OpaquePointer, key k: DataEncodable) throws {
            let kData = try k.asData()
            try kData.withUnsafeBytes { (kPtr) throws in
                var value = MDB_val(mv_size: 0, mv_data: nil)
                var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                let rc = mdb_del(txn, dbi, &key, &value)
                if (rc != 0) {
                    throw DiomedeError.deleteError
                }
            }
        }
        
        /// Returns a copy of the value stored for `k`, or `nil` if the key is
        /// not found, using a new read transaction.
        /// - Throws: `DiomedeError.getError` on any other `mdb_get` failure.
        public func get(key k: DataEncodable) throws -> Data? {
            var result: Data? = nil
            try env.read { (txn) throws -> Int in
                let kData = try k.asData()
                try kData.withUnsafeBytes { (kPtr) throws in
                    var value = MDB_val(mv_size: 0, mv_data: nil)
                    var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                    let rc = mdb_get(txn, dbi, &key, &value)
                    if (rc == MDB_NOTFOUND) {
                    } else if (rc != 0) {
                        throw DiomedeError.getError
                    } else {
                        result = Data(bytes: value.mv_data, count: value.mv_size)
                    }
                }
                return 0
            }
            return result
        }
        
        /// Returns a copy of the value stored for `k`, or `nil` if the key is
        /// not found, within `txn`.
        /// - Throws: `DiomedeError.getError` on any other `mdb_get` failure.
        public func get(txn: OpaquePointer, key k: DataEncodable) throws -> Data? {
            var result: Data? = nil
            let kData = try k.asData()
            try kData.withUnsafeBytes { (kPtr) throws in
                var value = MDB_val(mv_size: 0, mv_data: nil)
                var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                let rc = mdb_get(txn, dbi, &key, &value)
                if (rc == MDB_NOTFOUND) {
                } else if (rc != 0) {
                    print("*** \(String(cString: mdb_strerror(rc)))")
                    print("*** \(String(cString: strerror(rc)))")
                    throw DiomedeError.getError
                } else {
                    result = Data(bytes: value.mv_data, count: value.mv_size)
                }
            }
            return result
        }
        
        /// Writes every pair through a cursor using `MDB_APPEND`, within `txn`.
        /// - Throws: `DiomedeError.cursorOpenError` if the cursor cannot be
        ///   opened, or `DiomedeError.insertError` (with the LMDB message and
        ///   the hex-encoded key and value) if a put fails.
        public func bulkInsert<S, K: DataEncodable, V: DataEncodable>(txn: OpaquePointer, uniqueKeysWithValues keysAndValues: S) throws where S : Sequence, S.Element == (K, V) {
            var cursor: OpaquePointer?
            let rc = mdb_cursor_open(txn, dbi, &cursor)
            guard (rc == 0) else {
                throw DiomedeError.cursorOpenError(rc)
            }
            defer { mdb_cursor_close(cursor) }
            
//            var key = MDB_val(mv_size: 0, mv_data: nil)
//            var value = MDB_val(mv_size: 0, mv_data: nil)
//            print("bulk load...")
            for (k, v) in keysAndValues {
//                print("inserting key: \(k)")
                let kData = try k.asData()
                let vData = try v.asData()
                try kData.withUnsafeBytes { (kPtr) in
                    try vData.withUnsafeBytes { (vPtr) in
                        var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                        var value = MDB_val(mv_size: vData.count, mv_data: UnsafeMutableRawPointer(mutating: vPtr.baseAddress))
//                        print("cursor put: \(kData._hexValue) => \(vData._hexValue)")
                        let rc = mdb_cursor_put(cursor, &key, &value, UInt32(MDB_APPEND))
                        if (rc != 0) {
                            let err = String(cString: mdb_strerror(rc))
                            throw DiomedeError.insertError("\(err): key: \(kData._hexValue); value: \(vData._hexValue)")
                        }
                    }
                }
            }
        }
        
        /// Writes every pair with `mdb_put` (no flags) in a new write transaction.
        /// - Throws: `DiomedeError.mapFullError` when LMDB reports `MDB_MAP_FULL`,
        ///   or `DiomedeError.insertError` for any other put failure.
        public func insert<S, K: DataEncodable, V: DataEncodable>(uniqueKeysWithValues keysAndValues: S) throws where S : Sequence, S.Element == (K, V) {
            try env.write { (txn) throws -> Int in
                for (k, v) in keysAndValues {
                    let kData = try k.asData()
                    let vData = try v.asData()
                    try kData.withUnsafeBytes { (kPtr) throws in
                        try vData.withUnsafeBytes { (vPtr) throws in
                            var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                            var value = MDB_val(mv_size: vData.count, mv_data: UnsafeMutableRawPointer(mutating: vPtr.baseAddress))
                            let rc = mdb_put(txn, dbi, &key, &value, 0); // MDB_NOOVERWRITE
                            if (rc == MDB_MAP_FULL) {
                                throw DiomedeError.mapFullError
                            } else if (rc != 0) {
                                throw DiomedeError.insertError(String(cString: mdb_strerror(rc)))
                            }
                        }
                    }
                }
                return 0
            }
        }
        
        /// Writes every pair with `mdb_put` (no flags) within `txn`.
        /// - Throws: `DiomedeError.mapFullError` when LMDB reports `MDB_MAP_FULL`,
        ///   or `DiomedeError.insertError` for any other put failure.
        public func insert<S, K: DataEncodable, V: DataEncodable>(txn: OpaquePointer, uniqueKeysWithValues keysAndValues: S) throws where S : Sequence, S.Element == (K, V) {
            for (k, v) in keysAndValues {
                let kData = try k.asData()
//                if name.count == 4 {
//                    if kData.count != 32 {
//                        print("Inserting data into quad index with bad length: \(kData.count): \(kData._hexValue)")
//                        assert(false)
//                    }
//                }
                let vData = try v.asData()
                try kData.withUnsafeBytes { (kPtr) throws in
                    try vData.withUnsafeBytes { (vPtr) throws in
                        var key = MDB_val(mv_size: kData.count, mv_data: UnsafeMutableRawPointer(mutating: kPtr.baseAddress))
                        var value = MDB_val(mv_size: vData.count, mv_data: UnsafeMutableRawPointer(mutating: vPtr.baseAddress))
                        let rc = mdb_put(txn, dbi, &key, &value, 0); // MDB_NOOVERWRITE
                        if (rc == MDB_MAP_FULL) {
                            throw DiomedeError.mapFullError
                        } else if (rc != 0) {
                            throw DiomedeError.insertError(String(cString: mdb_strerror(rc)))
                        }
                    }
                }
            }
        }
        
        /// Deletes this database from the environment (`mdb_drop` with `del = 1`)
        /// in a new write transaction. A non-zero `mdb_drop` result aborts the
        /// transaction; no error is thrown in that case.
        public func drop() throws {
            try self.env.write { (txn) -> Int in
                let r = mdb_drop(txn, self.dbi, 1)
                return Int(r)
            }
        }
        
        /// Empties this database (`mdb_drop` with `del = 0`) in a new write
        /// transaction. A non-zero `mdb_drop` result aborts the transaction;
        /// no error is thrown in that case.
        public func clear() throws {
            try self.env.write { (txn) -> Int in
                let r = mdb_drop(txn, self.dbi, 0)
                return Int(r)
            }
        }
    }
    
}

--------------------------------------------------------------------------------