├── .gitignore ├── Cartfile ├── Cartfile.resolved ├── Common ├── Dataset │ ├── Dataset.swift │ ├── RAMDataset.swift │ └── SQLiteDataset.swift ├── Preprocessor │ ├── AdvancedPreprocessor.swift │ ├── Preprocessor.swift │ └── TrivialPreprocessor.swift └── TextClassifier │ ├── CoreMLClassifier.swift │ ├── LanguageRecognizerClassifier.swift │ ├── MemoryMappedNaiveBayesClassifier.swift │ ├── NaiveBayesClassifier.swift │ └── TextClassifier.swift ├── Keynote.playground ├── Contents.swift ├── contents.xcplayground ├── playground.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist └── timeline.xctimeline ├── LICENSE ├── MemoryMappedCollections ├── Info.plist ├── MMStringIntDictionary.h ├── MMStringIntDictionary.mm ├── MMStringIntDictionaryBuilder.h ├── MMStringIntDictionaryBuilder.mm ├── MemoryMappedCollections.h ├── flatbuffers │ ├── base.h │ ├── code_generators.h │ ├── flatbuffers.h │ ├── flatc.h │ ├── flexbuffers.h │ ├── grpc.h │ ├── hash.h │ ├── idl.h │ ├── minireflect.h │ ├── reflection.h │ ├── reflection_generated.h │ ├── registry.h │ ├── stl_emulation.h │ └── util.h ├── schema.fbs └── schema_generated.h ├── MemoryMappedCollectionsMacOS ├── Info.plist └── MemoryMappedCollectionsMacOS.h ├── MemoryMappedCollectionsTests ├── Info.plist └── MMStringIntDictionaryTests.swift ├── MessageFilteringApp ├── AppDelegate.swift ├── Assets.xcassets │ ├── AppIcon.appiconset │ │ └── Contents.json │ └── Contents.json ├── Base.lproj │ ├── LaunchScreen.storyboard │ └── Main.storyboard ├── ClassifiersComparison │ ├── ClassificationComparisonModel.swift │ ├── ClassifierComparisonResultCell.swift │ ├── ClassifierComparisonTextInputCell.swift │ └── ClassifiersComparisonViewController.swift └── Info.plist ├── MessageFilteringExtension ├── Info.plist └── MessageFilterExtension.swift ├── README.md ├── TestOutput ├── CoreMLLanguageClassifier.mlmodel ├── MemoryMappedBayes.model │ ├── de │ ├── en │ ├── info.plist │ ├── ru │ ├── ru_translit │ ├── uk │ └── 
uk_translit └── NaiveBayes.model ├── TextClassification.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist ├── xcshareddata │ ├── xcbaselines │ │ ├── CBDFCA34222A937700C5E282.xcbaseline │ │ │ ├── 50B73061-30A0-46E4-A070-D5EB7D0C8174.plist │ │ │ └── Info.plist │ │ └── CBF4C8522225EAB000CDF6F9.xcbaseline │ │ │ ├── 3405DF52-46FB-42BA-99DA-D92954CC76C1.plist │ │ │ └── Info.plist │ └── xcschemes │ │ ├── MemoryMappedCollections.xcscheme │ │ ├── MemoryMappedCollectionsMacOS.xcscheme │ │ ├── TextClassification.xcscheme │ │ └── TextClassificationMacOS.xcscheme └── xcuserdata │ └── killobatt.xcuserdatad │ └── xcschemes │ └── xcschememanagement.plist ├── TextClassification ├── Info.plist └── TextClassification.h ├── TextClassificationMacOS ├── Dataset │ └── Dataset+MLDataTable.swift ├── Info.plist ├── TextClassificationMacOS.h └── TextClassifier │ └── CoreMLClassifier+CreateML.swift └── TextClassificationMacOSTests ├── Info.plist ├── Resources └── spamer.db ├── TestCases ├── BaseClassifierTestCase.swift ├── Classifier │ ├── CoreMLClassifier+CreateMLTests.swift │ ├── LanguageRecognizerClassifierTests.swift │ ├── MemoryMappedNaiveBayesClassifierTests.swift │ └── NaiveBayesClassifierTests.swift ├── Dataset │ └── SQLiteDatasetTests.swift └── Preprocessor │ ├── AdvancedPreprocessorTests.swift │ └── TrivialPreprocessorTests.swift └── Tools ├── TestBundleSettings.swift ├── TestDatasets.swift ├── TestResults.swift └── TextClassifierExtensions.swift /.gitignore: -------------------------------------------------------------------------------- 1 | /Carthage 2 | TextClassification.xcodeproj/xcuserdata/ 3 | -------------------------------------------------------------------------------- /Cartfile: -------------------------------------------------------------------------------- 1 | github "stephencelis/SQLite.swift" ~> 0.12.2 2 | 
-------------------------------------------------------------------------------- /Cartfile.resolved: -------------------------------------------------------------------------------- 1 | github "stephencelis/SQLite.swift" "0.12.2" 2 | -------------------------------------------------------------------------------- /Common/Dataset/Dataset.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Dataset.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | public struct DatasetItem: Codable { 10 | public var id: Int 11 | public var text: String 12 | public var label: String 13 | public var predictedLabel: String? 14 | } 15 | 16 | public protocol Dataset { 17 | var items: [DatasetItem] { get } 18 | var labels: Set { get } 19 | func items(for label: String) -> [DatasetItem] 20 | } 21 | 22 | public extension Dataset { 23 | subscript(startPersentage: Double, endPersentage: Double) -> [DatasetItem] { 24 | get { 25 | guard 0 <= startPersentage, startPersentage < 1.0, 0 < endPersentage, endPersentage <= 1.0, 26 | startPersentage < endPersentage else { 27 | return [] 28 | } 29 | 30 | var result: [DatasetItem] = [] 31 | for label in labels { 32 | let items = self.items(for: label) 33 | let startIndex = Int((Double(items.count) * startPersentage).rounded(.down)) 34 | let endIndex = Int((Double(items.count) * endPersentage).rounded(.up)) 35 | result.append(contentsOf: items[startIndex.. 
(trainDataset: Dataset, testDataset: Dataset) { 42 | let trainDatasetItemsLeft = self[0, startPersentage] 43 | let trainDatasetItemsRight = self[endPersentage, 1] 44 | let testDatasetItems = self[startPersentage, endPersentage] 45 | return (trainDataset: RAMDataset(items: trainDatasetItemsLeft + trainDatasetItemsRight), 46 | testDataset: RAMDataset(items: testDatasetItems)) 47 | 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /Common/Dataset/RAMDataset.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RAMDataset.swift 3 | // TextClassificationMacOSTests 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public struct RAMDataset: Dataset { 12 | 13 | // MARK: - Dataset 14 | 15 | public var items: [DatasetItem] 16 | 17 | public var labels: Set { 18 | return Set(items.map { $0.label }) 19 | } 20 | 21 | public func items(for label: String) -> [DatasetItem] { 22 | return items.filter { $0.label == label } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Common/Dataset/SQLiteDataset.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SQLiteDataset.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
7 | // 8 | 9 | import SQLite 10 | 11 | public class SQLiteDataset: Dataset { 12 | 13 | // MARK: - SQLiteDataset 14 | 15 | private let database: SQLite.Connection 16 | /// Opens the SQLite database at `databasePath`; any error thrown by `SQLite.Connection` is propagated. 17 | public init(databasePath: String) throws { 18 | self.database = try SQLite.Connection(databasePath) 19 | } 20 | /// Raw SQL used by the `Dataset` accessors below; every query reads the `message` table. 21 | private struct SQLRequests { 22 | static let allMessages = """ 23 | SELECT id, text, language_code 24 | FROM message 25 | WHERE language_code is NOT NULL; 26 | """ 27 | static let messagesByLabel = """ 28 | SELECT id, text, language_code 29 | FROM message 30 | WHERE language_code == ? 31 | """ 32 | static let allLabels = """ 33 | SELECT DISTINCT language_code 34 | FROM message; 35 | """ 36 | } 37 | 38 | // MARK: - Dataset 39 | /// Every message with a non-NULL `language_code`. Rows that cannot be decoded are dropped; a failed query trips `assertionFailure` and yields `[]`. 40 | public var items: [DatasetItem] { 41 | do { 42 | return try database.prepare(SQLRequests.allMessages).compactMap { DatasetItem(row: $0) } 43 | } catch let error { 44 | assertionFailure("Failed to execute SQL request: \(SQLRequests.allMessages), error: \(error)") 45 | return [] 46 | } 47 | } 48 | /// Distinct `language_code` values that decode as strings. A failed query trips `assertionFailure` (reporting the labels query) and yields an empty set. 49 | public var labels: Set<String> { 50 | do { 51 | return Set(try database.prepare(SQLRequests.allLabels).compactMap { String(row: $0) }) 52 | } catch let error { 53 | assertionFailure("Failed to execute SQL request: \(SQLRequests.allLabels), error: \(error)") 54 | return [] 55 | } 56 | } 57 | /// Messages whose `language_code` equals `label`, bound as a query parameter. A failed query trips `assertionFailure` (reporting the by-label query) and yields `[]`. 58 | public func items(for label: String) -> [DatasetItem] { 59 | do { 60 | return try database.prepare(SQLRequests.messagesByLabel, label).compactMap { DatasetItem(row: $0) } 61 | } catch let error { 62 | assertionFailure("Failed to execute SQL request: \(SQLRequests.messagesByLabel), error: \(error)") 63 | return [] 64 | } 65 | } 66 | } 67 | 68 | fileprivate extension String { 69 | init?(row: SQLite.Statement.Element) { 70 | guard row.count >= 1, 71 | let value = row[0] as? 
String else { 72 | return nil 73 | } 74 | self.init(value) 75 | } 76 | } 77 | 78 | fileprivate extension DatasetItem { 79 | init?(row: SQLite.Statement.Element) { 80 | guard row.count >= 3, 81 | let id = row[0] as? Int64, 82 | let text = row[1] as? String, 83 | let languageCode = row[2] as? String else { 84 | return nil 85 | } 86 | 87 | self.id = Int(id) 88 | self.text = text 89 | self.label = languageCode 90 | self.predictedLabel = nil 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /Common/Preprocessor/AdvancedPreprocessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AdvancedPreprocessor.swift 3 | // MemoryMappedCollectionsTests 4 | // 5 | // Created by Viacheslav Volodko on 3/2/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | /// `Preprocessor` that replaces phone numbers, links and dates with a space and deletes digit runs before splitting text into words. 11 | public class AdvancedPreprocessor: Preprocessor { 12 | public init() { } // explicit, because the implicit initializer of a public class is only internal 13 | public func words(of text: String) -> [String] { 14 | var preprocessedText = text 15 | // NSRange lengths are UTF-16 code units, so use `utf16.count`; `count` (Characters) is shorter for emoji and other multi-unit characters and would leave the tail of the text unscanned. 16 | let types: NSTextCheckingResult.CheckingType = [.phoneNumber, .link, .date] 17 | if let detector = try? NSDataDetector(types: types.rawValue) { 18 | preprocessedText = detector.stringByReplacingMatches(in: preprocessedText, 19 | options: [], 20 | range: NSRange(location: 0, length: preprocessedText.utf16.count), 21 | withTemplate: " ") 22 | } 23 | // Delete whatever digit sequences are still present after the detector pass. 24 | if let numberSequenceRegexp = try? 
NSRegularExpression(pattern: "\\d+") { 25 | preprocessedText = numberSequenceRegexp.stringByReplacingMatches(in: preprocessedText, 26 | options: [], 27 | range: NSRange(location: 0, length: preprocessedText.utf16.count), 28 | withTemplate: "") 29 | } 30 | // Split on whitespace, strip punctuation inside each token, and discard tokens that end up empty. 31 | let words = preprocessedText 32 | .components(separatedBy: .whitespacesAndNewlines) 33 | .map { $0.components(separatedBy: CharacterSet.punctuationCharacters).joined() } 34 | .filter { !$0.isEmpty } 35 | return words 36 | } 37 | /// Counts how often each word of `text` (as produced by `words(of:)`) occurs. 38 | public func preprocess(text: String) -> [String : Int] { 39 | let features = words(of: text).reduce(into: [String: Int]()) { result, word in 40 | result[word, default: 0] += 1 41 | } 42 | return features 43 | } 44 | /// The words of `text` re-joined with single spaces. 45 | public func preprocessedText(for text: String) -> String { 46 | return words(of: text).joined(separator: " ") 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /Common/Preprocessor/Preprocessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Preprocessor.swift 3 | // TextClassificationMacOSTests 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public protocol Preprocessor { 12 | /// Preprocesses text of DatasetItem. 13 | /// - returns: a feature dictionary: Key is feature (e.g. word), Value is how many times it is observed in text. 14 | func preprocess(text: String) -> [String: Int] 15 | 16 | func preprocessedText(for text: String) -> String 17 | } 18 | -------------------------------------------------------------------------------- /Common/Preprocessor/TrivialPreprocessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TrivialPreprocessor.swift 3 | // TextClassificationMacOSTests 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
7 | // 8 | 9 | import Foundation 10 | 11 | public class TrivialPreprocessor: Preprocessor { 12 | 13 | public init() { } 14 | 15 | public func words(of text: String) -> [String] { 16 | let words = text 17 | .components(separatedBy: .whitespacesAndNewlines) 18 | .map { $0.components(separatedBy: CharacterSet.punctuationCharacters).joined() } 19 | .filter { !$0.isEmpty } 20 | return words 21 | } 22 | 23 | public func preprocess(text: String) -> [String: Int] { 24 | let features = words(of: text).reduce(into: [String: Int]()) { result, word in 25 | result[word, default: 0] += 1 26 | } 27 | return features 28 | } 29 | 30 | public func preprocessedText(for text: String) -> String { 31 | return words(of: text).joined(separator: " ") 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /Common/TextClassifier/CoreMLClassifier.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CoreMLClassifier.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 2/26/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import CoreML 10 | 11 | public class CoreMLClassifier: TextClassifier { 12 | 13 | public enum DataTableColumnName: String { 14 | case id 15 | case text 16 | case label 17 | case predictedLabel 18 | } 19 | 20 | private let mlModel: MLModel 21 | 22 | public init(mlModel: MLModel) { 23 | self.mlModel = mlModel 24 | } 25 | 26 | // MARK: - TextClassifier 27 | 28 | public func predictedLabel(for string: String) -> String? { 29 | guard let input = try? MLDictionaryFeatureProvider(dictionary: [DataTableColumnName.text.rawValue: string]) else { 30 | return nil 31 | } 32 | let prediction = try? 
mlModel.prediction(from: input) 33 | return prediction?.featureValue(for: DataTableColumnName.label.rawValue)?.stringValue 34 | } 35 | 36 | } 37 | 38 | extension CoreMLClassifier { 39 | public convenience init(fileURL: URL) throws { 40 | let model = try MLModel(contentsOf: fileURL) 41 | self.init(mlModel: model) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /Common/TextClassifier/LanguageRecognizerClassifier.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LanguageRecognizerClassifier.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import NaturalLanguage 10 | 11 | public class LanguageRecognizerClassifier: TextClassifier { 12 | private let languageRecognizer: NLLanguageRecognizer 13 | 14 | public init() { 15 | languageRecognizer = NLLanguageRecognizer() 16 | } 17 | 18 | // MARK: - TextClassifier 19 | 20 | public static func train(on dataset: Dataset) -> TextClassifier { 21 | return LanguageRecognizerClassifier() 22 | } 23 | 24 | public func predictedLabel(for string: String) -> String? { 25 | languageRecognizer.processString(string) 26 | let label = languageRecognizer.dominantLanguage?.rawValue 27 | languageRecognizer.reset() 28 | return label 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Common/TextClassifier/MemoryMappedNaiveBayesClassifier.swift: -------------------------------------------------------------------------------- 1 | // 2 | // MemoryMappedNaiveBayesClassifier.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 3/2/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
7 | // 8 | 9 | import Foundation 10 | import MemoryMappedCollections 11 | 12 | public class MemoryMappedNaiveBayesClassifier: TrainableTextClassifier { 13 | 14 | // MARK: - MemoryMappedNaiveBayesClassifier 15 | 16 | /// Trained model. 17 | /// Key is class, aka label 18 | /// Value is feature statistics dictionary: Key is feature (aka word), value is how often it was ever observed for given label 19 | typealias Model = [String: MMStringIntDictionary] 20 | private var model: Model = [:] 21 | private var modelBuilders: [String: MMStringIntDictionaryBuilder] = [:] 22 | private let preprocessor: Preprocessor 23 | private var laplasFactor: Double 24 | 25 | init(preprocessor: Preprocessor, model: Model, modelBuilders: [String: MMStringIntDictionaryBuilder] = [:], laplasFactor: Double = 0.3) { 26 | self.preprocessor = preprocessor 27 | self.model = model 28 | self.modelBuilders = modelBuilders 29 | self.laplasFactor = laplasFactor 30 | } 31 | 32 | // MARK: - TrainableTextClassifier 33 | 34 | public static func train(with preprocessor: Preprocessor, on dataset: Dataset) -> TextClassifier { 35 | var ramModel: [String: [String: Int]] = [:] 36 | dataset.items.forEach { item in 37 | let features = preprocessor.preprocess(text: item.text) 38 | if var statistics = ramModel[item.label] { 39 | statistics.merge(features, uniquingKeysWith: +) 40 | ramModel[item.label] = statistics 41 | } else { 42 | ramModel[item.label] = features 43 | } 44 | } 45 | 46 | var diskModel: Model = [:] 47 | var modelBuilders: [String: MMStringIntDictionaryBuilder] = [:] 48 | for (label, index) in ramModel { 49 | let builder = MMStringIntDictionaryBuilder(dictionary: index.mapValues { NSNumber(value: $0) }) 50 | modelBuilders[label] = builder 51 | diskModel[label] = MMStringIntDictionary(data: builder.serialize()) 52 | } 53 | return MemoryMappedNaiveBayesClassifier(preprocessor: preprocessor, model: diskModel, modelBuilders: modelBuilders) 54 | } 55 | 56 | // MARK: - TextClassifier 57 | 58 | public func 
predictedLabel(for text: String) -> String? { 59 | let features = preprocessor.preprocess(text: text) 60 | return mostProbableLabel(of: features)?.label 61 | } 62 | 63 | // MARK: - Calculations 64 | 65 | func allLabels() -> [String] { 66 | return model.map { $0.key } 67 | } 68 | 69 | private var numberOfFeaturesByLabelCache: [String: Int] = [:] 70 | 71 | func cachingNumberOfFeatures(for label: String) -> Int { 72 | if let number = numberOfFeaturesByLabelCache[label] { 73 | return number 74 | } else { 75 | let number = numberOfFeatures(for: label) 76 | numberOfFeaturesByLabelCache[label] = number 77 | return number 78 | } 79 | } 80 | 81 | private var cachedTotalNumberOfFeatures: Int? = nil 82 | func cachingTotalNumberOfFeatures() -> Int { 83 | if let number = cachedTotalNumberOfFeatures { 84 | return number 85 | } else { 86 | let number = totalNumberOfFeatures() 87 | cachedTotalNumberOfFeatures = number 88 | return number 89 | } 90 | } 91 | 92 | func numberOfFeatures(for label: String) -> Int { 93 | guard let statistics = model[label] else { return 0 } 94 | return Int(statistics.map { $0.value }.reduce(0, +)) 95 | } 96 | 97 | func totalNumberOfFeatures() -> Int { 98 | return model.map { cachingNumberOfFeatures(for: $0.key) }.reduce(0, +) 99 | } 100 | 101 | func featureCountInIndex(feature: String, label: String) -> Int64 { 102 | let number = model[label]?.int64(forKey: feature) ?? 
0 103 | if number == NSNotFound { 104 | return 0 105 | } else { 106 | return number 107 | } 108 | } 109 | 110 | func probability(of features: [String: Int], toHaveLabel label: String) -> Double { 111 | let totalNumberOfFeatures = Double(self.cachingTotalNumberOfFeatures()) 112 | let numberOfLabelFeatures = Double(self.cachingNumberOfFeatures(for: label)) 113 | var sum = log(numberOfLabelFeatures / totalNumberOfFeatures) 114 | for (feature, featureCount) in features { 115 | let featureCountInModel = Double(featureCountInIndex(feature: feature, label: label)) 116 | sum += log(Double(featureCount) * (featureCountInModel + laplasFactor) / 117 | (numberOfLabelFeatures + totalNumberOfFeatures * laplasFactor)) 118 | } 119 | return sum 120 | } 121 | 122 | func mostProbableLabel(of features: [String: Int]) -> (label: String, probability: Double)? { 123 | let labelsByProbability = allLabels().reduce(into: [String: Double]()) { (result, label) in 124 | result[label] = probability(of: features, toHaveLabel: label) 125 | } 126 | 127 | let labelWithMaxProbability = labelsByProbability.max { (keyValue1, keyValue2) -> Bool in 128 | return keyValue1.value < keyValue2.value 129 | } 130 | 131 | guard let label = labelWithMaxProbability else { return nil } 132 | 133 | 134 | return (label: label.key, probability: label.value) 135 | } 136 | } 137 | 138 | 139 | extension MemoryMappedNaiveBayesClassifier { 140 | public convenience init(fileURL: URL, preprocessor: Preprocessor) throws { 141 | let data = try Data(contentsOf: fileURL.appendingPathComponent("info.plist")) 142 | let representation = try PropertyListDecoder().decode(StoredRepresentation.self, from: data) 143 | 144 | var model: Model = [:] 145 | for (label, filename) in representation.labelIndexFilenames { 146 | model[label] = try MMStringIntDictionary(fileURL: fileURL.appendingPathComponent(filename)) 147 | } 148 | 149 | self.init(preprocessor: preprocessor, 150 | model: model, 151 | laplasFactor: representation.laplasFactor) 
152 | } 153 | 154 | public func store(toDirectory directoryURL: URL) throws { 155 | guard !modelBuilders.isEmpty else { 156 | throw NSError(domain: "com.text.classifier", code: -1, userInfo: [ 157 | NSLocalizedDescriptionKey: "Only model created by training can be stored on disk" 158 | ]) 159 | } 160 | 161 | let fileManager = FileManager.default 162 | try fileManager.createDirectory(at: directoryURL, withIntermediateDirectories: true, attributes: nil) 163 | 164 | let labelIndexFilenames = self.modelBuilders.keys.reduce(into: [String: String]()) { result, label in 165 | result[label] = label 166 | } 167 | 168 | let data = try PropertyListEncoder().encode(StoredRepresentation(laplasFactor: laplasFactor, 169 | labelIndexFilenames: labelIndexFilenames)) 170 | try data.write(to: directoryURL.appendingPathComponent("info.plist")) 171 | 172 | for (label, filename) in labelIndexFilenames { 173 | let data = self.modelBuilders[label]?.serialize() 174 | try data?.write(to: directoryURL.appendingPathComponent(filename)) 175 | } 176 | } 177 | 178 | private struct StoredRepresentation: Codable { 179 | var laplasFactor: Double 180 | var labelIndexFilenames: [String: String] 181 | } 182 | } 183 | 184 | 185 | extension MMStringIntDictionary { 186 | func map(_ transform: ((key: String, value: Int64)) -> T) -> [T] { 187 | var array: [T] = [] 188 | for key in self.allKeys { 189 | let value = self.int64(forKey: key) 190 | array.append(transform((key: key, value: value))) 191 | } 192 | return array 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /Common/TextClassifier/NaiveBayesClassifier.swift: -------------------------------------------------------------------------------- 1 | // 2 | // NaiveBayesClassifier.swift 3 | // TextClassificationMacOSTests 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
7 | // 8 | 9 | import Foundation 10 | 11 | public class NaiveBayesClassifier: TrainableTextClassifier { 12 | 13 | // MARK: - NaiveBayesClassifier 14 | 15 | /// Trained model. 16 | /// Key is class, aka label 17 | /// Value is feature statistics dictionary: Key is feature (aka word), value is how often it was ever observed for given label 18 | typealias Model = [String: [String: Int]] 19 | private var model: Model = [:] 20 | private let preprocessor: Preprocessor 21 | private var laplasFactor: Double 22 | 23 | init(preprocessor: Preprocessor, model: Model, laplasFactor: Double = 0.3) { 24 | self.preprocessor = preprocessor 25 | self.model = model 26 | self.laplasFactor = laplasFactor 27 | } 28 | 29 | // MARK: - TrainableTextClassifier 30 | 31 | public static func train(with preprocessor: Preprocessor, on dataset: Dataset) -> TextClassifier { 32 | var model: Model = [:] 33 | dataset.items.forEach { item in 34 | let features = preprocessor.preprocess(text: item.text) 35 | if var statistics = model[item.label] { 36 | statistics.merge(features, uniquingKeysWith: +) 37 | model[item.label] = statistics 38 | } else { 39 | model[item.label] = features 40 | } 41 | } 42 | return NaiveBayesClassifier(preprocessor: preprocessor, model: model) 43 | } 44 | 45 | // MARK: - TextClassifier 46 | 47 | public func predictedLabel(for text: String) -> String? 
{ 48 | let features = preprocessor.preprocess(text: text) 49 | return mostProbableLabel(of: features)?.label 50 | } 51 | 52 | // MARK: - Calculations 53 | 54 | func allLabels() -> [String] { 55 | return model.map { $0.key } 56 | } 57 | 58 | private var numberOfFeaturesByLabelCache: [String: Int] = [:] 59 | 60 | func cachingNumberOfFeatures(for label: String) -> Int { 61 | if let number = numberOfFeaturesByLabelCache[label] { 62 | return number 63 | } else { 64 | let number = numberOfFeatures(for: label) 65 | numberOfFeaturesByLabelCache[label] = number 66 | return number 67 | } 68 | } 69 | 70 | private var cachedTotalNumberOfFeatures: Int? = nil 71 | func cachingTotalNumberOfFeatures() -> Int { 72 | if let number = cachedTotalNumberOfFeatures { 73 | return number 74 | } else { 75 | let number = totalNumberOfFeatures() 76 | cachedTotalNumberOfFeatures = number 77 | return number 78 | } 79 | } 80 | 81 | func numberOfFeatures(for label: String) -> Int { 82 | guard let statistics = model[label] else { return 0 } 83 | return statistics.map { $0.value }.reduce(0, +) 84 | } 85 | 86 | func totalNumberOfFeatures() -> Int { 87 | return model.map { cachingNumberOfFeatures(for: $0.key) }.reduce(0, +) 88 | } 89 | 90 | func featureCountInIndex(feature: String, label: String) -> Int { 91 | return model[label]?[feature] ?? 
0 92 | } 93 | 94 | func probability(of features: [String: Int], toHaveLabel label: String) -> Double { 95 | let totalNumberOfFeatures = Double(self.cachingTotalNumberOfFeatures()) 96 | let numberOfLabelFeatures = Double(self.cachingNumberOfFeatures(for: label)) 97 | var sum = log(numberOfLabelFeatures / totalNumberOfFeatures) 98 | for (feature, featureCount) in features { 99 | let featureCountInModel = Double(featureCountInIndex(feature: feature, label: label)) 100 | sum += log(Double(featureCount) * (featureCountInModel + laplasFactor) / 101 | (numberOfLabelFeatures + totalNumberOfFeatures * laplasFactor)) 102 | } 103 | return sum 104 | } 105 | 106 | func mostProbableLabel(of features: [String: Int]) -> (label: String, probability: Double)? { 107 | let labelsByProbability = allLabels().reduce(into: [String: Double]()) { (result, label) in 108 | result[label] = probability(of: features, toHaveLabel: label) 109 | } 110 | 111 | let labelWithMaxProbability = labelsByProbability.max { (keyValue1, keyValue2) -> Bool in 112 | return keyValue1.value < keyValue2.value 113 | } 114 | 115 | guard let label = labelWithMaxProbability else { return nil } 116 | 117 | 118 | return (label: label.key, probability: label.value) 119 | } 120 | } 121 | 122 | 123 | extension NaiveBayesClassifier { 124 | public convenience init(fileURL: URL, preprocessor: Preprocessor) throws { 125 | let data = try Data(contentsOf: fileURL) 126 | let representation = try PropertyListDecoder().decode(StoredRepresentation.self, from: data) 127 | self.init(preprocessor: preprocessor, 128 | model: representation.model, 129 | laplasFactor: representation.laplasFactor) 130 | } 131 | 132 | public func store(toFile fileURL: URL) throws { 133 | let data = try PropertyListEncoder().encode(StoredRepresentation(model: model, laplasFactor: laplasFactor)) 134 | try data.write(to: fileURL) 135 | } 136 | 137 | private struct StoredRepresentation: Codable { 138 | var model: Model 139 | var laplasFactor: Double 140 | } 141 
| } 142 | -------------------------------------------------------------------------------- /Common/TextClassifier/TextClassifier.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TextClassifier.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public protocol TextClassifier { 12 | func predictedLabel(for text: String) -> String? 13 | } 14 | 15 | public protocol TrainableTextClassifier: TextClassifier { 16 | static func train(with preprocessor: Preprocessor, on dataset: Dataset) -> TextClassifier 17 | } 18 | -------------------------------------------------------------------------------- /Keynote.playground/Contents.swift: -------------------------------------------------------------------------------- 1 | import Cocoa 2 | import NaturalLanguage 3 | 4 | extension NLLanguage { 5 | var name: String { 6 | let code = self.rawValue 7 | let locale = NSLocale(localeIdentifier: code) 8 | return locale.displayName(forKey: NSLocale.Key.languageCode, value: code) ?? 
"unknown" 9 | } 10 | } 11 | 12 | 13 | // `NLLanguageRecognizer` чудово справляється із розпізнаванням мови у загальному випадку 14 | NLLanguageRecognizer.dominantLanguage(for: "Hello, how are you doing?")?.name 15 | // English 16 | 17 | NLLanguageRecognizer.dominantLanguage(for: "Привіт, як твої справи")?.name 18 | // Українська 19 | 20 | NLLanguageRecognizer.dominantLanguage(for: "Привет, как твои дела?")?.name 21 | // Русский 22 | 23 | NLLanguageRecognizer.dominantLanguage(for: "Hallo, wie geht es dir?")?.name 24 | // Deutsch 25 | 26 | 27 | 28 | 29 | 30 | 31 | // Однак, коли справа стосуєтсья дуже специфічних випадків, все не так гладко: 32 | let realWorldSMS = 33 | """ 34 | VITAEMO Kompiuternum vidbirom na nomer,vipav 35 | pryz:AUTO-MAZDA SX-5 36 | Detali: 37 | +38(095)857-58-64 38 | abo na saiti: 39 | www.mir-europay.com.ua 40 | """ 41 | 42 | NLLanguageRecognizer.dominantLanguage(for: realWorldSMS)?.name 43 | // Hrvatski 44 | 45 | 46 | 47 | // В Cocoa[Touch] є чудовий інструмент для транслітерації: 48 | let detransliteratedString = 49 | realWorldSMS.applyingTransform(StringTransform.latinToCyrillic, reverse: false) ?? "" 50 | print(detransliteratedString) 51 | 52 | // Але він не дуже допомагає в цьому випадку: 53 | NLLanguageRecognizer.dominantLanguage(for: detransliteratedString)?.name 54 | 55 | 56 | // Дану задачу я пробував вирішувати наступним чином: 57 | // let originalLanguageGuess = language(for: realWorldSMS) 58 | // let transliteratedLanguageGuess = language(for: detransliterate(realWorldSMS)) 59 | // if originalLanguageGuess.probability > transliteratedLanguageGuess.probability { 60 | // return originalLanguageGuess 61 | // else 62 | // return transliteratedLanguageGuess 63 | let recognizer = NLLanguageRecognizer() 64 | recognizer.processString(detransliteratedString) 65 | let (detransliteratedHypothesis, detransliteratedProbability) = recognizer.languageHypotheses(withMaximum: 1).first! 
66 | recognizer.reset() 67 | 68 | recognizer.processString(realWorldSMS) 69 | let (hypothesis, probability) = recognizer.languageHypotheses(withMaximum: 1).first! 70 | recognizer.reset() 71 | 72 | if detransliteratedProbability < probability { 73 | 74 | } 75 | 76 | 77 | // Насправді, це дуже наївно припускати, що ймовірність вгадати текст після детранслітерації буде адекватною: 78 | 79 | let ukrainianTranslitText = "Privit, jak tvoji spravy?" 80 | let detransliteredUkrText = ukrainianTranslitText 81 | .applyingTransform(StringTransform.latinToCyrillic, reverse: false) ?? "" 82 | // Привит, йак твойи справы? 83 | 84 | let englishText = "Hello, how are you doing?" 85 | let detransliteredEngText = englishText 86 | .applyingTransform(StringTransform.latinToCyrillic, reverse: false) ?? "" 87 | // Хелло, хоу аре ыоу доинг? 88 | 89 | public protocol Preprocessor { 90 | /// Preprocesses text of DatasetItem. 91 | /// - returns: a feature dictionary: Key is feature (e.g. word), Value is how many times it is observed in text. 92 | func preprocess(text: String) -> [String] 93 | } 94 | 95 | 96 | class PreprocessingUnit: Preprocessor { 97 | func preprocess(text: String) -> [String] { 98 | return text.split(separator: " ") 99 | } 100 | } 101 | 102 | let preprocessor = PreprocessingUnit() 103 | 104 | let labels = ["label"] 105 | let textsForLabel = ["label": ["text1", "text2"]] 106 | for label in labels { 107 | for text in textsForLabel[label] ?? [] { 108 | let words = preprocessor.preprocess(text: text) 109 | for word in words { 110 | model[label][word] += 1 111 | } 112 | } 113 | } 114 | 115 | ["Зателефонуйте", "нам", "на"] 116 | 117 | [ 118 | "uk": 0.88, 119 | "ru": 0.74, 120 | "en": 0.2, 121 | ... 
122 | ] 123 | 124 | 125 | -------------------------------------------------------------------------------- /Keynote.playground/contents.xcplayground: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Keynote.playground/playground.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Keynote.playground/playground.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /Keynote.playground/timeline.xctimeline: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 9 | 10 | 14 | 15 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Viacheslav Volodko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MemoryMappedCollections/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | 22 | 23 | -------------------------------------------------------------------------------- /MemoryMappedCollections/MMStringIntDictionary.h: -------------------------------------------------------------------------------- 1 | // 2 | // MMStringIntDictionary.h 3 | // MemoryMappedCollections 4 | // 5 | // Created by Viacheslav Volodko on 3/2/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Read-only string -> int64 dictionary backed by a serialized flatbuffer.
@interface MMStringIntDictionary : NSObject

/// Loads the dictionary from `fileURL`, asking Foundation to memory-map the file.
/// Returns nil (and fills `error`) when the file cannot be read.
- (nullable instancetype)initWithFileURL:(NSURL *)fileURL error:(NSError *__autoreleasing *)error;
/// Wraps an already loaded buffer. The data object is retained and read in place.
- (instancetype)initWithData:(NSData *)data NS_DESIGNATED_INITIALIZER;
- (instancetype)init NS_UNAVAILABLE;

/// Returns the value stored for `key`, or `NSNotFound` when the key is absent.
- (int64_t)int64ForKey:(NSString *)key;

/// All keys stored in the buffer, in stored order.
@property (nonatomic, readonly, copy) NSArray *allKeys;

@end

NS_ASSUME_NONNULL_END
--------------------------------------------------------------------------------
/MemoryMappedCollections/MMStringIntDictionary.mm:
--------------------------------------------------------------------------------
//
//  MMStringIntDictionary.m
//  MemoryMappedCollections
//
//  Created by Viacheslav Volodko on 3/2/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

#import "MMStringIntDictionary.h"
#import "schema_generated.h"

@interface MMStringIntDictionary ()
// Keeps the bytes alive for as long as `dict` points into them.
@property (nonatomic, strong, nonnull) NSData *dataBuffer;
// Root table inside `dataBuffer`; NULL when the buffer is too small to hold one.
@property (nonatomic, unsafe_unretained) const flatcollections::StringIntDictionary *dict;
@end

@implementation MMStringIntDictionary

- (instancetype)initWithFileURL:(NSURL *)fileURL error:(NSError *__autoreleasing *)error {
    NSData *data = [NSData dataWithContentsOfURL:fileURL options:NSDataReadingMappedAlways error:error];
    if (nil == data) {
        return nil;
    }
    return [self initWithData:data];
}

- (instancetype)initWithData:(NSData *)data {
    self = [super init];
    if (self) {
        self.dataBuffer = data;
        // Locating the root table reads an offset from the start of the buffer, so an
        // empty or truncated NSData must not be parsed; it is treated as an empty dictionary.
        if (data.length >= sizeof(flatbuffers::uoffset_t)) {
            self.dict = flatcollections::GetStringIntDictionary(data.bytes);
        }
    }
    return self;
}

- (int64_t)int64ForKey:(NSString *)key {
    @autoreleasepool {
        const char *utf8Key = key.UTF8String;
        // `entries()` is NULL when the vector field is absent from the buffer.
        auto entries = (self.dict != nullptr) ? self.dict->entries() : nullptr;
        if (entries == nullptr || utf8Key == NULL) {
            return NSNotFound;
        }
        auto entry = entries->LookupByKey(utf8Key);
        if (entry != nullptr) {
            return entry->value();
        }
        return NSNotFound;
    }
}

- (NSArray *)allKeys {
    auto entries = (self.dict != nullptr) ? self.dict->entries() : nullptr;
    if (entries == nullptr) {
        return @[];
    }
    NSMutableArray *allKeys = [NSMutableArray arrayWithCapacity:entries->size()];
    // The index type matches the vector's unsigned length to avoid a signed/unsigned comparison.
    for (flatbuffers::uoffset_t i = 0; i < entries->size(); ++i) {
        auto entry = entries->Get(i);
        if (entry == nullptr || entry->key() == nullptr) {
            continue;
        }
        // `stringWithUTF8String:` returns nil for malformed UTF-8, and adding nil to an array throws.
        NSString *key = [NSString stringWithUTF8String:entry->key()->c_str()];
        if (key != nil) {
            [allKeys addObject:key];
        }
    }
    return allKeys.copy;
}

@end
--------------------------------------------------------------------------------
/MemoryMappedCollections/MMStringIntDictionaryBuilder.h:
--------------------------------------------------------------------------------
//
//  MMStringIntDictionaryBuilder.h
//  MemoryMappedCollections
//
//  Created by Viacheslav Volodko on 3/2/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Serializes an NSDictionary into the buffer format read by MMStringIntDictionary.
@interface MMStringIntDictionaryBuilder : NSObject

- (instancetype)initWithDictionary:(NSDictionary *)dictionary NS_DESIGNATED_INITIALIZER;
- (instancetype)init NS_UNAVAILABLE;
/// Returns the serialized dictionary as a new NSData object.
- (NSData *)serialize;

@end

NS_ASSUME_NONNULL_END
--------------------------------------------------------------------------------
/MemoryMappedCollections/MMStringIntDictionaryBuilder.mm:
--------------------------------------------------------------------------------
//
//  MMStringIntDictionaryBuilder.m
//  MemoryMappedCollections
//
//  Created by Viacheslav Volodko on 3/2/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

#import "MMStringIntDictionaryBuilder.h"
#import "schema_generated.h"

using namespace flatbuffers;
using namespace flatcollections;

@interface MMStringIntDictionaryBuilder ()
// Private copy of the source dictionary, taken at init time.
@property (nonatomic, copy) NSDictionary *dictionary;
@end

@implementation MMStringIntDictionaryBuilder

- (instancetype)initWithDictionary:(NSDictionary *)dictionary {
    self = [super init];
    if (self) {
        self.dictionary = dictionary;
    }
    return self;
}

/// Serializes the dictionary into a flatbuffer and returns the bytes as NSData.
- (NSData *)serialize {
    // 1. Create a builder with a 10MB initial buffer size.
    //    Only the builder object lives on the stack; its buffer comes from the builder's allocator.
    FlatBufferBuilder builder(1024 * 1024 * 10);

    // 2. Iterate NSDictionary keys and values, converting them into flatcollections::StringIntDictionaryEntry tables
    std::vector<Offset<StringIntDictionaryEntry>> entries;
    entries.reserve(self.dictionary.count);
    for (NSString *key in self.dictionary) {
        NSNumber *number = self.dictionary[key];
        // `longLongValue` is 64 bits wide on every platform; `integerValue` is only 32 bits on 32-bit targets.
        int64_t value = number.longLongValue;
        auto entry = CreateStringIntDictionaryEntryDirect(builder,
                                                          key.UTF8String,
                                                          value);
        entries.push_back(entry);
    }

    // 3. Create flatcollections::StringIntDictionary.
    //    The entries are stored sorted so that the reader can use LookupByKey.
    auto vector = builder.CreateVectorOfSortedTables(&entries);
    auto dictionary = CreateStringIntDictionary(builder, vector);

    // 4. Return flatbuffer as NSData (the bytes are copied out of the builder)
    builder.Finish(dictionary);
    NSData *data = [NSData dataWithBytes:builder.GetBufferPointer()
                                  length:builder.GetSize()];
    return data;
}

@end
--------------------------------------------------------------------------------
/MemoryMappedCollections/MemoryMappedCollections.h:
--------------------------------------------------------------------------------
//
//  MemoryMappedCollections.h
//  MemoryMappedCollections
//
//  Created by Viacheslav Volodko on 3/2/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

// NOTE(review): the target of this import was lost when the file was extracted;
// presumably <Foundation/Foundation.h> or <UIKit/UIKit.h> — confirm against the repository.
#import

//! Project version number for MemoryMappedCollections.
12 | FOUNDATION_EXPORT double MemoryMappedCollectionsVersionNumber; 13 | 14 | //! Project version string for MemoryMappedCollections. 15 | FOUNDATION_EXPORT const unsigned char MemoryMappedCollectionsVersionString[]; 16 | 17 | #import 18 | #import 19 | 20 | 21 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/code_generators.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2014 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_CODE_GENERATORS_H_ 18 | #define FLATBUFFERS_CODE_GENERATORS_H_ 19 | 20 | #include 21 | #include 22 | #include "flatbuffers/idl.h" 23 | 24 | namespace flatbuffers { 25 | 26 | // Utility class to assist in generating code through use of text templates. 27 | // 28 | // Example code: 29 | // CodeWriter code; 30 | // code.SetValue("NAME", "Foo"); 31 | // code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }"; 32 | // code.SetValue("NAME", "Bar"); 33 | // code += "void {{NAME}}() { printf("%s", "{{NAME}}"); }"; 34 | // std::cout << code.ToString() << std::endl; 35 | // 36 | // Output: 37 | // void Foo() { printf("%s", "Foo"); } 38 | // void Bar() { printf("%s", "Bar"); } 39 | class CodeWriter { 40 | public: 41 | CodeWriter() {} 42 | 43 | // Clears the current "written" code. 
44 | void Clear() { 45 | stream_.str(""); 46 | stream_.clear(); 47 | } 48 | 49 | // Associates a key with a value. All subsequent calls to operator+=, where 50 | // the specified key is contained in {{ and }} delimiters will be replaced by 51 | // the given value. 52 | void SetValue(const std::string &key, const std::string &value) { 53 | value_map_[key] = value; 54 | } 55 | 56 | // Appends the given text to the generated code as well as a newline 57 | // character. Any text within {{ and }} delimeters is replaced by values 58 | // previously stored in the CodeWriter by calling SetValue above. The newline 59 | // will be suppressed if the text ends with the \\ character. 60 | void operator+=(std::string text); 61 | 62 | // Returns the current contents of the CodeWriter as a std::string. 63 | std::string ToString() const { return stream_.str(); } 64 | 65 | private: 66 | std::map value_map_; 67 | std::stringstream stream_; 68 | }; 69 | 70 | class BaseGenerator { 71 | public: 72 | virtual bool generate() = 0; 73 | 74 | static std::string NamespaceDir(const Parser &parser, const std::string &path, 75 | const Namespace &ns); 76 | 77 | protected: 78 | BaseGenerator(const Parser &parser, const std::string &path, 79 | const std::string &file_name, 80 | const std::string qualifying_start, 81 | const std::string qualifying_separator) 82 | : parser_(parser), 83 | path_(path), 84 | file_name_(file_name), 85 | qualifying_start_(qualifying_start), 86 | qualifying_separator_(qualifying_separator) {} 87 | virtual ~BaseGenerator() {} 88 | 89 | // No copy/assign. 
90 | BaseGenerator &operator=(const BaseGenerator &); 91 | BaseGenerator(const BaseGenerator &); 92 | 93 | std::string NamespaceDir(const Namespace &ns) const; 94 | 95 | static const char *FlatBuffersGeneratedWarning(); 96 | 97 | static std::string FullNamespace(const char *separator, const Namespace &ns); 98 | 99 | static std::string LastNamespacePart(const Namespace &ns); 100 | 101 | // tracks the current namespace for early exit in WrapInNameSpace 102 | // c++, java and csharp returns a different namespace from 103 | // the following default (no early exit, always fully qualify), 104 | // which works for js and php 105 | virtual const Namespace *CurrentNameSpace() const { return nullptr; } 106 | 107 | // Ensure that a type is prefixed with its namespace whenever it is used 108 | // outside of its namespace. 109 | std::string WrapInNameSpace(const Namespace *ns, 110 | const std::string &name) const; 111 | 112 | std::string WrapInNameSpace(const Definition &def) const; 113 | 114 | std::string GetNameSpace(const Definition &def) const; 115 | 116 | const Parser &parser_; 117 | const std::string &path_; 118 | const std::string &file_name_; 119 | const std::string qualifying_start_; 120 | const std::string qualifying_separator_; 121 | }; 122 | 123 | struct CommentConfig { 124 | const char *first_line; 125 | const char *content_line_prefix; 126 | const char *last_line; 127 | }; 128 | 129 | extern void GenComment(const std::vector &dc, 130 | std::string *code_ptr, const CommentConfig *config, 131 | const char *prefix = ""); 132 | 133 | class FloatConstantGenerator { 134 | public: 135 | virtual ~FloatConstantGenerator() {} 136 | std::string GenFloatConstant(const FieldDef &field) const; 137 | 138 | private: 139 | virtual std::string Value(double v, const std::string &src) const = 0; 140 | virtual std::string Inf(double v) const = 0; 141 | virtual std::string NaN(double v) const = 0; 142 | 143 | virtual std::string Value(float v, const std::string &src) const = 0; 144 | 
virtual std::string Inf(float v) const = 0; 145 | virtual std::string NaN(float v) const = 0; 146 | 147 | template 148 | std::string GenFloatConstantImpl(const FieldDef &field) const; 149 | }; 150 | 151 | class SimpleFloatConstantGenerator : public FloatConstantGenerator { 152 | public: 153 | SimpleFloatConstantGenerator(const char *nan_number, 154 | const char *pos_inf_number, 155 | const char *neg_inf_number); 156 | 157 | private: 158 | std::string Value(double v, 159 | const std::string &src) const FLATBUFFERS_OVERRIDE; 160 | std::string Inf(double v) const FLATBUFFERS_OVERRIDE; 161 | std::string NaN(double v) const FLATBUFFERS_OVERRIDE; 162 | 163 | std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE; 164 | std::string Inf(float v) const FLATBUFFERS_OVERRIDE; 165 | std::string NaN(float v) const FLATBUFFERS_OVERRIDE; 166 | 167 | const std::string nan_number_; 168 | const std::string pos_inf_number_; 169 | const std::string neg_inf_number_; 170 | }; 171 | 172 | // C++, C#, Java like generator. 
173 | class TypedFloatConstantGenerator : public FloatConstantGenerator { 174 | public: 175 | TypedFloatConstantGenerator(const char *double_prefix, 176 | const char *single_prefix, const char *nan_number, 177 | const char *pos_inf_number, 178 | const char *neg_inf_number = ""); 179 | 180 | private: 181 | std::string Value(double v, 182 | const std::string &src) const FLATBUFFERS_OVERRIDE; 183 | std::string Inf(double v) const FLATBUFFERS_OVERRIDE; 184 | 185 | std::string NaN(double v) const FLATBUFFERS_OVERRIDE; 186 | 187 | std::string Value(float v, const std::string &src) const FLATBUFFERS_OVERRIDE; 188 | std::string Inf(float v) const FLATBUFFERS_OVERRIDE; 189 | std::string NaN(float v) const FLATBUFFERS_OVERRIDE; 190 | 191 | std::string MakeNaN(const std::string &prefix) const; 192 | std::string MakeInf(bool neg, const std::string &prefix) const; 193 | 194 | const std::string double_prefix_; 195 | const std::string single_prefix_; 196 | const std::string nan_number_; 197 | const std::string pos_inf_number_; 198 | const std::string neg_inf_number_; 199 | }; 200 | 201 | } // namespace flatbuffers 202 | 203 | #endif // FLATBUFFERS_CODE_GENERATORS_H_ 204 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/flatc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include "flatbuffers/flatbuffers.h" 21 | #include "flatbuffers/idl.h" 22 | #include "flatbuffers/util.h" 23 | 24 | #ifndef FLATC_H_ 25 | # define FLATC_H_ 26 | 27 | namespace flatbuffers { 28 | 29 | class FlatCompiler { 30 | public: 31 | // Output generator for the various programming languages and formats we 32 | // support. 33 | struct Generator { 34 | typedef bool (*GenerateFn)(const flatbuffers::Parser &parser, 35 | const std::string &path, 36 | const std::string &file_name); 37 | typedef std::string (*MakeRuleFn)(const flatbuffers::Parser &parser, 38 | const std::string &path, 39 | const std::string &file_name); 40 | 41 | GenerateFn generate; 42 | const char *generator_opt_short; 43 | const char *generator_opt_long; 44 | const char *lang_name; 45 | bool schema_only; 46 | GenerateFn generateGRPC; 47 | flatbuffers::IDLOptions::Language lang; 48 | const char *generator_help; 49 | MakeRuleFn make_rule; 50 | }; 51 | 52 | typedef void (*WarnFn)(const FlatCompiler *flatc, const std::string &warn, 53 | bool show_exe_name); 54 | 55 | typedef void (*ErrorFn)(const FlatCompiler *flatc, const std::string &err, 56 | bool usage, bool show_exe_name); 57 | 58 | // Parameters required to initialize the FlatCompiler. 
59 | struct InitParams { 60 | InitParams() 61 | : generators(nullptr), 62 | num_generators(0), 63 | warn_fn(nullptr), 64 | error_fn(nullptr) {} 65 | 66 | const Generator *generators; 67 | size_t num_generators; 68 | WarnFn warn_fn; 69 | ErrorFn error_fn; 70 | }; 71 | 72 | explicit FlatCompiler(const InitParams ¶ms) : params_(params) {} 73 | 74 | int Compile(int argc, const char **argv); 75 | 76 | std::string GetUsageString(const char *program_name) const; 77 | 78 | private: 79 | void ParseFile(flatbuffers::Parser &parser, const std::string &filename, 80 | const std::string &contents, 81 | std::vector &include_directories) const; 82 | 83 | void LoadBinarySchema(Parser &parser, const std::string &filename, 84 | const std::string &contents); 85 | 86 | void Warn(const std::string &warn, bool show_exe_name = true) const; 87 | 88 | void Error(const std::string &err, bool usage = true, 89 | bool show_exe_name = true) const; 90 | 91 | InitParams params_; 92 | }; 93 | 94 | } // namespace flatbuffers 95 | 96 | #endif // FLATC_H_ 97 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/grpc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2014 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_GRPC_H_ 18 | #define FLATBUFFERS_GRPC_H_ 19 | 20 | // Helper functionality to glue FlatBuffers and GRPC. 21 | 22 | #include "flatbuffers/flatbuffers.h" 23 | #include "grpc++/support/byte_buffer.h" 24 | #include "grpc/byte_buffer_reader.h" 25 | 26 | namespace flatbuffers { 27 | namespace grpc { 28 | 29 | // Message is a typed wrapper around a buffer that manages the underlying 30 | // `grpc_slice` and also provides flatbuffers-specific helpers such as `Verify` 31 | // and `GetRoot`. Since it is backed by a `grpc_slice`, the underlying buffer 32 | // is refcounted and ownership is be managed automatically. 33 | template class Message { 34 | public: 35 | Message() : slice_(grpc_empty_slice()) {} 36 | 37 | Message(grpc_slice slice, bool add_ref) 38 | : slice_(add_ref ? grpc_slice_ref(slice) : slice) {} 39 | 40 | Message &operator=(const Message &other) = delete; 41 | 42 | Message(Message &&other) : slice_(other.slice_) { 43 | other.slice_ = grpc_empty_slice(); 44 | } 45 | 46 | Message(const Message &other) = delete; 47 | 48 | Message &operator=(Message &&other) { 49 | grpc_slice_unref(slice_); 50 | slice_ = other.slice_; 51 | other.slice_ = grpc_empty_slice(); 52 | return *this; 53 | } 54 | 55 | ~Message() { grpc_slice_unref(slice_); } 56 | 57 | const uint8_t *mutable_data() const { return GRPC_SLICE_START_PTR(slice_); } 58 | 59 | const uint8_t *data() const { return GRPC_SLICE_START_PTR(slice_); } 60 | 61 | size_t size() const { return GRPC_SLICE_LENGTH(slice_); } 62 | 63 | bool Verify() const { 64 | Verifier verifier(data(), size()); 65 | return verifier.VerifyBuffer(nullptr); 66 | } 67 | 68 | T *GetMutableRoot() { return flatbuffers::GetMutableRoot(mutable_data()); } 69 | 70 | const T *GetRoot() const { return flatbuffers::GetRoot(data()); } 71 | 72 | // This is only intended for serializer use, or if you know what you're doing 73 | const grpc_slice &BorrowSlice() const { return slice_; } 74 | 75 | private: 76 | grpc_slice 
slice_; 77 | }; 78 | 79 | class MessageBuilder; 80 | 81 | // SliceAllocator is a gRPC-specific allocator that uses the `grpc_slice` 82 | // refcounted slices to manage memory ownership. This makes it easy and 83 | // efficient to transfer buffers to gRPC. 84 | class SliceAllocator : public Allocator { 85 | public: 86 | SliceAllocator() : slice_(grpc_empty_slice()) {} 87 | 88 | SliceAllocator(const SliceAllocator &other) = delete; 89 | SliceAllocator &operator=(const SliceAllocator &other) = delete; 90 | 91 | SliceAllocator(SliceAllocator &&other) 92 | : slice_(grpc_empty_slice()) { 93 | // default-construct and swap idiom 94 | swap(other); 95 | } 96 | 97 | SliceAllocator &operator=(SliceAllocator &&other) { 98 | // move-construct and swap idiom 99 | SliceAllocator temp(std::move(other)); 100 | swap(temp); 101 | return *this; 102 | } 103 | 104 | void swap(SliceAllocator &other) { 105 | using std::swap; 106 | swap(slice_, other.slice_); 107 | } 108 | 109 | virtual ~SliceAllocator() { grpc_slice_unref(slice_); } 110 | 111 | virtual uint8_t *allocate(size_t size) override { 112 | FLATBUFFERS_ASSERT(GRPC_SLICE_IS_EMPTY(slice_)); 113 | slice_ = grpc_slice_malloc(size); 114 | return GRPC_SLICE_START_PTR(slice_); 115 | } 116 | 117 | virtual void deallocate(uint8_t *p, size_t size) override { 118 | FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_)); 119 | FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_)); 120 | grpc_slice_unref(slice_); 121 | slice_ = grpc_empty_slice(); 122 | } 123 | 124 | virtual uint8_t *reallocate_downward(uint8_t *old_p, size_t old_size, 125 | size_t new_size, size_t in_use_back, 126 | size_t in_use_front) override { 127 | FLATBUFFERS_ASSERT(old_p == GRPC_SLICE_START_PTR(slice_)); 128 | FLATBUFFERS_ASSERT(old_size == GRPC_SLICE_LENGTH(slice_)); 129 | FLATBUFFERS_ASSERT(new_size > old_size); 130 | grpc_slice old_slice = slice_; 131 | grpc_slice new_slice = grpc_slice_malloc(new_size); 132 | uint8_t *new_p = GRPC_SLICE_START_PTR(new_slice); 133 | 
memcpy_downward(old_p, old_size, new_p, new_size, in_use_back, 134 | in_use_front); 135 | slice_ = new_slice; 136 | grpc_slice_unref(old_slice); 137 | return new_p; 138 | } 139 | 140 | private: 141 | grpc_slice &get_slice(uint8_t *p, size_t size) { 142 | FLATBUFFERS_ASSERT(p == GRPC_SLICE_START_PTR(slice_)); 143 | FLATBUFFERS_ASSERT(size == GRPC_SLICE_LENGTH(slice_)); 144 | return slice_; 145 | } 146 | 147 | grpc_slice slice_; 148 | 149 | friend class MessageBuilder; 150 | }; 151 | 152 | // SliceAllocatorMember is a hack to ensure that the MessageBuilder's 153 | // slice_allocator_ member is constructed before the FlatBufferBuilder, since 154 | // the allocator is used in the FlatBufferBuilder ctor. 155 | namespace detail { 156 | struct SliceAllocatorMember { 157 | SliceAllocator slice_allocator_; 158 | }; 159 | } // namespace detail 160 | 161 | // MessageBuilder is a gRPC-specific FlatBufferBuilder that uses SliceAllocator 162 | // to allocate gRPC buffers. 163 | class MessageBuilder : private detail::SliceAllocatorMember, 164 | public FlatBufferBuilder { 165 | public: 166 | explicit MessageBuilder(uoffset_t initial_size = 1024) 167 | : FlatBufferBuilder(initial_size, &slice_allocator_, false) {} 168 | 169 | MessageBuilder(const MessageBuilder &other) = delete; 170 | MessageBuilder &operator=(const MessageBuilder &other) = delete; 171 | 172 | MessageBuilder(MessageBuilder &&other) 173 | : FlatBufferBuilder(1024, &slice_allocator_, false) { 174 | // Default construct and swap idiom. 175 | Swap(other); 176 | } 177 | 178 | /// Create a MessageBuilder from a FlatBufferBuilder. 
179 | explicit MessageBuilder(FlatBufferBuilder &&src, void (*dealloc)(void*, size_t) = &DefaultAllocator::dealloc) 180 | : FlatBufferBuilder(1024, &slice_allocator_, false) { 181 | src.Swap(*this); 182 | src.SwapBufAllocator(*this); 183 | if (buf_.capacity()) { 184 | uint8_t *buf = buf_.scratch_data(); // pointer to memory 185 | size_t capacity = buf_.capacity(); // size of memory 186 | slice_allocator_.slice_ = grpc_slice_new_with_len(buf, capacity, dealloc); 187 | } 188 | else { 189 | slice_allocator_.slice_ = grpc_empty_slice(); 190 | } 191 | } 192 | 193 | /// Move-assign a FlatBufferBuilder to a MessageBuilder. 194 | /// Only FlatBufferBuilder with default allocator (basically, nullptr) is supported. 195 | MessageBuilder &operator=(FlatBufferBuilder &&src) { 196 | // Move construct a temporary and swap 197 | MessageBuilder temp(std::move(src)); 198 | Swap(temp); 199 | return *this; 200 | } 201 | 202 | MessageBuilder &operator=(MessageBuilder &&other) { 203 | // Move construct a temporary and swap 204 | MessageBuilder temp(std::move(other)); 205 | Swap(temp); 206 | return *this; 207 | } 208 | 209 | void Swap(MessageBuilder &other) { 210 | slice_allocator_.swap(other.slice_allocator_); 211 | FlatBufferBuilder::Swap(other); 212 | // After swapping the FlatBufferBuilder, we swap back the allocator, which restores 213 | // the original allocator back in place. This is necessary because MessageBuilder's 214 | // allocator is its own member (SliceAllocatorMember). The allocator passed to 215 | // FlatBufferBuilder::vector_downward must point to this member. 216 | buf_.swap_allocator(other.buf_); 217 | } 218 | 219 | // Releases the ownership of the buffer pointer. 220 | // Returns the size, offset, and the original grpc_slice that 221 | // allocated the buffer. Also see grpc_slice_unref(). 
222 | uint8_t *ReleaseRaw(size_t &size, size_t &offset, grpc_slice &slice) { 223 | uint8_t *buf = FlatBufferBuilder::ReleaseRaw(size, offset); 224 | slice = slice_allocator_.slice_; 225 | slice_allocator_.slice_ = grpc_empty_slice(); 226 | return buf; 227 | } 228 | 229 | ~MessageBuilder() {} 230 | 231 | // GetMessage extracts the subslice of the buffer corresponding to the 232 | // flatbuffers-encoded region and wraps it in a `Message` to handle buffer 233 | // ownership. 234 | template Message GetMessage() { 235 | auto buf_data = buf_.scratch_data(); // pointer to memory 236 | auto buf_size = buf_.capacity(); // size of memory 237 | auto msg_data = buf_.data(); // pointer to msg 238 | auto msg_size = buf_.size(); // size of msg 239 | // Do some sanity checks on data/size 240 | FLATBUFFERS_ASSERT(msg_data); 241 | FLATBUFFERS_ASSERT(msg_size); 242 | FLATBUFFERS_ASSERT(msg_data >= buf_data); 243 | FLATBUFFERS_ASSERT(msg_data + msg_size <= buf_data + buf_size); 244 | // Calculate offsets from the buffer start 245 | auto begin = msg_data - buf_data; 246 | auto end = begin + msg_size; 247 | // Get the slice we are working with (no refcount change) 248 | grpc_slice slice = slice_allocator_.get_slice(buf_data, buf_size); 249 | // Extract a subslice of the existing slice (increment refcount) 250 | grpc_slice subslice = grpc_slice_sub(slice, begin, end); 251 | // Wrap the subslice in a `Message`, but don't increment refcount 252 | Message msg(subslice, false); 253 | return msg; 254 | } 255 | 256 | template Message ReleaseMessage() { 257 | Message msg = GetMessage(); 258 | Reset(); 259 | return msg; 260 | } 261 | 262 | private: 263 | // SliceAllocator slice_allocator_; // part of SliceAllocatorMember 264 | }; 265 | 266 | } // namespace grpc 267 | } // namespace flatbuffers 268 | 269 | namespace grpc { 270 | 271 | template class SerializationTraits> { 272 | public: 273 | static grpc::Status Serialize(const flatbuffers::grpc::Message &msg, 274 | grpc_byte_buffer **buffer, bool 
*own_buffer) { 275 | // We are passed in a `Message`, which is a wrapper around a 276 | // `grpc_slice`. We extract it here using `BorrowSlice()`. The const cast 277 | // is necesary because the `grpc_raw_byte_buffer_create` func expects 278 | // non-const slices in order to increment their refcounts. 279 | grpc_slice *slice = const_cast(&msg.BorrowSlice()); 280 | // Now use `grpc_raw_byte_buffer_create` to package the single slice into a 281 | // `grpc_byte_buffer`, incrementing the refcount in the process. 282 | *buffer = grpc_raw_byte_buffer_create(slice, 1); 283 | *own_buffer = true; 284 | return grpc::Status::OK; 285 | } 286 | 287 | // Deserialize by pulling the 288 | static grpc::Status Deserialize(grpc_byte_buffer *buffer, 289 | flatbuffers::grpc::Message *msg) { 290 | if (!buffer) { 291 | return ::grpc::Status(::grpc::StatusCode::INTERNAL, "No payload"); 292 | } 293 | // Check if this is a single uncompressed slice. 294 | if ((buffer->type == GRPC_BB_RAW) && 295 | (buffer->data.raw.compression == GRPC_COMPRESS_NONE) && 296 | (buffer->data.raw.slice_buffer.count == 1)) { 297 | // If it is, then we can reference the `grpc_slice` directly. 298 | grpc_slice slice = buffer->data.raw.slice_buffer.slices[0]; 299 | // We wrap a `Message` around the slice, incrementing the refcount. 300 | *msg = flatbuffers::grpc::Message(slice, true); 301 | } else { 302 | // Otherwise, we need to use `grpc_byte_buffer_reader_readall` to read 303 | // `buffer` into a single contiguous `grpc_slice`. The gRPC reader gives 304 | // us back a new slice with the refcount already incremented. 
305 | grpc_byte_buffer_reader reader; 306 | grpc_byte_buffer_reader_init(&reader, buffer); 307 | grpc_slice slice = grpc_byte_buffer_reader_readall(&reader); 308 | grpc_byte_buffer_reader_destroy(&reader); 309 | // We wrap a `Message` around the slice, but dont increment refcount 310 | *msg = flatbuffers::grpc::Message(slice, false); 311 | } 312 | grpc_byte_buffer_destroy(buffer); 313 | #if FLATBUFFERS_GRPC_DISABLE_AUTO_VERIFICATION 314 | return ::grpc::Status::OK; 315 | #else 316 | if (msg->Verify()) { 317 | return ::grpc::Status::OK; 318 | } else { 319 | return ::grpc::Status(::grpc::StatusCode::INTERNAL, 320 | "Message verification failed"); 321 | } 322 | #endif 323 | } 324 | }; 325 | 326 | } // namespace grpc 327 | 328 | #endif // FLATBUFFERS_GRPC_H_ 329 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/hash.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_HASH_H_ 18 | #define FLATBUFFERS_HASH_H_ 19 | 20 | #include 21 | #include 22 | 23 | #include "flatbuffers/flatbuffers.h" 24 | 25 | namespace flatbuffers { 26 | 27 | template struct FnvTraits { 28 | static const T kFnvPrime; 29 | static const T kOffsetBasis; 30 | }; 31 | 32 | template<> struct FnvTraits { 33 | static const uint32_t kFnvPrime = 0x01000193; 34 | static const uint32_t kOffsetBasis = 0x811C9DC5; 35 | }; 36 | 37 | template<> struct FnvTraits { 38 | static const uint64_t kFnvPrime = 0x00000100000001b3ULL; 39 | static const uint64_t kOffsetBasis = 0xcbf29ce484222645ULL; 40 | }; 41 | 42 | template FLATBUFFERS_CONSTEXPR_CPP14 T HashFnv1(const char *input) { 43 | T hash = FnvTraits::kOffsetBasis; 44 | for (const char *c = input; *c; ++c) { 45 | hash *= FnvTraits::kFnvPrime; 46 | hash ^= static_cast(*c); 47 | } 48 | return hash; 49 | } 50 | 51 | template FLATBUFFERS_CONSTEXPR_CPP14 T HashFnv1a(const char *input) { 52 | T hash = FnvTraits::kOffsetBasis; 53 | for (const char *c = input; *c; ++c) { 54 | hash ^= static_cast(*c); 55 | hash *= FnvTraits::kFnvPrime; 56 | } 57 | return hash; 58 | } 59 | 60 | template <> FLATBUFFERS_CONSTEXPR_CPP14 inline uint16_t HashFnv1(const char *input) { 61 | uint32_t hash = HashFnv1(input); 62 | return (hash >> 16) ^ (hash & 0xffff); 63 | } 64 | 65 | template <> FLATBUFFERS_CONSTEXPR_CPP14 inline uint16_t HashFnv1a(const char *input) { 66 | uint32_t hash = HashFnv1a(input); 67 | return (hash >> 16) ^ (hash & 0xffff); 68 | } 69 | 70 | template struct NamedHashFunction { 71 | const char *name; 72 | 73 | typedef T (*HashFunction)(const char *); 74 | HashFunction function; 75 | }; 76 | 77 | const NamedHashFunction kHashFunctions16[] = { 78 | { "fnv1_16", HashFnv1 }, 79 | { "fnv1a_16", HashFnv1a }, 80 | }; 81 | 82 | const NamedHashFunction kHashFunctions32[] = { 83 | { "fnv1_32", HashFnv1 }, 84 | { "fnv1a_32", HashFnv1a }, 85 | }; 86 | 87 | const NamedHashFunction kHashFunctions64[] = { 88 | { 
"fnv1_64", HashFnv1 }, 89 | { "fnv1a_64", HashFnv1a }, 90 | }; 91 | 92 | inline NamedHashFunction::HashFunction FindHashFunction16( 93 | const char *name) { 94 | std::size_t size = sizeof(kHashFunctions16) / sizeof(kHashFunctions16[0]); 95 | for (std::size_t i = 0; i < size; ++i) { 96 | if (std::strcmp(name, kHashFunctions16[i].name) == 0) { 97 | return kHashFunctions16[i].function; 98 | } 99 | } 100 | return nullptr; 101 | } 102 | 103 | inline NamedHashFunction::HashFunction FindHashFunction32( 104 | const char *name) { 105 | std::size_t size = sizeof(kHashFunctions32) / sizeof(kHashFunctions32[0]); 106 | for (std::size_t i = 0; i < size; ++i) { 107 | if (std::strcmp(name, kHashFunctions32[i].name) == 0) { 108 | return kHashFunctions32[i].function; 109 | } 110 | } 111 | return nullptr; 112 | } 113 | 114 | inline NamedHashFunction::HashFunction FindHashFunction64( 115 | const char *name) { 116 | std::size_t size = sizeof(kHashFunctions64) / sizeof(kHashFunctions64[0]); 117 | for (std::size_t i = 0; i < size; ++i) { 118 | if (std::strcmp(name, kHashFunctions64[i].name) == 0) { 119 | return kHashFunctions64[i].function; 120 | } 121 | } 122 | return nullptr; 123 | } 124 | 125 | } // namespace flatbuffers 126 | 127 | #endif // FLATBUFFERS_HASH_H_ 128 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/registry.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_REGISTRY_H_ 18 | #define FLATBUFFERS_REGISTRY_H_ 19 | 20 | #include "flatbuffers/idl.h" 21 | 22 | namespace flatbuffers { 23 | 24 | // Convenience class to easily parse or generate text for arbitrary FlatBuffers. 25 | // Simply pre-populate it with all schema filenames that may be in use, and 26 | // This class will look them up using the file_identifier declared in the 27 | // schema. 28 | class Registry { 29 | public: 30 | // Call this for all schemas that may be in use. The identifier has 31 | // a function in the generated code, e.g. MonsterIdentifier(). 32 | void Register(const char *file_identifier, const char *schema_path) { 33 | Schema schema; 34 | schema.path_ = schema_path; 35 | schemas_[file_identifier] = schema; 36 | } 37 | 38 | // Generate text from an arbitrary FlatBuffer by looking up its 39 | // file_identifier in the registry. 40 | bool FlatBufferToText(const uint8_t *flatbuf, size_t len, std::string *dest) { 41 | // Get the identifier out of the buffer. 42 | // If the buffer is truncated, exit. 43 | if (len < sizeof(uoffset_t) + FlatBufferBuilder::kFileIdentifierLength) { 44 | lasterror_ = "buffer truncated"; 45 | return false; 46 | } 47 | std::string ident( 48 | reinterpret_cast(flatbuf) + sizeof(uoffset_t), 49 | FlatBufferBuilder::kFileIdentifierLength); 50 | // Load and parse the schema. 51 | Parser parser; 52 | if (!LoadSchema(ident, &parser)) return false; 53 | // Now we're ready to generate text. 
54 | if (!GenerateText(parser, flatbuf, dest)) { 55 | lasterror_ = "unable to generate text for FlatBuffer binary"; 56 | return false; 57 | } 58 | return true; 59 | } 60 | 61 | // Converts a binary buffer to text using one of the schemas in the registry, 62 | // use the file_identifier to indicate which. 63 | // If DetachedBuffer::data() is null then parsing failed. 64 | DetachedBuffer TextToFlatBuffer(const char *text, 65 | const char *file_identifier) { 66 | // Load and parse the schema. 67 | Parser parser; 68 | if (!LoadSchema(file_identifier, &parser)) return DetachedBuffer(); 69 | // Parse the text. 70 | if (!parser.Parse(text)) { 71 | lasterror_ = parser.error_; 72 | return DetachedBuffer(); 73 | } 74 | // We have a valid FlatBuffer. Detach it from the builder and return. 75 | return parser.builder_.Release(); 76 | } 77 | 78 | // Modify any parsing / output options used by the other functions. 79 | void SetOptions(const IDLOptions &opts) { opts_ = opts; } 80 | 81 | // If schemas used contain include statements, call this function for every 82 | // directory the parser should search them for. 83 | void AddIncludeDirectory(const char *path) { include_paths_.push_back(path); } 84 | 85 | // Returns a human readable error if any of the above functions fail. 86 | const std::string &GetLastError() { return lasterror_; } 87 | 88 | private: 89 | bool LoadSchema(const std::string &ident, Parser *parser) { 90 | // Find the schema, if not, exit. 91 | auto it = schemas_.find(ident); 92 | if (it == schemas_.end()) { 93 | // Don't attach the identifier, since it may not be human readable. 94 | lasterror_ = "identifier for this buffer not in the registry"; 95 | return false; 96 | } 97 | auto &schema = it->second; 98 | // Load the schema from disk. If not, exit. 99 | std::string schematext; 100 | if (!LoadFile(schema.path_.c_str(), false, &schematext)) { 101 | lasterror_ = "could not load schema: " + schema.path_; 102 | return false; 103 | } 104 | // Parse schema. 
105 | parser->opts = opts_; 106 | if (!parser->Parse(schematext.c_str(), vector_data(include_paths_), 107 | schema.path_.c_str())) { 108 | lasterror_ = parser->error_; 109 | return false; 110 | } 111 | return true; 112 | } 113 | 114 | struct Schema { 115 | std::string path_; 116 | // TODO(wvo) optionally cache schema file or parsed schema here. 117 | }; 118 | 119 | std::string lasterror_; 120 | IDLOptions opts_; 121 | std::vector include_paths_; 122 | std::map schemas_; 123 | }; 124 | 125 | } // namespace flatbuffers 126 | 127 | #endif // FLATBUFFERS_REGISTRY_H_ 128 | -------------------------------------------------------------------------------- /MemoryMappedCollections/flatbuffers/stl_emulation.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_STL_EMULATION_H_ 18 | #define FLATBUFFERS_STL_EMULATION_H_ 19 | 20 | // clang-format off 21 | 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #if defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) 29 | #define FLATBUFFERS_CPP98_STL 30 | #endif // defined(_STLPORT_VERSION) && !defined(FLATBUFFERS_CPP98_STL) 31 | 32 | #if defined(FLATBUFFERS_CPP98_STL) 33 | #include 34 | #endif // defined(FLATBUFFERS_CPP98_STL) 35 | 36 | // Check if we can use template aliases 37 | // Not possible if Microsoft Compiler before 2012 38 | // Possible is the language feature __cpp_alias_templates is defined well 39 | // Or possible if the C++ std is C+11 or newer 40 | #if (defined(_MSC_VER) && _MSC_VER > 1700 /* MSVC2012 */) \ 41 | || (defined(__cpp_alias_templates) && __cpp_alias_templates >= 200704) \ 42 | || (defined(__cplusplus) && __cplusplus >= 201103L) 43 | #define FLATBUFFERS_TEMPLATES_ALIASES 44 | #endif 45 | 46 | // This header provides backwards compatibility for C++98 STLs like stlport. 47 | namespace flatbuffers { 48 | 49 | // Retrieve ::back() from a string in a way that is compatible with pre C++11 50 | // STLs (e.g stlport). 51 | inline char& string_back(std::string &value) { 52 | return value[value.length() - 1]; 53 | } 54 | 55 | inline char string_back(const std::string &value) { 56 | return value[value.length() - 1]; 57 | } 58 | 59 | // Helper method that retrieves ::data() from a vector in a way that is 60 | // compatible with pre C++11 STLs (e.g stlport). 61 | template inline T *vector_data(std::vector &vector) { 62 | // In some debug environments, operator[] does bounds checking, so &vector[0] 63 | // can't be used. 64 | return vector.empty() ? nullptr : &vector[0]; 65 | } 66 | 67 | template inline const T *vector_data( 68 | const std::vector &vector) { 69 | return vector.empty() ? 
nullptr : &vector[0]; 70 | } 71 | 72 | template 73 | inline void vector_emplace_back(std::vector *vector, V &&data) { 74 | #if defined(FLATBUFFERS_CPP98_STL) 75 | vector->push_back(data); 76 | #else 77 | vector->emplace_back(std::forward(data)); 78 | #endif // defined(FLATBUFFERS_CPP98_STL) 79 | } 80 | 81 | #ifndef FLATBUFFERS_CPP98_STL 82 | #if defined(FLATBUFFERS_TEMPLATES_ALIASES) 83 | template 84 | using numeric_limits = std::numeric_limits; 85 | #else 86 | template class numeric_limits : 87 | public std::numeric_limits {}; 88 | #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) 89 | #else 90 | template class numeric_limits : 91 | public std::numeric_limits { 92 | public: 93 | // Android NDK fix. 94 | static T lowest() { 95 | return std::numeric_limits::min(); 96 | } 97 | }; 98 | 99 | template <> class numeric_limits : 100 | public std::numeric_limits { 101 | public: 102 | static float lowest() { return -FLT_MAX; } 103 | }; 104 | 105 | template <> class numeric_limits : 106 | public std::numeric_limits { 107 | public: 108 | static double lowest() { return -DBL_MAX; } 109 | }; 110 | 111 | template <> class numeric_limits { 112 | public: 113 | static unsigned long long min() { return 0ULL; } 114 | static unsigned long long max() { return ~0ULL; } 115 | static unsigned long long lowest() { 116 | return numeric_limits::min(); 117 | } 118 | }; 119 | 120 | template <> class numeric_limits { 121 | public: 122 | static long long min() { 123 | return static_cast(1ULL << ((sizeof(long long) << 3) - 1)); 124 | } 125 | static long long max() { 126 | return static_cast( 127 | (1ULL << ((sizeof(long long) << 3) - 1)) - 1); 128 | } 129 | static long long lowest() { 130 | return numeric_limits::min(); 131 | } 132 | }; 133 | #endif // FLATBUFFERS_CPP98_STL 134 | 135 | #if defined(FLATBUFFERS_TEMPLATES_ALIASES) 136 | #ifndef FLATBUFFERS_CPP98_STL 137 | template using is_scalar = std::is_scalar; 138 | template using is_same = std::is_same; 139 | template using is_floating_point = 
std::is_floating_point; 140 | template using is_unsigned = std::is_unsigned; 141 | template using make_unsigned = std::make_unsigned; 142 | #else 143 | // Map C++ TR1 templates defined by stlport. 144 | template using is_scalar = std::tr1::is_scalar; 145 | template using is_same = std::tr1::is_same; 146 | template using is_floating_point = 147 | std::tr1::is_floating_point; 148 | template using is_unsigned = std::tr1::is_unsigned; 149 | // Android NDK doesn't have std::make_unsigned or std::tr1::make_unsigned. 150 | template struct make_unsigned { 151 | static_assert(is_unsigned::value, "Specialization not implemented!"); 152 | using type = T; 153 | }; 154 | template<> struct make_unsigned { using type = unsigned char; }; 155 | template<> struct make_unsigned { using type = unsigned short; }; 156 | template<> struct make_unsigned { using type = unsigned int; }; 157 | template<> struct make_unsigned { using type = unsigned long; }; 158 | template<> 159 | struct make_unsigned { using type = unsigned long long; }; 160 | #endif // !FLATBUFFERS_CPP98_STL 161 | #else 162 | // MSVC 2010 doesn't support C++11 aliases. 163 | template struct is_scalar : public std::is_scalar {}; 164 | template struct is_same : public std::is_same {}; 165 | template struct is_floating_point : 166 | public std::is_floating_point {}; 167 | template struct is_unsigned : public std::is_unsigned {}; 168 | template struct make_unsigned : public std::make_unsigned {}; 169 | #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) 170 | 171 | #ifndef FLATBUFFERS_CPP98_STL 172 | #if defined(FLATBUFFERS_TEMPLATES_ALIASES) 173 | template using unique_ptr = std::unique_ptr; 174 | #else 175 | // MSVC 2010 doesn't support C++11 aliases. 176 | // We're manually "aliasing" the class here as we want to bring unique_ptr 177 | // into the flatbuffers namespace. We have unique_ptr in the flatbuffers 178 | // namespace we have a completely independent implemenation (see below) 179 | // for C++98 STL implementations. 
180 | template class unique_ptr : public std::unique_ptr { 181 | public: 182 | unique_ptr() {} 183 | explicit unique_ptr(T* p) : std::unique_ptr(p) {} 184 | unique_ptr(std::unique_ptr&& u) { *this = std::move(u); } 185 | unique_ptr(unique_ptr&& u) { *this = std::move(u); } 186 | unique_ptr& operator=(std::unique_ptr&& u) { 187 | std::unique_ptr::reset(u.release()); 188 | return *this; 189 | } 190 | unique_ptr& operator=(unique_ptr&& u) { 191 | std::unique_ptr::reset(u.release()); 192 | return *this; 193 | } 194 | unique_ptr& operator=(T* p) { 195 | return std::unique_ptr::operator=(p); 196 | } 197 | }; 198 | #endif // defined(FLATBUFFERS_TEMPLATES_ALIASES) 199 | #else 200 | // Very limited implementation of unique_ptr. 201 | // This is provided simply to allow the C++ code generated from the default 202 | // settings to function in C++98 environments with no modifications. 203 | template class unique_ptr { 204 | public: 205 | typedef T element_type; 206 | 207 | unique_ptr() : ptr_(nullptr) {} 208 | explicit unique_ptr(T* p) : ptr_(p) {} 209 | unique_ptr(unique_ptr&& u) : ptr_(nullptr) { reset(u.release()); } 210 | unique_ptr(const unique_ptr& u) : ptr_(nullptr) { 211 | reset(const_cast(&u)->release()); 212 | } 213 | ~unique_ptr() { reset(); } 214 | 215 | unique_ptr& operator=(const unique_ptr& u) { 216 | reset(const_cast(&u)->release()); 217 | return *this; 218 | } 219 | 220 | unique_ptr& operator=(unique_ptr&& u) { 221 | reset(u.release()); 222 | return *this; 223 | } 224 | 225 | unique_ptr& operator=(T* p) { 226 | reset(p); 227 | return *this; 228 | } 229 | 230 | const T& operator*() const { return *ptr_; } 231 | T* operator->() const { return ptr_; } 232 | T* get() const noexcept { return ptr_; } 233 | explicit operator bool() const { return ptr_ != nullptr; } 234 | 235 | // modifiers 236 | T* release() { 237 | T* value = ptr_; 238 | ptr_ = nullptr; 239 | return value; 240 | } 241 | 242 | void reset(T* p = nullptr) { 243 | T* value = ptr_; 244 | ptr_ = p; 245 | 
if (value) delete value; 246 | } 247 | 248 | void swap(unique_ptr& u) { 249 | T* temp_ptr = ptr_; 250 | ptr_ = u.ptr_; 251 | u.ptr_ = temp_ptr; 252 | } 253 | 254 | private: 255 | T* ptr_; 256 | }; 257 | 258 | template bool operator==(const unique_ptr& x, 259 | const unique_ptr& y) { 260 | return x.get() == y.get(); 261 | } 262 | 263 | template bool operator==(const unique_ptr& x, 264 | const D* y) { 265 | return static_cast(x.get()) == y; 266 | } 267 | 268 | template bool operator==(const unique_ptr& x, intptr_t y) { 269 | return reinterpret_cast(x.get()) == y; 270 | } 271 | #endif // !FLATBUFFERS_CPP98_STL 272 | 273 | } // namespace flatbuffers 274 | 275 | #endif // FLATBUFFERS_STL_EMULATION_H_ 276 | -------------------------------------------------------------------------------- /MemoryMappedCollections/schema.fbs: -------------------------------------------------------------------------------- 1 | // Flatbuffers schema 2 | // Compile this file with 3 | // > flatc --cpp schema.fbs 4 | // For more info look at Flatbuffers docs: https://google.github.io/flatbuffers/flatbuffers_guide_tutorial.html 5 | // 6 | 7 | namespace flatcollections; 8 | 9 | // Dictionary [String: Int] 10 | table StringIntDictionary { 11 | entries:[StringIntDictionaryEntry]; 12 | } 13 | 14 | table StringIntDictionaryEntry { 15 | key:string (key); 16 | value:int64; 17 | } 18 | 19 | root_type StringIntDictionary; 20 | -------------------------------------------------------------------------------- /MemoryMappedCollections/schema_generated.h: -------------------------------------------------------------------------------- 1 | // automatically generated by the FlatBuffers compiler, do not modify 2 | 3 | 4 | #ifndef FLATBUFFERS_GENERATED_SCHEMA_FLATCOLLECTIONS_H_ 5 | #define FLATBUFFERS_GENERATED_SCHEMA_FLATCOLLECTIONS_H_ 6 | 7 | #include "flatbuffers/flatbuffers.h" 8 | 9 | namespace flatcollections { 10 | 11 | struct StringIntDictionary; 12 | 13 | struct StringIntDictionaryEntry; 14 | 15 | struct 
StringIntDictionary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 16 | enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { 17 | VT_ENTRIES = 4 18 | }; 19 | const flatbuffers::Vector> *entries() const { 20 | return GetPointer> *>(VT_ENTRIES); 21 | } 22 | bool Verify(flatbuffers::Verifier &verifier) const { 23 | return VerifyTableStart(verifier) && 24 | VerifyOffset(verifier, VT_ENTRIES) && 25 | verifier.VerifyVector(entries()) && 26 | verifier.VerifyVectorOfTables(entries()) && 27 | verifier.EndTable(); 28 | } 29 | }; 30 | 31 | struct StringIntDictionaryBuilder { 32 | flatbuffers::FlatBufferBuilder &fbb_; 33 | flatbuffers::uoffset_t start_; 34 | void add_entries(flatbuffers::Offset>> entries) { 35 | fbb_.AddOffset(StringIntDictionary::VT_ENTRIES, entries); 36 | } 37 | explicit StringIntDictionaryBuilder(flatbuffers::FlatBufferBuilder &_fbb) 38 | : fbb_(_fbb) { 39 | start_ = fbb_.StartTable(); 40 | } 41 | StringIntDictionaryBuilder &operator=(const StringIntDictionaryBuilder &); 42 | flatbuffers::Offset Finish() { 43 | const auto end = fbb_.EndTable(start_); 44 | auto o = flatbuffers::Offset(end); 45 | return o; 46 | } 47 | }; 48 | 49 | inline flatbuffers::Offset CreateStringIntDictionary( 50 | flatbuffers::FlatBufferBuilder &_fbb, 51 | flatbuffers::Offset>> entries = 0) { 52 | StringIntDictionaryBuilder builder_(_fbb); 53 | builder_.add_entries(entries); 54 | return builder_.Finish(); 55 | } 56 | 57 | inline flatbuffers::Offset CreateStringIntDictionaryDirect( 58 | flatbuffers::FlatBufferBuilder &_fbb, 59 | const std::vector> *entries = nullptr) { 60 | auto entries__ = entries ? 
_fbb.CreateVector>(*entries) : 0; 61 | return flatcollections::CreateStringIntDictionary( 62 | _fbb, 63 | entries__); 64 | } 65 | 66 | struct StringIntDictionaryEntry FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { 67 | enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { 68 | VT_KEY = 4, 69 | VT_VALUE = 6 70 | }; 71 | const flatbuffers::String *key() const { 72 | return GetPointer(VT_KEY); 73 | } 74 | bool KeyCompareLessThan(const StringIntDictionaryEntry *o) const { 75 | return *key() < *o->key(); 76 | } 77 | int KeyCompareWithValue(const char *val) const { 78 | return strcmp(key()->c_str(), val); 79 | } 80 | int64_t value() const { 81 | return GetField(VT_VALUE, 0); 82 | } 83 | bool Verify(flatbuffers::Verifier &verifier) const { 84 | return VerifyTableStart(verifier) && 85 | VerifyOffsetRequired(verifier, VT_KEY) && 86 | verifier.VerifyString(key()) && 87 | VerifyField(verifier, VT_VALUE) && 88 | verifier.EndTable(); 89 | } 90 | }; 91 | 92 | struct StringIntDictionaryEntryBuilder { 93 | flatbuffers::FlatBufferBuilder &fbb_; 94 | flatbuffers::uoffset_t start_; 95 | void add_key(flatbuffers::Offset key) { 96 | fbb_.AddOffset(StringIntDictionaryEntry::VT_KEY, key); 97 | } 98 | void add_value(int64_t value) { 99 | fbb_.AddElement(StringIntDictionaryEntry::VT_VALUE, value, 0); 100 | } 101 | explicit StringIntDictionaryEntryBuilder(flatbuffers::FlatBufferBuilder &_fbb) 102 | : fbb_(_fbb) { 103 | start_ = fbb_.StartTable(); 104 | } 105 | StringIntDictionaryEntryBuilder &operator=(const StringIntDictionaryEntryBuilder &); 106 | flatbuffers::Offset Finish() { 107 | const auto end = fbb_.EndTable(start_); 108 | auto o = flatbuffers::Offset(end); 109 | fbb_.Required(o, StringIntDictionaryEntry::VT_KEY); 110 | return o; 111 | } 112 | }; 113 | 114 | inline flatbuffers::Offset CreateStringIntDictionaryEntry( 115 | flatbuffers::FlatBufferBuilder &_fbb, 116 | flatbuffers::Offset key = 0, 117 | int64_t value = 0) { 118 | StringIntDictionaryEntryBuilder 
builder_(_fbb); 119 | builder_.add_value(value); 120 | builder_.add_key(key); 121 | return builder_.Finish(); 122 | } 123 | 124 | inline flatbuffers::Offset CreateStringIntDictionaryEntryDirect( 125 | flatbuffers::FlatBufferBuilder &_fbb, 126 | const char *key = nullptr, 127 | int64_t value = 0) { 128 | auto key__ = key ? _fbb.CreateString(key) : 0; 129 | return flatcollections::CreateStringIntDictionaryEntry( 130 | _fbb, 131 | key__, 132 | value); 133 | } 134 | 135 | inline const flatcollections::StringIntDictionary *GetStringIntDictionary(const void *buf) { 136 | return flatbuffers::GetRoot(buf); 137 | } 138 | 139 | inline const flatcollections::StringIntDictionary *GetSizePrefixedStringIntDictionary(const void *buf) { 140 | return flatbuffers::GetSizePrefixedRoot(buf); 141 | } 142 | 143 | inline bool VerifyStringIntDictionaryBuffer( 144 | flatbuffers::Verifier &verifier) { 145 | return verifier.VerifyBuffer(nullptr); 146 | } 147 | 148 | inline bool VerifySizePrefixedStringIntDictionaryBuffer( 149 | flatbuffers::Verifier &verifier) { 150 | return verifier.VerifySizePrefixedBuffer(nullptr); 151 | } 152 | 153 | inline void FinishStringIntDictionaryBuffer( 154 | flatbuffers::FlatBufferBuilder &fbb, 155 | flatbuffers::Offset root) { 156 | fbb.Finish(root); 157 | } 158 | 159 | inline void FinishSizePrefixedStringIntDictionaryBuffer( 160 | flatbuffers::FlatBufferBuilder &fbb, 161 | flatbuffers::Offset root) { 162 | fbb.FinishSizePrefixed(root); 163 | } 164 | 165 | } // namespace flatcollections 166 | 167 | #endif // FLATBUFFERS_GENERATED_SCHEMA_FLATCOLLECTIONS_H_ 168 | -------------------------------------------------------------------------------- /MemoryMappedCollectionsMacOS/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | 
CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2019 killobatt. All rights reserved. 23 | 24 | 25 | -------------------------------------------------------------------------------- /MemoryMappedCollectionsMacOS/MemoryMappedCollectionsMacOS.h: -------------------------------------------------------------------------------- 1 | // 2 | // MemoryMappedCollectionsMacOS.h 3 | // MemoryMappedCollectionsMacOS 4 | // 5 | // Created by Viacheslav Volodko on 25.07.2019. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | #import 10 | 11 | //! Project version number for MemoryMappedCollectionsMacOS. 12 | FOUNDATION_EXPORT double MemoryMappedCollectionsMacOSVersionNumber; 13 | 14 | //! Project version string for MemoryMappedCollectionsMacOS. 15 | FOUNDATION_EXPORT const unsigned char MemoryMappedCollectionsMacOSVersionString[]; 16 | 17 | #import 18 | #import 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /MemoryMappedCollectionsTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 | -------------------------------------------------------------------------------- /MemoryMappedCollectionsTests/MMStringIntDictionaryTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // MMStringIntDictionaryTests.swift 3 | // MemoryMappedCollectionsTests 
//
// Created by Viacheslav Volodko on 3/2/19.
// Copyright © 2019 killobatt. All rights reserved.
//

import XCTest
import MemoryMappedCollections

/// Round-trip and performance tests for `MMStringIntDictionaryBuilder`
/// (serialization) and `MMStringIntDictionary` (lookup over the serialized data).
final class MMStringIntDictionaryTests: XCTestCase {

    // MARK: - Fixtures

    /// Number of entries used by every performance test.
    private static let benchmarkEntryCount = 100_000

    /// Small fixed dictionary shared by the functional tests.
    private let sampleDictionary: [String: NSNumber] = ["hello": 1, "world": 2]

    /// Returns `benchmarkEntryCount` random UUID strings to be used as keys.
    private func makeRandomKeys() -> [String] {
        return (0..<MMStringIntDictionaryTests.benchmarkEntryCount).map { _ in UUID().uuidString }
    }

    /// Builds a dictionary that maps every key to a random `UInt32` boxed in `NSNumber`.
    ///
    /// - Parameter keys: Keys to insert; a repeated key simply overwrites the earlier value.
    /// - Returns: A dictionary in the shape `MMStringIntDictionaryBuilder(dictionary:)` accepts.
    private func makeRandomDictionary(keys: [String]) -> [String: NSNumber] {
        return keys.reduce(into: [String: NSNumber]()) { result, key in
            result[key] = NSNumber(value: UInt32.random(in: UInt32.min...UInt32.max))
        }
    }

    // MARK: - Functional tests

    /// Values written by the builder can be read back; an unknown key yields `NSNotFound`.
    func testBuildDictionary() {
        // GIVEN
        let dictionaryBuilder = MMStringIntDictionaryBuilder(dictionary: sampleDictionary)

        // WHEN
        let data = dictionaryBuilder.serialize()
        let dictionary = MMStringIntDictionary(data: data)

        // THEN
        XCTAssertEqual(dictionary.int64(forKey: "hello"), 1)
        XCTAssertEqual(dictionary.int64(forKey: "world"), 2)
        XCTAssertEqual(dictionary.int64(forKey: "not_found"), Int64(NSNotFound))
    }

    /// `allKeys` returns exactly the keys that were serialized (order is not asserted).
    func testAllKeys() {
        // GIVEN
        let dictionaryBuilder = MMStringIntDictionaryBuilder(dictionary: sampleDictionary)

        // WHEN
        let data = dictionaryBuilder.serialize()
        let dictionary = MMStringIntDictionary(data: data)

        // THEN
        XCTAssertEqual(dictionary.allKeys.sorted(), sampleDictionary.keys.sorted())
    }

    // MARK: - Performance tests

    /// Measures building and serializing a large dictionary.
    func testBuildPerformance() {
        // GIVEN
        let testDictionary = makeRandomDictionary(keys: makeRandomKeys())

        measure {
            // MEASURE
            let builder = MMStringIntDictionaryBuilder(dictionary: testDictionary)
            _ = builder.serialize()
        }
    }

    /// Measures wrapping already-serialized data plus one lookup per key.
    func testReadAndGetPerformance() {
        // GIVEN
        let keys = makeRandomKeys()
        let builder = MMStringIntDictionaryBuilder(dictionary: makeRandomDictionary(keys: keys))
        let data = builder.serialize()

        measure {
            // MEASURE
            let dictionary = MMStringIntDictionary(data: data)
            // `map` is kept (result discarded) so the measured work matches the baseline test below.
            _ = keys.map { dictionary.int64(forKey: $0) }
        }
    }

    /// Measures lookups only; the dictionary is created outside the measured block.
    func testReadPerformance() {
        // GIVEN
        let keys = makeRandomKeys()
        let builder = MMStringIntDictionaryBuilder(dictionary: makeRandomDictionary(keys: keys))
        let data = builder.serialize()
        let dictionary = MMStringIntDictionary(data: data)

        measure {
            // MEASURE
            _ = keys.map { dictionary.int64(forKey: $0) }
        }
    }

    /// Baseline: the same lookups against a plain Swift `Dictionary`.
    func testDictionaryComparePerformance() {
        // GIVEN
        let keys = makeRandomKeys()
        let testDictionary = makeRandomDictionary(keys: keys)

        measure {
            // MEASURE
            _ = keys.map { testDictionary[$0] }
        }
    }

}

--------------------------------------------------------------------------------
/MessageFilteringApp/AppDelegate.swift:
--------------------------------------------------------------------------------
//
// AppDelegate.swift
// MessageFilteringApp
//
// Created by Viacheslav Volodko on 2/27/19.
// Copyright © 2019 killobatt. All rights reserved.
7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | var window: UIWindow? 15 | 16 | 17 | func application(_ application: UIApplication, 18 | didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { 19 | return true 20 | } 21 | 22 | func applicationWillResignActive(_ application: UIApplication) { } 23 | 24 | func applicationDidEnterBackground(_ application: UIApplication) { } 25 | 26 | func applicationWillEnterForeground(_ application: UIApplication) { } 27 | 28 | func applicationDidBecomeActive(_ application: UIApplication) { } 29 | 30 | func applicationWillTerminate(_ application: UIApplication) { } 31 | 32 | 33 | } 34 | 35 | -------------------------------------------------------------------------------- /MessageFilteringApp/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | 
"idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "size" : "1024x1024", 91 | "scale" : "1x" 92 | } 93 | ], 94 | "info" : { 95 | "version" : 1, 96 | "author" : "xcode" 97 | } 98 | } -------------------------------------------------------------------------------- /MessageFilteringApp/Assets.xcassets/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "info" : { 3 | "version" : 1, 4 | "author" : "xcode" 5 | } 6 | } -------------------------------------------------------------------------------- /MessageFilteringApp/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /MessageFilteringApp/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 63 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 
-------------------------------------------------------------------------------- /MessageFilteringApp/ClassifiersComparison/ClassificationComparisonModel.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClassificationComparisonModel.swift 3 | // MessageFilteringApp 4 | // 5 | // Created by Viacheslav Volodko on 3/1/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import TextClassification 10 | import CoreML 11 | 12 | struct ClassificationResult { 13 | var classifierName: String 14 | var resultLabel: String 15 | } 16 | 17 | class ClassificationComparisonModel { 18 | private(set) var classifiers: [NamedTextClassifier] 19 | 20 | init() { 21 | // let languageRecognizerClassifier = LanguageRecognizerClassifier() 22 | // let coreMLClassifier = CoreMLLanguageClassifier() 23 | // let naiveBayesClassifier = NaiveBayesClassifier.loadClassifier() 24 | let memoryMappedNaiveBayes = MemoryMappedNaiveBayesClassifier.loadClassifier() 25 | self.classifiers = [ 26 | // languageRecognizerClassifier, 27 | // coreMLClassifier, 28 | // naiveBayesClassifier, 29 | memoryMappedNaiveBayes, 30 | ] 31 | } 32 | 33 | func comparePrediction(for text: String) -> [ClassificationResult] { 34 | return classifiers.map { ClassificationResult(classifierName: $0.name, 35 | resultLabel: $0.predictedLabel(for: text) ?? "unknown") } 36 | } 37 | } 38 | 39 | protocol NamedTextClassifier: TextClassifier { 40 | var name: String { get } 41 | } 42 | 43 | extension LanguageRecognizerClassifier: NamedTextClassifier { 44 | var name: String { 45 | return "NLLanguageRecognizer" 46 | } 47 | } 48 | 49 | extension CoreMLLanguageClassifier: NamedTextClassifier { 50 | var name: String { 51 | return "Core ML" 52 | } 53 | 54 | func predictedLabel(for text: String) -> String? { 55 | let prediction = try? 
self.prediction(text: text) 56 | return prediction?.label 57 | } 58 | } 59 | 60 | extension NaiveBayesClassifier: NamedTextClassifier { 61 | var name: String { 62 | return "Naive Bayes" 63 | } 64 | 65 | static func loadClassifier() -> NaiveBayesClassifier { 66 | guard let url = Bundle.main.url(forResource: "NaiveBayes", withExtension: "model") else { 67 | fatalError("Missing resource file: NaiveBayes.model") 68 | } 69 | do { 70 | return try NaiveBayesClassifier(fileURL: url, preprocessor: TrivialPreprocessor()) 71 | } catch let error { 72 | fatalError("Error loading NaiveBayesClassifier from \(url):\n \(error)") 73 | } 74 | 75 | } 76 | } 77 | 78 | extension MemoryMappedNaiveBayesClassifier: NamedTextClassifier { 79 | var name: String { 80 | return "Naive Bayes + Memory mapping" 81 | } 82 | 83 | static func loadClassifier() -> MemoryMappedNaiveBayesClassifier { 84 | guard let url = Bundle.main.url(forResource: "MemoryMappedBayes", withExtension: "model") else { 85 | fatalError("Missing resource file: MemoryMappedBayes.model") 86 | } 87 | do { 88 | return try MemoryMappedNaiveBayesClassifier(fileURL: url, preprocessor: TrivialPreprocessor()) 89 | } catch let error { 90 | fatalError("Error loading MemoryMappedNaiveBayesClassifier from \(url):\n \(error)") 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /MessageFilteringApp/ClassifiersComparison/ClassifierComparisonResultCell.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClassifierComparisonResultCell.swift 3 | // MessageFilteringApp 4 | // 5 | // Created by Viacheslav Volodko on 3/1/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | class ClassifierComparisonResultCell: UITableViewCell { 12 | 13 | // MARK: - DI 14 | 15 | var result: ClassificationResult? 
{ 16 | didSet { 17 | updateUI() 18 | } 19 | } 20 | 21 | // MARK: - IBOutlet 22 | 23 | @IBOutlet private weak var classifierNameLabel: UILabel! 24 | @IBOutlet private weak var classificationResultLabel: UILabel! 25 | 26 | // MARK: - UITableViewCell 27 | 28 | static var reuseIdentifier: String { 29 | return "\(self)" 30 | } 31 | 32 | override func awakeFromNib() { 33 | super.awakeFromNib() 34 | updateUI() 35 | } 36 | 37 | // MARK: - Private 38 | 39 | private func updateUI() { 40 | classifierNameLabel.text = result?.classifierName ?? "--" 41 | classificationResultLabel.text = result?.resultLabel ?? "--" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /MessageFilteringApp/ClassifiersComparison/ClassifierComparisonTextInputCell.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClassifierComparisonTextInputCell.swift 3 | // MessageFilteringApp 4 | // 5 | // Created by Viacheslav Volodko on 3/1/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | protocol ClassifierComparisonTextInputCellDelegate: AnyObject { 12 | func textChanged(_ text: String) 13 | } 14 | 15 | class ClassifierComparisonTextInputCell: UITableViewCell { 16 | 17 | // MARK: - DI 18 | 19 | weak var delegate: ClassifierComparisonTextInputCellDelegate? // weak: the view controller assigned here already owns this cell through its table view 20 | 21 | // MARK: - IBOutlet 22 | 23 | @IBOutlet private weak var textView: UITextView! 
24 | 25 | // MARK: - UITableViewCell 26 | 27 | static var reuseIdentifier: String { 28 | return "\(self)" 29 | } 30 | 31 | override func awakeFromNib() { 32 | super.awakeFromNib() 33 | textView.delegate = self 34 | } 35 | } 36 | 37 | extension ClassifierComparisonTextInputCell: UITextViewDelegate { 38 | func textViewDidChange(_ textView: UITextView) { 39 | delegate?.textChanged(textView.text) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /MessageFilteringApp/ClassifiersComparison/ClassifiersComparisonViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ClassifiersComparisonViewController.swift 3 | // TextClassification 4 | // 5 | // Created by Viacheslav Volodko on 3/1/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | class ClassifiersComparisonViewController: UITableViewController { 12 | 13 | // MARK: - Data 14 | 15 | var model: ClassificationComparisonModel! 
16 | var results: [ClassificationResult] = [] 17 | 18 | // MARK: - UIViewController 19 | 20 | override func viewDidLoad() { 21 | super.viewDidLoad() 22 | 23 | model = ClassificationComparisonModel() 24 | } 25 | 26 | // MARK: - UITableViewController 27 | 28 | override func numberOfSections(in tableView: UITableView) -> Int { 29 | return 2 30 | } 31 | 32 | override func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int { 33 | if section == 0 { 34 | return 1 35 | } else { 36 | return results.count 37 | } 38 | } 39 | 40 | override func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell { 41 | let cell: UITableViewCell 42 | if indexPath.section == 0 { 43 | cell = tableView.dequeueReusableCell(withIdentifier: ClassifierComparisonTextInputCell.reuseIdentifier, 44 | for: indexPath) 45 | } else { 46 | cell = tableView.dequeueReusableCell(withIdentifier: ClassifierComparisonResultCell.reuseIdentifier, 47 | for: indexPath) 48 | } 49 | 50 | if let textInputCell = cell as? ClassifierComparisonTextInputCell { 51 | textInputCell.delegate = self 52 | } else if let resultCell = cell as? 
ClassifierComparisonResultCell { 53 | resultCell.result = results[indexPath.row] 54 | } 55 | 56 | return cell 57 | } 58 | } 59 | 60 | 61 | extension ClassifiersComparisonViewController: ClassifierComparisonTextInputCellDelegate { 62 | func textChanged(_ text: String) { 63 | results = model.comparePrediction(for: text) 64 | tableView.reloadSections(IndexSet([1]), with: .none) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /MessageFilteringApp/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | UILaunchStoryboardName 24 | LaunchScreen 25 | UIMainStoryboardFile 26 | Main 27 | UIRequiredDeviceCapabilities 28 | 29 | armv7 30 | 31 | UISupportedInterfaceOrientations 32 | 33 | UIInterfaceOrientationPortrait 34 | UIInterfaceOrientationLandscapeLeft 35 | UIInterfaceOrientationLandscapeRight 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | UIInterfaceOrientationPortraitUpsideDown 41 | UIInterfaceOrientationLandscapeLeft 42 | UIInterfaceOrientationLandscapeRight 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /MessageFilteringExtension/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleDisplayName 8 | MessageFilteringExtension 9 | CFBundleExecutable 10 | $(EXECUTABLE_NAME) 11 | CFBundleIdentifier 12 | $(PRODUCT_BUNDLE_IDENTIFIER) 13 | CFBundleInfoDictionaryVersion 14 | 6.0 
15 | CFBundleName 16 | $(PRODUCT_NAME) 17 | CFBundlePackageType 18 | XPC! 19 | CFBundleShortVersionString 20 | 1.0 21 | CFBundleVersion 22 | 1 23 | NSExtension 24 | 25 | NSExtensionAttributes 26 | 27 | ILMessageFilterExtensionNetworkURL 28 | https://www.example-sms-filter-application.com/api 29 | 30 | NSExtensionPointIdentifier 31 | com.apple.identitylookup.message-filter 32 | NSExtensionPrincipalClass 33 | $(PRODUCT_MODULE_NAME).MessageFilterExtension 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /MessageFilteringExtension/MessageFilterExtension.swift: -------------------------------------------------------------------------------- 1 | // 2 | // MessageFilterExtension.swift 3 | // MessageFilteringExtension 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import IdentityLookup 10 | import TextClassification 11 | 12 | final class MessageFilterExtension: ILMessageFilterExtension {} 13 | 14 | extension MessageFilterExtension: ILMessageFilterQueryHandling { 15 | 16 | func handle(_ queryRequest: ILMessageFilterQueryRequest, context: ILMessageFilterExtensionContext, completion: @escaping (ILMessageFilterQueryResponse) -> Void) { 17 | let action = self.action(for: queryRequest) 18 | completion(ILMessageFilterQueryResponse(action: action)) 19 | } 20 | 21 | private func action(for queryRequest: ILMessageFilterQueryRequest) -> ILMessageFilterAction { 22 | guard let messageText = queryRequest.messageBody, 23 | let classifier = loadMemoryMappedNaiveBayesClassifier(), 24 | let label = classifier.predictedLabel(for: messageText) else { 25 | return .none 26 | } 27 | 28 | return label.contains("translit") ? .filter : .allow 29 | } 30 | 31 | private func loadClassifier() -> TextClassifier? 
{ 32 | guard let classifierURL = Bundle(for: type(of: self)).url(forResource: "CoreMLLanguageClassifier", 33 | withExtension: "mlmodelc"), 34 | let classifier = try? CoreMLClassifier(fileURL: classifierURL) else { 35 | return nil 36 | } 37 | return classifier 38 | } 39 | 40 | private func loadLanguageRecognizerClassifier() -> LanguageRecognizerClassifier? { 41 | return LanguageRecognizerClassifier() 42 | } 43 | 44 | private func loadCoreMLClassifier() -> CoreMLClassifier? { 45 | guard let classifierURL = Bundle(for: type(of: self)).url(forResource: "CoreMLLanguageClassifier", 46 | withExtension: "mlmodelc") else { 47 | return nil 48 | } 49 | return try? CoreMLClassifier(fileURL: classifierURL) 50 | } 51 | 52 | private func loadNaiveBayesClassifier() -> NaiveBayesClassifier? { 53 | guard let url = Bundle(for: type(of: self)).url(forResource: "NaiveBayes", withExtension: "model") else { 54 | return nil 55 | } 56 | return try? NaiveBayesClassifier(fileURL: url, preprocessor: TrivialPreprocessor()) 57 | } 58 | 59 | private func loadMemoryMappedNaiveBayesClassifier() -> MemoryMappedNaiveBayesClassifier? { 60 | guard let url = Bundle(for: type(of: self)).url(forResource: "MemoryMappedBayes", withExtension: "model") else { 61 | return nil 62 | } 63 | return try? MemoryMappedNaiveBayesClassifier(fileURL: url, preprocessor: TrivialPreprocessor()) 64 | } 65 | } 66 | 67 | extension ILMessageFilterQueryResponse { 68 | convenience init(action: ILMessageFilterAction) { 69 | self.init() 70 | self.action = action 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TextClassification 2 | Text classification approaches in Swift 5.0. With/Without CoreML 3 | 4 | This repo has sample codes for tech talk "Classifying a text to iOS without CoreML: 5 | how and why?" 
at https://eatdog.com.ua on March 21, 2019 and at the 15th CocoaHeads Kyiv https://cocoaheads.org.ua/cocoaheadskyiv/15 on July 28th, 2019. 6 | 7 | ## Getting started 8 | 9 | 1. Ensure you have Carthage installed: 10 | 11 | ``` 12 | brew install carthage 13 | ``` 14 | 2. Install dependencies: 15 | 16 | ``` 17 | carthage bootstrap 18 | ``` 19 | 20 | 3. Run unit tests for the `TextClassificationMacOS` target. Failing tests are expected: this way they display the actual accuracy of each classification method as output. 21 | 22 | 4. To reproduce the **MessageFilteringExtension** RAM problem, use `CoreMLClassifier` for message filtering in `MessageFilterExtension.swift`. You will have to run this extension on a real iPhone and receive a real SMS from an unknown sender to trigger the extension. Debugger works more or less fine. Switching the text classifier to `MemoryMappedNaiveBayesClassifier` demonstrates fitting into the 6Mb memory limit. 23 | 24 | 5. If you just want to see text classification in action, run the **MessageFilteringApp** on either a device or a simulator. 25 | 26 | ## Licence 27 | 28 | This project is released under the MIT licence. 
It uses Google Flatbuffer library as a dependency: https://github.com/google/flatbuffers/blob/master/LICENSE.txt 29 | -------------------------------------------------------------------------------- /TestOutput/CoreMLLanguageClassifier.mlmodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/CoreMLLanguageClassifier.mlmodel -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/de: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/de -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/en: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/en -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/info.plist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/info.plist -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/ru: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/ru -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/ru_translit: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/ru_translit -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/uk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/uk -------------------------------------------------------------------------------- /TestOutput/MemoryMappedBayes.model/uk_translit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/MemoryMappedBayes.model/uk_translit -------------------------------------------------------------------------------- /TestOutput/NaiveBayes.model: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/killobatt/TextClassification/d7965213f18dc679cb4ced4a0b9f33406936da58/TestOutput/NaiveBayes.model -------------------------------------------------------------------------------- /TextClassification.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- 
/TextClassification.xcodeproj/xcshareddata/xcbaselines/CBDFCA34222A937700C5E282.xcbaseline/50B73061-30A0-46E4-A070-D5EB7D0C8174.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | MMStringIntDictionaryTests 8 | 9 | testBuildPerformance() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 0.504 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | testDictionaryComparePerformance() 20 | 21 | com.apple.XCTPerformanceMetric_WallClockTime 22 | 23 | baselineAverage 24 | 0.048934 25 | baselineIntegrationDisplayName 26 | Local Baseline 27 | 28 | 29 | testReadAndGetPerformance() 30 | 31 | com.apple.XCTPerformanceMetric_WallClockTime 32 | 33 | baselineAverage 34 | 0.14882 35 | baselineIntegrationDisplayName 36 | Local Baseline 37 | 38 | 39 | testReadPerformance() 40 | 41 | com.apple.XCTPerformanceMetric_WallClockTime 42 | 43 | baselineAverage 44 | 0.15782 45 | baselineIntegrationDisplayName 46 | Local Baseline 47 | 48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcbaselines/CBDFCA34222A937700C5E282.xcbaseline/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | runDestinationsByUUID 6 | 7 | 50B73061-30A0-46E4-A070-D5EB7D0C8174 8 | 9 | localComputer 10 | 11 | busSpeedInMHz 12 | 100 13 | cpuCount 14 | 1 15 | cpuKind 16 | Intel Core i7 17 | cpuSpeedInMHz 18 | 3100 19 | logicalCPUCoresPerPackage 20 | 8 21 | modelCode 22 | MacBookPro14,3 23 | physicalCPUCoresPerPackage 24 | 4 25 | platformIdentifier 26 | com.apple.platform.macosx 27 | 28 | targetArchitecture 29 | x86_64 30 | targetDevice 31 | 32 | modelCode 33 | iPhone11,8 34 | platformIdentifier 35 | com.apple.platform.iphonesimulator 36 | 37 | 38 | 39 | 40 | 41 | 
-------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcbaselines/CBF4C8522225EAB000CDF6F9.xcbaseline/3405DF52-46FB-42BA-99DA-D92954CC76C1.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | AdvancedPreprocessorTests 8 | 9 | testPerformance() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 0.020202 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | 20 | MemoryMappedNaiveBayesClassifierTests 21 | 22 | testPerformance() 23 | 24 | com.apple.XCTPerformanceMetric_WallClockTime 25 | 26 | baselineAverage 27 | 0.0073586 28 | baselineIntegrationDisplayName 29 | Local Baseline 30 | 31 | 32 | 33 | NaiveBayesClassifierTests 34 | 35 | testPerformance() 36 | 37 | com.apple.XCTPerformanceMetric_WallClockTime 38 | 39 | baselineAverage 40 | 0.0018412 41 | baselineIntegrationDisplayName 42 | Local Baseline 43 | 44 | 45 | 46 | TrivialPreprocessorTests 47 | 48 | testPerformance() 49 | 50 | com.apple.XCTPerformanceMetric_WallClockTime 51 | 52 | baselineAverage 53 | 0.0072883 54 | baselineIntegrationDisplayName 55 | Local Baseline 56 | 57 | 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcbaselines/CBF4C8522225EAB000CDF6F9.xcbaseline/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | runDestinationsByUUID 6 | 7 | 3405DF52-46FB-42BA-99DA-D92954CC76C1 8 | 9 | localComputer 10 | 11 | busSpeedInMHz 12 | 100 13 | cpuCount 14 | 1 15 | cpuKind 16 | Intel Core i7 17 | cpuSpeedInMHz 18 | 3100 19 | logicalCPUCoresPerPackage 20 | 8 21 | modelCode 22 | MacBookPro14,3 23 | physicalCPUCoresPerPackage 24 | 4 25 | platformIdentifier 26 | com.apple.platform.macosx 27 | 28 | targetArchitecture 29 | x86_64 30 | 31 | 32 | 33 | 34 | 
-------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcschemes/MemoryMappedCollections.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 44 | 45 | 47 | 53 | 54 | 55 | 56 | 57 | 63 | 64 | 65 | 66 | 67 | 68 | 78 | 79 | 85 | 86 | 87 | 88 | 89 | 90 | 96 | 97 | 103 | 104 | 105 | 106 | 108 | 109 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcschemes/MemoryMappedCollectionsMacOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 34 | 35 | 45 | 46 | 52 | 53 | 54 | 55 | 56 | 57 | 63 | 64 | 70 | 71 | 72 | 73 | 75 | 76 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcschemes/TextClassification.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 33 | 39 | 40 | 41 | 42 | 43 | 49 | 50 | 51 | 52 | 53 | 54 | 64 | 65 | 71 | 72 | 73 | 74 | 75 | 76 | 82 | 83 | 89 | 90 | 91 | 92 | 94 | 95 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /TextClassification.xcodeproj/xcshareddata/xcschemes/TextClassificationMacOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 44 | 45 | 47 | 53 | 54 | 55 | 56 | 57 | 63 | 64 | 65 | 66 | 67 | 68 | 78 | 79 | 85 | 86 | 87 | 88 | 89 | 90 | 96 | 97 | 103 | 104 | 105 | 106 | 108 | 109 | 112 | 113 | 114 | -------------------------------------------------------------------------------- 
/TextClassification.xcodeproj/xcuserdata/killobatt.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | MemoryMappedCollections.xcscheme_^#shared#^_ 8 | 9 | orderHint 10 | 2 11 | 12 | MemoryMappedCollectionsMacOS.xcscheme_^#shared#^_ 13 | 14 | orderHint 15 | 3 16 | 17 | MessageFilteringApp.xcscheme_^#shared#^_ 18 | 19 | orderHint 20 | 4 21 | 22 | MessageFilteringExtension.xcscheme_^#shared#^_ 23 | 24 | orderHint 25 | 5 26 | 27 | TextClassification.xcscheme_^#shared#^_ 28 | 29 | orderHint 30 | 1 31 | 32 | TextClassificationMacOS.xcscheme_^#shared#^_ 33 | 34 | orderHint 35 | 0 36 | 37 | 38 | SuppressBuildableAutocreation 39 | 40 | CB2ED11A22EA47AA008F0C91 41 | 42 | primary 43 | 44 | 45 | CB61FF95222304E700E21B14 46 | 47 | primary 48 | 49 | 50 | CB61FF9E222304E700E21B14 51 | 52 | primary 53 | 54 | 55 | CBDFC9C32229F15D00C5E282 56 | 57 | primary 58 | 59 | 60 | CBF4C8462225EA9800CDF6F9 61 | 62 | primary 63 | 64 | 65 | CBF4C8522225EAB000CDF6F9 66 | 67 | primary 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /TextClassification/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | 22 | 23 | -------------------------------------------------------------------------------- /TextClassification/TextClassification.h: -------------------------------------------------------------------------------- 1 | // 2 | // TextClassification.h 3 | // TextClassification 4 | // 5 | // 
Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | #import 10 | 11 | //! Project version number for TextClassification. 12 | FOUNDATION_EXPORT double TextClassificationVersionNumber; 13 | 14 | //! Project version string for TextClassification. 15 | FOUNDATION_EXPORT const unsigned char TextClassificationVersionString[]; 16 | 17 | // In this header, you should import all the public headers of your framework using statements like #import 18 | 19 | 20 | -------------------------------------------------------------------------------- /TextClassificationMacOS/Dataset/Dataset+MLDataTable.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Dataset+MLDataTable.swift 3 | // TextClassificationMacOS 4 | // 5 | // Created by Viacheslav Volodko on 2/26/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import CreateML 10 | 11 | extension Dataset { 12 | 13 | typealias Column = CoreMLClassifier.DataTableColumnName 14 | 15 | func mlDataTable(preprocessedUsing preprocessor: Preprocessor? = nil) -> MLDataTable { 16 | let data: [String: MLDataValueConvertible] = [ 17 | Column.id.rawValue: items.map { $0.id }, 18 | Column.text.rawValue: items.map { preprocessor?.preprocessedText(for: $0.text) ?? 
$0.text }, 19 | Column.label.rawValue: items.map { $0.label }, 20 | ] 21 | 22 | do { 23 | return try MLDataTable(dictionary: data) 24 | } catch let error { 25 | fatalError("Error creating data table: \(error)") 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /TextClassificationMacOS/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2019 killobatt. All rights reserved. 23 | 24 | 25 | -------------------------------------------------------------------------------- /TextClassificationMacOS/TextClassificationMacOS.h: -------------------------------------------------------------------------------- 1 | // 2 | // TextClassificationMacOS.h 3 | // TextClassificationMacOS 4 | // 5 | // Created by Viacheslav Volodko on 2/26/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | #import 10 | 11 | //! Project version number for TextClassificationMacOS. 12 | FOUNDATION_EXPORT double TextClassificationMacOSVersionNumber; 13 | 14 | //! Project version string for TextClassificationMacOS. 
15 | FOUNDATION_EXPORT const unsigned char TextClassificationMacOSVersionString[]; 16 | 17 | // In this header, you should import all the public headers of your framework using statements like #import 18 | 19 | 20 | -------------------------------------------------------------------------------- /TextClassificationMacOS/TextClassifier/CoreMLClassifier+CreateML.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CoreMLClassifier+CreateML.swift 3 | // TextClassificationMacOS 4 | // 5 | // Created by Viacheslav Volodko on 2/26/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import CreateML 10 | 11 | extension CoreMLClassifier: TrainableTextClassifier { 12 | public static func train(with preprocessor: Preprocessor, on dataset: Dataset) -> TextClassifier { 13 | let mlClassifier = trainMLClassifier(with: preprocessor, on: dataset) 14 | return CoreMLClassifier(mlModel: mlClassifier.model) 15 | } 16 | 17 | public static func trainMLClassifier(with preprocessor: Preprocessor, on dataset: Dataset) -> MLTextClassifier { 18 | do { 19 | let trainingDataTable = dataset.mlDataTable(preprocessedUsing: preprocessor) 20 | let mlClassifier = try MLTextClassifier(trainingData: trainingDataTable, 21 | textColumn: DataTableColumnName.text.rawValue, 22 | labelColumn: DataTableColumnName.label.rawValue) 23 | return mlClassifier 24 | } catch let error { 25 | fatalError("Error training classifier: \(error)") 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /TextClassificationMacOSTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 
/// Shared base class for the classifier and dataset test cases.
///
/// Before each test it loads `TestBundleSettings` from the bundle that contains the
/// running test class and builds `TestDatasets` from those settings; after each test
/// both fixtures are released.
class BaseClassifierTestCase: XCTestCase {

    /// Datasets built from `bundleSettings`; set in `setUp()`, cleared in `tearDown()`.
    var testDatasets: TestDatasets!
    /// Settings read from the test bundle; set in `setUp()`, cleared in `tearDown()`.
    var bundleSettings: TestBundleSettings!

    override func setUp() {
        // Chain to the superclass first, matching what every subclass of this case does.
        super.setUp()

        let bundle = Bundle(for: type(of: self))
        guard let settings = TestBundleSettings(bundle: bundle) else {
            // Without settings no dataset can be built; both fixtures stay nil.
            XCTFail("Could not load bundle settings")
            return
        }
        bundleSettings = settings
        testDatasets = TestDatasets(bundleSettings: settings)
    }

    override func tearDown() {
        testDatasets = nil
        bundleSettings = nil
        // Release our own fixtures before handing control back to the superclass.
        super.tearDown()
    }
}
/// Tests for training a `CoreMLClassifier` through Create ML on the bundled test dataset.
///
/// Fixtures (`testDatasets`, `bundleSettings`) come from `BaseClassifierTestCase`; this
/// class adds no set-up or tear-down of its own.
class CoreMLClassifier_CreateMLTests: BaseClassifierTestCase {

    /// Trains on the full test dataset, exports the model to the output directory and
    /// checks the predicted label for one sample text per expected language.
    func testTrain() {
        // GIVEN
        let preprocessor = TrivialPreprocessor()
        let testDataset = testDatasets.testDataset

        // WHEN
        let mlClassifier = CoreMLClassifier.trainMLClassifier(with: preprocessor, on: testDataset)
        let modelURL = bundleSettings.outputURL.appendingPathComponent("CoreMLLanguageClassifier.mlmodel")
        // A failed export is reported as a test failure instead of being discarded with `try?`,
        // so a stale model file in the output directory does not go unnoticed.
        XCTAssertNoThrow(try mlClassifier.write(to: modelURL),
                         "Could not write the trained model to \(modelURL)")
        let classifier = CoreMLClassifier(mlModel: mlClassifier.model)

        // THEN
        let ukrPrediction = classifier.predictedLabel(for: "Вас вітає Славік!")
        XCTAssertEqual(ukrPrediction, "uk")

        let engPrediction = classifier.predictedLabel(for: "Welcome to paradise")
        XCTAssertEqual(engPrediction, "en")

        let dePrediction = classifier.predictedLabel(for: "Has du bist?")
        XCTAssertEqual(dePrediction, "de")

        let translitUkrPrediction = classifier.predictedLabel(for: "Vashe zamovlennya gotove. " +
            "Uvaga: rezultat v laboratornomu centri za predjavlennyam formy zamovlennya abo pasporta.")
        XCTAssertEqual(translitUkrPrediction, "uk_translit")

        let translitRusPrediction = classifier.predictedLabel(for: "Kruto! Vash zakaz oplachen. Na mail@example.com " +
            "otpravleny Vashy bilety. Ssylka na eti zhe bilety.")
        XCTAssertEqual(translitRusPrediction, "ru_translit")
    }

    /// Trains on one part of the dataset and evaluates on the other part, as split by
    /// `splitTestDataset(startPersentage: 0.8, endPersentage: 1.0)`.
    func testAccuracy() {
        // GIVEN
        let preprocessor = TrivialPreprocessor()
        let (trainDataset, testDataset) = testDatasets.testDataset.splitTestDataset(startPersentage: 0.8,
                                                                                    endPersentage: 1.0)
        let classifier = CoreMLClassifier.train(with: preprocessor, on: trainDataset)

        // WHEN
        let testResults = classifier.test(on: testDataset)

        // THEN
        // NOTE(review): if `accuracy` is a 0...1 fraction this assertion can never pass;
        // confirm the intended scale/threshold against `TestResults`.
        XCTAssertGreaterThan(testResults.accuracy, 1.0)
    }

    /// Cross-validates the classifier on the full dataset using `TrivialPreprocessor`.
    func testCrossvalidateTrivialPrepocessor() {
        // GIVEN
        let dataset = testDatasets.testDataset

        // WHEN
        let results = CoreMLClassifier.crossValidate(on: dataset, with: TrivialPreprocessor())

        // THEN
        // NOTE(review): same `> 1.0` threshold question as in `testAccuracy`.
        XCTAssertGreaterThan(results.accuracy, 1.0)
    }

    /// Cross-validates the classifier on the full dataset using `AdvancedPreprocessor`.
    func testCrossvalidateAdvancedPreprocessor() {
        // GIVEN
        let dataset = testDatasets.testDataset

        // WHEN
        let results = CoreMLClassifier.crossValidate(on: dataset, with: AdvancedPreprocessor())

        // THEN
        // NOTE(review): same `> 1.0` threshold question as in `testAccuracy`.
        XCTAssertGreaterThan(results.accuracy, 1.0)
    }
}
15 | 16 | override func setUp() { 17 | super.setUp() 18 | testedClassifier = LanguageRecognizerClassifier() 19 | } 20 | 21 | override func tearDown() { 22 | testedClassifier = nil 23 | super.tearDown() 24 | } 25 | 26 | func testTrain() { 27 | // GIVEN 28 | 29 | // WHEN 30 | 31 | // THEN 32 | let ukrPrediction = testedClassifier.predictedLabel(for: "Вас вітає Славік!") 33 | XCTAssertEqual(ukrPrediction, "uk") 34 | 35 | let engPrediction = testedClassifier.predictedLabel(for: "Welcome to paradise") 36 | XCTAssertEqual(engPrediction, "en") 37 | 38 | let dePrediction = testedClassifier.predictedLabel(for: "Has du bist?") 39 | XCTAssertEqual(dePrediction, "de") 40 | 41 | let translitUkrPrediction = testedClassifier.predictedLabel(for: "Vashe zamovlennya gotove. " + 42 | "Uvaga: rezultat v laboratornomu centri za predjavlennyam formy zamovlennya abo pasporta.") 43 | XCTAssertEqual(translitUkrPrediction, "uk_translit") 44 | 45 | let translitRusPrediction = testedClassifier.predictedLabel(for: "Kruto! Vash zakaz oplachen. Na mail@example.com " + 46 | "otpravleny Vashy bilety. Ssylka na eti zhe bilety.") 47 | XCTAssertEqual(translitRusPrediction, "ru_translit") 48 | } 49 | 50 | func testAccuracy() { 51 | // GIVEN 52 | let testDataset = testDatasets.testDataset 53 | 54 | // WHEN 55 | let results = testedClassifier.test(on: testDataset) 56 | 57 | // THEN 58 | XCTAssertGreaterThan(results.accuracy, 1.0) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /TextClassificationMacOSTests/TestCases/Classifier/MemoryMappedNaiveBayesClassifierTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // MemoryMappedNaiveBayesClassifierTests.swift 3 | // TextClassificationTests 4 | // 5 | // Created by Viacheslav Volodko on 2/27/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 
/// Tests for `MemoryMappedNaiveBayesClassifier` trained on the bundled test dataset.
///
/// Fixtures (`testDatasets`, `bundleSettings`) come from `BaseClassifierTestCase`.
class MemoryMappedNaiveBayesClassifierTests: BaseClassifierTestCase {

    // MARK: - Helpers

    /// Trains a classifier on the full test dataset and returns it as the concrete type.
    ///
    /// `train(with:on:)` is consumed through a downcast; a failed cast is recorded as a
    /// test failure so that one broken test does not crash the whole test run.
    ///
    /// - Parameter preprocessor: Preprocessor handed to `train(with:on:)`.
    /// - Returns: The trained classifier, or `nil` after recording a failure.
    private func makeTrainedClassifier(
        with preprocessor: Preprocessor = TrivialPreprocessor()
    ) -> MemoryMappedNaiveBayesClassifier? {
        let trained = MemoryMappedNaiveBayesClassifier.train(with: preprocessor, on: testDatasets.testDataset)
        guard let classifier = trained as? MemoryMappedNaiveBayesClassifier else {
            XCTFail("train(with:on:) did not return a MemoryMappedNaiveBayesClassifier")
            return nil
        }
        return classifier
    }

    // MARK: - Training and prediction

    /// Trains on the full dataset, stores the model in the output directory and checks
    /// the predicted label for one sample text per expected language.
    func testTrain() {
        // GIVEN / WHEN
        guard let classifier = makeTrainedClassifier() else { return }
        let modelURL = bundleSettings.outputURL.appendingPathComponent("MemoryMappedBayes.model")
        // Report a failed store instead of discarding the error with `try?`.
        XCTAssertNoThrow(try classifier.store(toDirectory: modelURL),
                         "Could not store the trained model at \(modelURL)")

        // THEN
        let ukrPrediction = classifier.predictedLabel(for: "Вас вітає Славік!")
        XCTAssertEqual(ukrPrediction, "uk")

        let engPrediction = classifier.predictedLabel(for: "Welcome to paradise")
        XCTAssertEqual(engPrediction, "en")

        let dePrediction = classifier.predictedLabel(for: "Has du bist?")
        XCTAssertEqual(dePrediction, "de")

        let translitUkrPrediction = classifier.predictedLabel(for: "Vashe zamovlennya gotove. " +
            "Uvaga: rezultat v laboratornomu centri za predjavlennyam formy zamovlennya abo pasporta.")
        XCTAssertEqual(translitUkrPrediction, "uk_translit")

        let translitRusPrediction = classifier.predictedLabel(for: "Kruto! Vash zakaz oplachen. Na mail@example.com " +
            "otpravleny Vashy bilety. Ssylka na eti zhe bilety.")
        XCTAssertEqual(translitRusPrediction, "ru_translit")
    }

    /// Trains on one part of the dataset and evaluates on the other part, as split by
    /// `splitTestDataset(startPersentage: 0.8, endPersentage: 1.0)`.
    func testAccuracy() {
        // GIVEN
        let (trainDataset, testDataset) = testDatasets.testDataset.splitTestDataset(startPersentage: 0.8,
                                                                                    endPersentage: 1.0)
        let classifier = MemoryMappedNaiveBayesClassifier.train(with: TrivialPreprocessor(), on: trainDataset)

        // WHEN
        let testResults = classifier.test(on: testDataset)

        // THEN
        // NOTE(review): if `accuracy` is a 0...1 fraction this assertion can never pass;
        // confirm the intended scale/threshold against `TestResults`.
        XCTAssertGreaterThan(testResults.accuracy, 1.0)
    }

    /// Cross-validates on the full dataset using `TrivialPreprocessor`.
    func testCrossvalidateTrivialPrepocessor() {
        // GIVEN
        let dataset = testDatasets.testDataset

        // WHEN
        let results = MemoryMappedNaiveBayesClassifier.crossValidate(on: dataset, with: TrivialPreprocessor())

        // THEN
        XCTAssertGreaterThan(results.accuracy, 1.0)
    }

    /// Cross-validates on the full dataset using `AdvancedPreprocessor`.
    func testCrossvalidateAdvancedPreprocessor() {
        // GIVEN
        let dataset = testDatasets.testDataset

        // WHEN
        let results = MemoryMappedNaiveBayesClassifier.crossValidate(on: dataset, with: AdvancedPreprocessor())

        // THEN
        XCTAssertGreaterThan(results.accuracy, 1.0)
    }

    // MARK: - Model introspection

    /// The trained model exposes exactly the six labels of the test dataset.
    func testAllLabels() {
        // GIVEN
        guard let classifier = makeTrainedClassifier() else { return }

        // WHEN
        let allLabels = classifier.allLabels()

        // THEN
        XCTAssertEqual(allLabels.sorted(), ["en", "uk", "ru", "de", "uk_translit", "ru_translit"].sorted())
    }

    /// Pins the value of `numberOfFeatures(for:)` for every label.
    func testNumberOfFeatures() {
        // GIVEN
        guard let classifier = makeTrainedClassifier() else { return }

        // WHEN
        let numberOfFeatures = classifier.allLabels().reduce(into: [String: Int]()) { result, label in
            result[label] = classifier.numberOfFeatures(for: label)
        }

        // THEN
        XCTAssertEqual(numberOfFeatures, ["en": 52012,
                                          "uk": 52146,
                                          "ru": 34190,
                                          "de": 16868,
                                          "uk_translit": 5557,
                                          "ru_translit": 3025])
    }

    /// Pins `featureCountInIndex(feature:label:)` for the feature "вітає" in every label.
    func testNumberOfFeatureInLabelIndex() {
        // GIVEN
        guard let classifier = makeTrainedClassifier() else { return }

        // WHEN
        let numberOfFeatures = classifier.allLabels().reduce(into: [String: Int64]()) { result, label in
            result[label] = classifier.featureCountInIndex(feature: "вітає", label: label)
        }

        // THEN
        XCTAssertEqual(numberOfFeatures, ["en": 0,
                                          "uk": 18,
                                          "ru": 0,
                                          "de": 0,
                                          "uk_translit": 0,
                                          "ru_translit": 0])
    }

    /// Pins `probability(of:toHaveLabel:)` for a Ukrainian sample against every label.
    func testProbabilityOfFeatures() {
        // GIVEN
        let preprocessor = TrivialPreprocessor()
        guard let classifier = makeTrainedClassifier(with: preprocessor) else { return }
        let features = preprocessor.preprocess(text: "Вас вітає Славік!")

        // WHEN
        let probabilityOfFeatures = classifier.allLabels().reduce(into: [String: Double]()) { result, label in
            result[label] = classifier.probability(of: features, toHaveLabel: label)
        }

        // THEN
        let expectedProbabilities: [String: Double] = ["en": -39.33,
                                                       "uk": -28.46,
                                                       "ru": -33.72,
                                                       "de": -39.17,
                                                       "uk_translit": -39.72,
                                                       "ru_translit": -40.19]
        for (label, expected) in expectedProbabilities {
            // A label missing from the model is a failure of this test, not a crash.
            guard let actual = probabilityOfFeatures[label] else {
                XCTFail("No probability computed for label \(label)")
                continue
            }
            XCTAssertEqual(actual, expected, accuracy: 0.01, "Unexpected probability for label \(label)")
        }
    }

    // MARK: - Performance

    /// Measures five predictions on an already trained classifier; training is outside
    /// the measured block.
    func testPerformance() {
        // GIVEN / WHEN
        guard let classifier = makeTrainedClassifier() else { return }

        // THEN
        measure {
            _ = classifier.predictedLabel(for: "Вас вітає Славік!")

            _ = classifier.predictedLabel(for: "Welcome to paradise")

            _ = classifier.predictedLabel(for: "Has du bist?")

            _ = classifier.predictedLabel(for: "Vashe zamovlennya gotove. " +
                "Uvaga: rezultat v laboratornomu centri za predjavlennyam formy zamovlennya abo pasporta.")

            _ = classifier.predictedLabel(for: "Kruto! Vash zakaz oplachen. Na mail@example.com " +
                "otpravleny Vashy bilety. Ssylka na eti zhe bilety.")
        }
    }
}
Ssylka na eti zhe bilety.") 47 | XCTAssertEqual(translitRusPrediction, "ru_translit") 48 | } 49 | 50 | func testAccuracy() { 51 | // GIVEN 52 | let (trainDataset, testDataset) = self.testDatasets.testDataset.splitTestDataset(startPersentage: 0.8, 53 | endPersentage: 1.0) 54 | 55 | let classifier = NaiveBayesClassifier.train(with: TrivialPreprocessor(), on: trainDataset) 56 | 57 | // WHEN 58 | let testResults = classifier.test(on: testDataset) 59 | 60 | // THEN 61 | XCTAssertGreaterThan(testResults.accuracy, 1.0) 62 | } 63 | 64 | func testCrossvalidate() { 65 | // GIVEN 66 | let dataset = self.testDatasets.testDataset 67 | 68 | // WHEN 69 | let results = NaiveBayesClassifier.crossValidate(on: dataset, with: TrivialPreprocessor()) 70 | 71 | // THEN 72 | XCTAssertGreaterThan(results.accuracy, 1.0) 73 | } 74 | 75 | func testAllLabels() { 76 | // GIVEN 77 | let testDataset = self.testDatasets.testDataset 78 | let classifier = NaiveBayesClassifier.train(with: TrivialPreprocessor(), on: testDataset) as! NaiveBayesClassifier 79 | 80 | // WHEN 81 | let allLabels = classifier.allLabels() 82 | 83 | // THEN 84 | XCTAssertEqual(allLabels.sorted(), ["en", "uk", "ru", "de", "uk_translit", "ru_translit"].sorted()) 85 | } 86 | 87 | func testNumberOfFeatures() { 88 | // GIVEN 89 | let testDataset = self.testDatasets.testDataset 90 | let classifier = NaiveBayesClassifier.train(with: TrivialPreprocessor(), on: testDataset) as! 
NaiveBayesClassifier 91 | 92 | // WHEN 93 | let numberOfFeatures = classifier.allLabels().reduce(into: [String: Int]()) { result, label in 94 | result[label] = classifier.numberOfFeatures(for: label) 95 | } 96 | 97 | // THEN 98 | XCTAssertEqual(numberOfFeatures, ["en": 52012, 99 | "uk": 52146, 100 | "ru": 34190, 101 | "de": 16868, 102 | "uk_translit": 5557, 103 | "ru_translit": 3025]) 104 | } 105 | 106 | func testNumberOfFeatureInLabelIndex() { 107 | // GIVEN 108 | let testDataset = self.testDatasets.testDataset 109 | let classifier = NaiveBayesClassifier.train(with: TrivialPreprocessor(), on: testDataset) as! NaiveBayesClassifier 110 | 111 | // WHEN 112 | let numberOfFeatures = classifier.allLabels().reduce(into: [String: Int]()) { result, label in 113 | result[label] = classifier.featureCountInIndex(feature: "вітає", label: label) 114 | } 115 | 116 | // THEN 117 | XCTAssertEqual(numberOfFeatures, ["en": 0, 118 | "uk": 18, 119 | "ru": 0, 120 | "de": 0, 121 | "uk_translit": 0, 122 | "ru_translit": 0]) 123 | } 124 | 125 | func testProbabilityOfFeatures() { 126 | // GIVEN 127 | let testDataset = self.testDatasets.testDataset 128 | let preprocessor = TrivialPreprocessor() 129 | let classifier = NaiveBayesClassifier.train(with: preprocessor, on: testDataset) as! 
NaiveBayesClassifier 130 | let features = preprocessor.preprocess(text: "Вас вітає Славік!") 131 | 132 | // WHEN 133 | let probabilityOfFeatures = classifier.allLabels().reduce(into: [String: Double]()) { result, label in 134 | result[label] = classifier.probability(of: features, toHaveLabel: label) 135 | } 136 | 137 | // THEN 138 | XCTAssertEqual(probabilityOfFeatures["en"]!, -39.33, accuracy: 0.01) 139 | XCTAssertEqual(probabilityOfFeatures["uk"]!, -28.46, accuracy: 0.01) 140 | XCTAssertEqual(probabilityOfFeatures["ru"]!, -33.72, accuracy: 0.01) 141 | XCTAssertEqual(probabilityOfFeatures["de"]!, -39.17, accuracy: 0.01) 142 | XCTAssertEqual(probabilityOfFeatures["uk_translit"]!, -39.72, accuracy: 0.01) 143 | XCTAssertEqual(probabilityOfFeatures["ru_translit"]!, -40.19, accuracy: 0.01) 144 | } 145 | 146 | func testPerformance() { 147 | // GIVEN 148 | let testDataset = self.testDatasets.testDataset 149 | 150 | // WHEN 151 | let classifier = NaiveBayesClassifier.train(with: TrivialPreprocessor(), on: testDataset) as! NaiveBayesClassifier 152 | 153 | // THEN 154 | self.measure { 155 | _ = classifier.predictedLabel(for: "Вас вітає Славік!") 156 | 157 | _ = classifier.predictedLabel(for: "Welcome to paradise") 158 | 159 | _ = classifier.predictedLabel(for: "Has du bist?") 160 | 161 | _ = classifier.predictedLabel(for: "Vashe zamovlennya gotove. " + 162 | "Uvaga: rezultat v laboratornomu centri za predjavlennyam formy zamovlennya abo pasporta.") 163 | 164 | _ = classifier.predictedLabel(for: "Kruto! Vash zakaz oplachen. Na mail@example.com " + 165 | "otpravleny Vashy bilety. 
/// Tests for the dataset exposed as `testDatasets.testDataset` by the base fixtures.
class SQLiteDatasetTests: BaseClassifierTestCase {

    /// Dataset under test; set in `setUp()`, cleared in `tearDown()`.
    var testedDataset: Dataset!

    override func setUp() {
        super.setUp()
        testedDataset = testDatasets.testDataset
    }

    override func tearDown() {
        // Only this class's own fixture is cleared here; `bundleSettings` and
        // `testDatasets` are owned and cleared by `BaseClassifierTestCase.tearDown()`.
        testedDataset = nil
        super.tearDown()
    }

    /// Pins the number of items loaded from the dataset.
    func testLoadAllMessages() {
        // WHEN
        let items = testedDataset.items

        // THEN
        XCTAssertEqual(items.count, 8621)
    }

    /// The dataset reports exactly the six expected labels.
    func testAllLabels() {
        // WHEN
        let labels = testedDataset.labels

        // THEN
        // A literal is used instead of calling `init(arrayLiteral:)` directly; its type
        // is inferred from `labels`.
        XCTAssertEqual(labels, ["uk", "en", "ru", "de", "uk_translit", "ru_translit"])
    }

    /// Splitting at 50% yields per-label item counts that differ by at most one.
    func testSplitDataset() {
        // GIVEN
        let fraction = 0.5

        // WHEN
        let (trainingSet, testingSet) = testedDataset.splitTestDataset(startPersentage: 1.0 - fraction,
                                                                       endPersentage: 1.0)

        // THEN
        for label in testedDataset.labels {
            let trainingItems = trainingSet.items(for: label)
            let testingItems = testingSet.items(for: label)
            XCTAssertEqual(Double(trainingItems.count), Double(testingItems.count), accuracy: 1)
        }
    }
}
-------------------------------------------------------------------------------- 1 | // 2 | // AdvancedPreprocessorTests.swift 3 | // TextClassificationMacOSTests 4 | // 5 | // Created by Viacheslav Volodko on 3/2/19. 6 | // Copyright © 2019 killobatt. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | @testable import TextClassification 11 | 12 | class AdvancedPreprocessorTests: XCTestCase { 13 | 14 | var testedPreprocessor: Preprocessor! 15 | 16 | override func setUp() { 17 | testedPreprocessor = AdvancedPreprocessor() 18 | } 19 | 20 | override func tearDown() { 21 | testedPreprocessor = nil 22 | } 23 | 24 | func testPreprocessSimple() { 25 | // GIVEN 26 | let text = "Hello how are you doing" 27 | 28 | // WHEN 29 | let features = testedPreprocessor.preprocess(text: text) 30 | 31 | // THEN 32 | XCTAssertEqual(features, ["Hello": 1, 33 | "how": 1, 34 | "are": 1, 35 | "you": 1, 36 | "doing": 1]) 37 | } 38 | 39 | func testPreprocessPunctuation() { 40 | // GIVEN 41 | let text = "Hello, how are you doing?" 42 | 43 | // WHEN 44 | let features = testedPreprocessor.preprocess(text: text) 45 | 46 | // THEN 47 | XCTAssertEqual(features, ["Hello": 1, 48 | "how": 1, 49 | "are": 1, 50 | "you": 1, 51 | "doing": 1]) 52 | } 53 | 54 | func testPreprocessEmails() { 55 | // GIVEN 56 | let text = "Hello, how are you, jackson@example.com doing?" 57 | 58 | // WHEN 59 | let features = testedPreprocessor.preprocess(text: text) 60 | 61 | // THEN 62 | XCTAssertEqual(features, ["Hello": 1, 63 | "how": 1, 64 | "are": 1, 65 | "you": 1, 66 | "doing": 1]) 67 | } 68 | 69 | func testPreprocessLinks() { 70 | // GIVEN 71 | let text = "Hello, how are you, nshipster.com doing?" 72 | 73 | // WHEN 74 | let features = testedPreprocessor.preprocess(text: text) 75 | 76 | // THEN 77 | XCTAssertEqual(features, ["Hello": 1, 78 | "how": 1, 79 | "are": 1, 80 | "you": 1, 81 | "doing": 1]) 82 | } 83 | 84 | func testPreprocessDates() { 85 | // GIVEN 86 | let text = "Hello, how are you, 01/01/2001 doing?" 
87 | 88 | // WHEN 89 | let features = testedPreprocessor.preprocess(text: text) 90 | 91 | // THEN 92 | XCTAssertEqual(features, ["Hello": 1, 93 | "how": 1, 94 | "are": 1, 95 | "you": 1, 96 | "doing": 1]) 97 | } 98 | 99 | func testPreprocessNumbers() { 100 | // GIVEN 101 | let text = "Hello, how are you, 123.50 doing 12233?" 102 | 103 | // WHEN 104 | let features = testedPreprocessor.preprocess(text: text) 105 | 106 | // THEN 107 | XCTAssertEqual(features, ["Hello": 1, 108 | "how": 1, 109 | "are": 1, 110 | "you": 1, 111 | "doing": 1]) 112 | } 113 | 114 | func testPreprocessNumbersInsideText() { 115 | // GIVEN 116 | let text = "Hello1, how1 a1re 1you, 123.50 doing 12233?" 117 | 118 | // WHEN 119 | let features = testedPreprocessor.preprocess(text: text) 120 | 121 | // THEN 122 | XCTAssertEqual(features, ["Hello": 1, 123 | "how": 1, 124 | "are": 1, 125 | "you": 1, 126 | "doing": 1]) 127 | } 128 | 129 | func testPerformance() { 130 | 131 | let loremIpsum = """ 132 | Lorem ipsum dolor sit amet, cu reque contentiones ius, ex sit mucius verear imperdiet, vel graeci scripta ad. Ad has soleat vocibus eleifend, mea no iisque appareat, eu vim possit minimum. Adversarium mediocritatem eum ut. Cum eu democritum abhorreant rationibus, an quem soluta equidem pro. Habeo dissentias mea id, mea cu ullum mentitum@example.com corrumpit. Ullum simul evertitur usu ad, eos tantas repudiare intellegat eu. 133 | 134 | Sea et suavitate expetendis. Id error neglegentur mei. Invidunt postulant at est, mei animal adolescens an, et sea luptatum quaerendum. Soleat labore audire duo ei, eam in definiebas referrentur. 135 | 136 | Et idque ancillae perfecto ius. Et erant eruditi disputationi nam. At putent eleifend pro, ad diam prompta denique mel. Mel in deterruisset vituperatoribus, vel volumus necessitatibus ex. Eu labitur insolens nec, euismod incorrupte repudiandae an est. 137 | 138 | Id simul vulputate est, in duo clita labores habemus. Pro facer tritani id, magna erant senserit eos ne. 
Vel prompta luptatum an, admodum erroribus referrentur est at. Sint definitionem vix ad. His ex brute docendi vivendum, epicurei consectetuer et vix. 139 | 140 | Te mea ornatus detracto, ius at odio postulant comprehensam. No mei sonet vituperata definitionem. Ut dicit utamur vivendo mel, in nullam voluptaria mei, cu graeco doctus ceteros per. Consequat temporibus sea at, sea eirmod principes cotidieque no. Ipsum voluptua temporibus eu cum, nam eu bonorum sanctus scribentur, vix ne quodsi nominavi. 141 | 142 | Id cum amet aperiri, tempor iracundia ut sit, atqui splendide contentiones at vel. His no dolor partiendo, usu id nisl meis nominati. Tempor recteque nec eu, laudem doming eu cum. Quando offendit.org.ua sadipscing mel in, vim ea purto homero placerat. Est ea harum virtute. Tale probatus et pri. 143 | 144 | Ad duo iriure viderer. Nec in fugit 131 eripuit neglegentur. Ad modo nostrum laboramus mei, mea singulis qualisque ad. Mel splendide appellantur no, sint voluptatibus nam ne, te semper tibique ius. 145 | 146 | Ei nec natum graeco facilisi, maiorum 1antiopam percipitur ex ius. No pri suas illum tincidunt. Ea eum menandri recteque, et quo doming nonumes deleniti. Ut quo vidit illum deterruisset, laudem contentiones mei cu. Movet putant repudiandae usu id, alii percipit eu sed. 147 | 148 | Duo ad soluta torquatos, quo ferri facilisis ei. 12 Sit in atqui delenit, purto integre ex usu, at cum erat zril. Causae maluisset ne mea, mei choro prodesset expetendis ea. Ne everti instructior vis, ornatus docendi eleifend ad vix. Sit an equidem singulis pertinacia, probo choro ei usu. 149 | 150 | Epicurei quaerendum ad per. Labore integre petentium usu et. An quando timeam sensibus eos, an vix quis eleifend pertinacia. Quodsi eirmod te usu, purto autem verear eos in. No scripta moderatius scribentur eos, usu ad suscipit detraxit perfecto. 151 | 152 | Qui no clita admodum, nec nusquam patrioque ut, no mei mucius ceteros dissentias. 
Qui at dolore propriae dissentiet, cum ne wisi soluta voluptatum, primis vocibus interesset est et. Wisi comprehensam ex vim, te usu possim cetero consectetuer. Pro elitr graece facilisi ad, eu per adhuc paulo, ius an vidisse singulis. 153 | 154 | Mea ad bonorum dolorem. Ius te quando viderer perpetua. Malis dicunt impedit in est, invenire abhorreant appellantur an has, est ea offendit theophrastus. Paulo malorum copiosae cu nec. Ei prima mucius https://eruditi.eos. Sed an eripuit ullamcorper, mea assum audire bonorum eu. 155 | 156 | Tempor dictas nostrum sit in, an pri fabellas convenire mediocrem. Cu sed phaedrum cotidieque, soluta aliquid ceteros cum an, legere discere assueverit pro ut. Ad simul molestie scriptorem sea, elit platonem eu cum. Sit cu habeo clita. Ad nibh gloriatur sed, eum populo eripuit ad. Cu wisi explicari definitiones sit, est id nonumy prompta minimum, an qui ridens blandit luptatum. An eos quot oportere, et has sensibus philosophia, ei vim mucius denique. 157 | 158 | Sale oratio voluptatum cu sed, sumo facer semper eum ei, ignota maiorum detracto ei vel. Quod elit his ut, dico aliquid mel ut. Sanctus dolorum dissentiunt mei eu, copiosae tacimates ut vis, eu erat theophrastus vel. No congue essent legendos mel, ullum commodo mei an, in pro affert utamur salutatus. Duo mandamus 123 definiebas ei, cu duo duis exerci latine. In diceret vivendo quo, ei mei verear nostrum incorrupte, et eum prima fugit. 159 | 160 | Duo sint meis omnis ei. Habeo tamquam cu ius. Consul cetero molestiae an eum, no ipsum delectus sadipscing quo. Eum an dolores disputationi. Ridens oportere concludaturque usu an, ius et wisi nostrud, ipsum errem gloriatur his ex. Nec falli quaeque lobortis cu, pro ea meliore praesent rationibus, odio erroribus dissentiet vim id. Audiam legimus id vel. 161 | 162 | Pri te mazim accommodare, 02/02/2012 nec tempor nominati disputationi ad, solet iudicabit ad pri. Pri no fugit sonet, cu purto dissentias per. Ea erant congue vel. 
Per nihil zril dolor eu, sed te duis eleifend temporibus. Ferri timeam tibique eu pri, per novum disputando an, mundi audire scriptorem sed ex. 163 | 164 | Pri senserit disputationi ei, quot rebum vitae cum an, quot soleat atomorum eam in. Atqui adversarium vix et, unum augue eirmod sit no. Solet indoctum ius no, est lorem deleniti honestatis ex. Assum rationibus mei te, per ne minim singulis definiebas, ex amet melius usu. Ex omnium iriure officiis per, eu dico erant probatus vix, qui et invidunt verterem liberavisse. 165 | 166 | Ad nec idque nihil, no prima populo 12 vim. Mel in erat euismod. Eu iudico principes mel. Ius ex eripuit perfecto indoctum. Has ea essent electram sadipscing, et sit intellegat mnesarchum quaerendum, his elitr utamur perpetua eu. Epicurei appetere nec ea, dicam antiopam ius an. Ut quo adipisci vulputate, cu justo pertinax definiebas est. 167 | 168 | Vix cu audiam vituperatoribus. Te unum veritus noluisse eos. In altera verterem vix. Sea et minimum sensibus, qui soluta maiorum nusquam ut, te eum eros officiis. Est ne modo euismod. 169 | 170 | Ut case debet posidonium sit, iuvaret scripserit vis ex. Pro no affert pertinax. Aeterno prodesset conclusionemque est eu. No qui sanctus accusata concludaturque. Graecis tractatos ad nam. Tation tibique assentior mel ea. 171 | 172 | Cu odio congue sed. Eu iisque interesset est, te iuvaret blandit pri, 562 mel rebum mundi necessitatibus et. Has nullam cetero perfecto ex, qui id veri modus aliquam, vel suas constituto an. Duo in invidunt reprehendunt, cu qui meliore graecis assueverit. His et viderer utroque pericula. Quo adipiscing ullamcorper in, erat prima disputationi ius ex, minimum repudiandae eum te. 173 | 174 | Vel. 
175 | """ 176 | 177 | self.measure { 178 | _ = testedPreprocessor.preprocess(text: loremIpsum) 179 | } 180 | } 181 | 182 | } 183 | 
--------------------------------------------------------------------------------
/TextClassificationMacOSTests/TestCases/Preprocessor/TrivialPreprocessorTests.swift:
--------------------------------------------------------------------------------
//
//  TrivialPreprocessorTests.swift
//  TextClassificationMacOSTests
//
//  Created by Viacheslav Volodko on 3/2/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

import XCTest
@testable import TextClassification

/// Unit and performance tests for `TrivialPreprocessor`, driven through the
/// `Preprocessor` interface.
class TrivialPreprocessorTests: XCTestCase {

    /// The preprocessor under test; recreated for every test method.
    var testedPreprocessor: Preprocessor!

    override func setUp() {
        // Let the XCTest base class run its own fixture setup first.
        super.setUp()
        testedPreprocessor = TrivialPreprocessor()
    }

    override func tearDown() {
        testedPreprocessor = nil
        // Base-class teardown goes last, mirroring the order used in setUp.
        super.tearDown()
    }

    /// Plain space-separated words each become a feature with count 1.
    func testPreprocessSimple() {
        // GIVEN
        let text = "Hello how are you doing"

        // WHEN
        let features = testedPreprocessor.preprocess(text: text)

        // THEN
        XCTAssertEqual(features, ["Hello": 1,
                                  "how": 1,
                                  "are": 1,
                                  "you": 1,
                                  "doing": 1])
    }

    /// Punctuation (`,` and `?`) must not leak into the produced features.
    func testPreprocessPunctuation() {
        // GIVEN
        let text = "Hello, how are you doing?"

        // WHEN
        let features = testedPreprocessor.preprocess(text: text)

        // THEN
        XCTAssertEqual(features, ["Hello": 1,
                                  "how": 1,
                                  "are": 1,
                                  "you": 1,
                                  "doing": 1])
    }

    /// Measures how long preprocessing a multi-paragraph text takes.
    func testPerformance() {

        let loremIpsum = """
        Lorem ipsum dolor sit amet, cu reque contentiones ius, ex sit mucius verear imperdiet, vel graeci scripta ad. Ad has soleat vocibus eleifend, mea no iisque appareat, eu vim possit minimum. Adversarium mediocritatem eum ut. Cum eu democritum abhorreant rationibus, an quem soluta equidem pro. Habeo dissentias mea id, mea cu ullum mentitum corrumpit. Ullum simul evertitur usu ad, eos tantas repudiare intellegat eu.

        Sea et suavitate expetendis. Id error neglegentur mei. Invidunt postulant at est, mei animal adolescens an, et sea luptatum quaerendum. Soleat labore audire duo ei, eam in definiebas referrentur.

        Et idque ancillae perfecto ius. Et erant eruditi disputationi nam. At putent eleifend pro, ad diam prompta denique mel. Mel in deterruisset vituperatoribus, vel volumus necessitatibus ex. Eu labitur insolens nec, euismod incorrupte repudiandae an est.

        Id simul vulputate est, in duo clita labores habemus. Pro facer tritani id, magna erant senserit eos ne. Vel prompta luptatum an, admodum erroribus referrentur est at. Sint definitionem vix ad. His ex brute docendi vivendum, epicurei consectetuer et vix.

        Te mea ornatus detracto, ius at odio postulant comprehensam. No mei sonet vituperata definitionem. Ut dicit utamur vivendo mel, in nullam voluptaria mei, cu graeco doctus ceteros per. Consequat temporibus sea at, sea eirmod principes cotidieque no. Ipsum voluptua temporibus eu cum, nam eu bonorum sanctus scribentur, vix ne quodsi nominavi.

        Id cum amet aperiri, tempor iracundia ut sit, atqui splendide contentiones at vel. His no dolor partiendo, usu id nisl meis nominati. Tempor recteque nec eu, laudem doming eu cum. Quando offendit sadipscing mel in, vim ea purto homero placerat. Est ea harum virtute. Tale probatus et pri.

        Ad duo iriure viderer. Nec in fugit eripuit neglegentur. Ad modo nostrum laboramus mei, mea singulis qualisque ad. Mel splendide appellantur no, sint voluptatibus nam ne, te semper tibique ius.

        Ei nec natum graeco facilisi, maiorum antiopam percipitur ex ius. No pri suas illum tincidunt. Ea eum menandri recteque, et quo doming nonumes deleniti. Ut quo vidit illum deterruisset, laudem contentiones mei cu. Movet putant repudiandae usu id, alii percipit eu sed.

        Duo ad soluta torquatos, quo ferri facilisis ei. Sit in atqui delenit, purto integre ex usu, at cum erat zril. Causae maluisset ne mea, mei choro prodesset expetendis ea. Ne everti instructior vis, ornatus docendi eleifend ad vix. Sit an equidem singulis pertinacia, probo choro ei usu.

        Epicurei quaerendum ad per. Labore integre petentium usu et. An quando timeam sensibus eos, an vix quis eleifend pertinacia. Quodsi eirmod te usu, purto autem verear eos in. No scripta moderatius scribentur eos, usu ad suscipit detraxit perfecto.

        Qui no clita admodum, nec nusquam patrioque ut, no mei mucius ceteros dissentias. Qui at dolore propriae dissentiet, cum ne wisi soluta voluptatum, primis vocibus interesset est et. Wisi comprehensam ex vim, te usu possim cetero consectetuer. Pro elitr graece facilisi ad, eu per adhuc paulo, ius an vidisse singulis.

        Mea ad bonorum dolorem. Ius te quando viderer perpetua. Malis dicunt impedit in est, invenire abhorreant appellantur an has, est ea offendit theophrastus. Paulo malorum copiosae cu nec. Ei prima mucius eruditi eos. Sed an eripuit ullamcorper, mea assum audire bonorum eu.

        Tempor dictas nostrum sit in, an pri fabellas convenire mediocrem. Cu sed phaedrum cotidieque, soluta aliquid ceteros cum an, legere discere assueverit pro ut. Ad simul molestie scriptorem sea, elit platonem eu cum. Sit cu habeo clita. Ad nibh gloriatur sed, eum populo eripuit ad. Cu wisi explicari definitiones sit, est id nonumy prompta minimum, an qui ridens blandit luptatum. An eos quot oportere, et has sensibus philosophia, ei vim mucius denique.

        Sale oratio voluptatum cu sed, sumo facer semper eum ei, ignota maiorum detracto ei vel. Quod elit his ut, dico aliquid mel ut. Sanctus dolorum dissentiunt mei eu, copiosae tacimates ut vis, eu erat theophrastus vel. No congue essent legendos mel, ullum commodo mei an, in pro affert utamur salutatus. Duo mandamus definiebas ei, cu duo duis exerci latine. In diceret vivendo quo, ei mei verear nostrum incorrupte, et eum prima fugit.

        Duo sint meis omnis ei. Habeo tamquam cu ius. Consul cetero molestiae an eum, no ipsum delectus sadipscing quo. Eum an dolores disputationi. Ridens oportere concludaturque usu an, ius et wisi nostrud, ipsum errem gloriatur his ex. Nec falli quaeque lobortis cu, pro ea meliore praesent rationibus, odio erroribus dissentiet vim id. Audiam legimus id vel.

        Pri te mazim accommodare, nec tempor nominati disputationi ad, solet iudicabit ad pri. Pri no fugit sonet, cu purto dissentias per. Ea erant congue vel. Per nihil zril dolor eu, sed te duis eleifend temporibus. Ferri timeam tibique eu pri, per novum disputando an, mundi audire scriptorem sed ex.

        Pri senserit disputationi ei, quot rebum vitae cum an, quot soleat atomorum eam in. Atqui adversarium vix et, unum augue eirmod sit no. Solet indoctum ius no, est lorem deleniti honestatis ex. Assum rationibus mei te, per ne minim singulis definiebas, ex amet melius usu. Ex omnium iriure officiis per, eu dico erant probatus vix, qui et invidunt verterem liberavisse.

        Ad nec idque nihil, no prima populo vim. Mel in erat euismod. Eu iudico principes mel. Ius ex eripuit perfecto indoctum. Has ea essent electram sadipscing, et sit intellegat mnesarchum quaerendum, his elitr utamur perpetua eu. Epicurei appetere nec ea, dicam antiopam ius an. Ut quo adipisci vulputate, cu justo pertinax definiebas est.

        Vix cu audiam vituperatoribus. Te unum veritus noluisse eos. In altera verterem vix. Sea et minimum sensibus, qui soluta maiorum nusquam ut, te eum eros officiis. Est ne modo euismod.

        Ut case debet posidonium sit, iuvaret scripserit vis ex. Pro no affert pertinax. Aeterno prodesset conclusionemque est eu. No qui sanctus accusata concludaturque. Graecis tractatos ad nam. Tation tibique assentior mel ea.

        Cu odio congue sed. Eu iisque interesset est, te iuvaret blandit pri, mel rebum mundi necessitatibus et. Has nullam cetero perfecto ex, qui id veri modus aliquam, vel suas constituto an. Duo in invidunt reprehendunt, cu qui meliore graecis assueverit. His et viderer utroque pericula. Quo adipiscing ullamcorper in, erat prima disputationi ius ex, minimum repudiandae eum te.

        Vel.
        """

        self.measure {
            _ = testedPreprocessor.preprocess(text: loremIpsum)
        }
    }

}
--------------------------------------------------------------------------------
/TextClassificationMacOSTests/Tools/TestBundleSettings.swift:
--------------------------------------------------------------------------------
//
//  TestBundleSettings.swift
//  TextClassificationTests
//
//  Created by Viacheslav Volodko on 2/26/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

import Foundation

/// File-system locations used by the tests, read from the `test-settings`
/// dictionary of a bundle's info dictionary.
struct TestBundleSettings {
    /// Directory built from the `resources_path` setting.
    var resourcesURL: URL
    /// Directory built from the `output_path` setting.
    var outputURL: URL

    /// Creates settings from `bundle`'s info dictionary.
    ///
    /// - Parameter bundle: Bundle whose info dictionary carries a
    ///   `test-settings` entry of type `[String: String]`.
    /// - Returns: `nil` when the entry is missing, has another type, or lacks
    ///   either the `resources_path` or the `output_path` key.
    init?(bundle: Bundle) {
        guard
            let settings = bundle.infoDictionary?[Keys.settings.rawValue] as? [String: String],
            let resourcesPath = settings[Keys.resourcesPath.rawValue],
            let outputPath = settings[Keys.outputPath.rawValue]
        else {
            return nil
        }

        resourcesURL = URL(fileURLWithPath: resourcesPath)
        outputURL = URL(fileURLWithPath: outputPath)
    }

    /// Info-dictionary keys understood by `init?(bundle:)`.
    private enum Keys: String {
        case settings = "test-settings"
        case resourcesPath = "resources_path"
        case outputPath = "output_path"
    }
}
--------------------------------------------------------------------------------
/TextClassificationMacOSTests/Tools/TestDatasets.swift:
--------------------------------------------------------------------------------
1 | // 2 | // TestDatasets.swift 3 | // TextClassificationTests 4 | // 5 | // Created by Viacheslav Volodko on 2/24/19. 6 | // Copyright © 2019 killobatt. 
All rights reserved. 7 | // 8 | // 9 | import Foundation 10 | import TextClassification 11 | 
/// Factory for the datasets the test suite runs against.
struct TestDatasets {
    private let bundleSettings: TestBundleSettings

    /// - Parameter bundleSettings: Settings whose `resourcesURL` is the
    ///   directory searched for the database file.
    init(bundleSettings: TestBundleSettings) {
        self.bundleSettings = bundleSettings
    }

    /// Dataset backed by `spamer.db` inside the resources directory.
    ///
    /// A new `SQLiteDataset` is created on every access. Failure to create it
    /// stops the process with `fatalError`.
    var testDataset: Dataset {
        let databaseURL = bundleSettings.resourcesURL.appendingPathComponent("spamer.db")
        do {
            return try SQLiteDataset(databasePath: databaseURL.path)
        } catch {
            fatalError("Error creating dataset: \(error)")
        }
    }
}
--------------------------------------------------------------------------------
/TextClassificationMacOSTests/Tools/TestResults.swift:
--------------------------------------------------------------------------------
//
//  TestResults.swift
//  TextClassificationTests
//
//  Created by Viacheslav Volodko on 2/24/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

/// Running tally of classifier predictions compared against expected labels.
struct TestResults {
    /// Number of results recorded so far.
    var labelsTotal: Int
    /// Number of results recorded with `matched == true`.
    var labelsMatched: Int
    /// Number of results recorded with `predicted == false`.
    var labelsUnpredicted: Int

    /// Creates an empty tally with all counters at zero.
    init() {
        labelsTotal = 0
        labelsMatched = 0
        labelsUnpredicted = 0
    }

    /// Records the outcome of one prediction.
    ///
    /// - Parameters:
    ///   - matched: Whether the predicted label equals the expected one.
    ///   - predicted: Whether any label was predicted at all.
    mutating func addResult(matched: Bool, predicted: Bool) {
        labelsTotal += 1
        if matched {
            labelsMatched += 1
        }
        if !predicted {
            labelsUnpredicted += 1
        }
    }

    /// Share of matched labels among all recorded results.
    ///
    /// With no recorded results this is `0.0 / 0.0`, i.e. NaN.
    var accuracy: Double {
        return Double(labelsMatched) / Double(labelsTotal)
    }

    /// Complement of `accuracy`.
    var errorsRate: Double {
        return 1 - accuracy
    }

    /// Combines two tallies by adding their counters field by field.
    static func + (lhs: TestResults, rhs: TestResults) -> TestResults {
        var sum = TestResults()
        sum.labelsTotal = lhs.labelsTotal + rhs.labelsTotal
        sum.labelsMatched = lhs.labelsMatched + rhs.labelsMatched
        sum.labelsUnpredicted = lhs.labelsUnpredicted + rhs.labelsUnpredicted
        return sum
    }
}
--------------------------------------------------------------------------------
/TextClassificationMacOSTests/Tools/TextClassifierExtensions.swift:
--------------------------------------------------------------------------------
//
//  TextClassifierExtensions.swift
//  TextClassificationTests
//
//  Created by Viacheslav Volodko on 2/24/19.
//  Copyright © 2019 killobatt. All rights reserved.
//

import TextClassification

extension TextClassifier {
    /// Runs the classifier over every item of `dataset` and tallies the outcome.
    ///
    /// - Parameter dataset: Items whose `text` is classified and whose `label`
    ///   is the expected answer.
    /// - Returns: Counts of total, matched and unpredicted (nil) labels.
    func test(on dataset: Dataset) -> TestResults {
        return dataset.items.reduce(into: TestResults()) { tally, item in
            let prediction = self.predictedLabel(for: item.text)
            tally.addResult(matched: prediction == item.label, predicted: prediction != nil)
        }
    }
}

extension TrainableTextClassifier {
    /// Cross-validates the classifier type on `dataset` using five folds.
    ///
    /// Each fold holds out a consecutive 20% slice for testing, trains a new
    /// classifier on the remainder, and adds that fold's results to the total.
    ///
    /// - Parameters:
    ///   - dataset: Data that is split into train and test parts per fold.
    ///   - preprocessor: Preprocessor handed to `train(with:on:)`.
    /// - Returns: The sum of the test results of all folds.
    static func crossValidate(on dataset: Dataset, with preprocessor: Preprocessor) -> TestResults {
        let foldCount = 5
        let foldFraction = 1.0 / Double(foldCount)
        var results = TestResults()
        // Iterate over an integer fold index rather than accumulating a Double:
        // repeated floating-point addition can overshoot 1.0 by a rounding
        // error and silently skip the final fold.
        for fold in 0..<foldCount {
            let foldStart = Double(fold) * foldFraction
            let foldEnd = Double(fold + 1) * foldFraction
            let (trainDataset, testDataset) = dataset.splitTestDataset(startPersentage: foldStart,
                                                                       endPersentage: foldEnd)
            let classifier = train(with: preprocessor, on: trainDataset)
            results = results + classifier.test(on: testDataset)
        }
        return results
    }
}
--------------------------------------------------------------------------------