├── .gitignore ├── .gitmodules ├── AIDFD ├── pom.xml └── src │ └── main │ └── java │ ├── ch │ └── javasoft │ │ └── bitset │ │ ├── BitSetFactory.java │ │ ├── IBitSet.java │ │ ├── LongBitSet.java │ │ └── search │ │ ├── SubSetSearch.java │ │ ├── SuperSetSearch.java │ │ ├── TreeSearch.java │ │ └── tree │ │ ├── InterNode.java │ │ ├── LeafNode.java │ │ └── Node.java │ └── de │ └── metanome │ └── algorithms │ └── aidfd │ ├── AIDFD.java │ ├── helpers │ ├── ArrayIndexComparator.java │ ├── Cluster.java │ ├── FD.java │ ├── FastBloomFilter.java │ ├── Partition.java │ └── StrippedPartition.java │ └── results │ ├── CorrectnessMetanomeResultReceiver.java │ ├── MetanomeResultReceiver.java │ └── PrefixTreeResultGen.java ├── BINDER ├── BINDERAlgorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── binder │ │ │ ├── core │ │ │ └── BINDER.java │ │ │ ├── io │ │ │ ├── FileInputIterator.java │ │ │ ├── InputIterator.java │ │ │ └── SqlInputIterator.java │ │ │ ├── structures │ │ │ ├── Attribute.java │ │ │ ├── AttributeCombination.java │ │ │ ├── IntSingleLinkedList.java │ │ │ ├── Level.java │ │ │ └── PruningStatistics.java │ │ │ └── util │ │ │ └── LruCache.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── binder │ │ └── BINDERTest.java ├── BINDERDatabase │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── binder │ │ └── BINDERDatabase.java ├── BINDERFile │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── binder │ │ └── BINDERFile.java └── pom.xml ├── CFDFinder ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── cfdfinder │ │ ├── CFDFinder.java │ │ ├── Inductor.java │ │ ├── MemoryGuardian.java │ │ ├── PLICache.java │ │ ├── Sampler.java │ │ ├── Validator.java │ │ ├── expansion │ │ ├── 
ConstantPatternExpansionStrategy.java │ │ ├── ExpansionStrategy.java │ │ ├── PositiveAndNegativeConstantPatternExpansionStrategy.java │ │ └── RangePatternExpansionStrategy.java │ │ ├── pattern │ │ ├── ConstantPatternEntry.java │ │ ├── NegativeConstantPatternEntry.java │ │ ├── Pattern.java │ │ ├── PatternEntry.java │ │ ├── PatternTableau.java │ │ ├── RangePatternEntry.java │ │ └── VariablePatternEntry.java │ │ ├── pruning │ │ ├── LegacyPruning.java │ │ ├── PartialFdPruning.java │ │ ├── PruningStrategy.java │ │ ├── RhsFilterPruning.java │ │ └── SupportIndependentPruning.java │ │ ├── result │ │ ├── DirectOutputResultStrategy.java │ │ ├── FileResultStrategy.java │ │ ├── PruningLatticeResultStrategy.java │ │ ├── PruningLatticeToFileResultStrategy.java │ │ ├── PruningTreeResultStrategy.java │ │ ├── Result.java │ │ ├── ResultLattice.java │ │ ├── ResultStrategy.java │ │ └── ResultTree.java │ │ ├── structures │ │ ├── ClusterIdentifier.java │ │ ├── ClusterIdentifierWithRecord.java │ │ ├── ClusterTree.java │ │ ├── ClusterTreeElement.java │ │ ├── FDList.java │ │ ├── FDSet.java │ │ ├── FDTree.java │ │ ├── FDTreeElement.java │ │ ├── FDTreeElementLhsPair.java │ │ ├── IntegerPair.java │ │ ├── LhsTrie.java │ │ ├── LhsTrieElement.java │ │ ├── NonFDTree.java │ │ ├── NonFDTreeElement.java │ │ ├── PLIBuilder.java │ │ └── PositionListIndex.java │ │ └── utils │ │ ├── LhsUtils.java │ │ ├── Logger.java │ │ └── ValueComparator.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── cfdfinder │ ├── expansion │ └── ConstantPatternExpansionStrategyTest.java │ ├── pattern │ ├── PatternTest.java │ ├── UpdateCoverTest.java │ └── UpdateKeepersTest.java │ ├── result │ ├── ResultLatticeTest.java │ └── ResultTreeTest.java │ └── utils │ └── LhsUtilsTest.java ├── DVA ├── .gitignore ├── pom.xml └── src │ └── main │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dva │ ├── DVA.java │ └── DVAAlgorithm.java ├── DVAKMV ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ 
└── de │ │ └── metanome │ │ └── algorithms │ │ └── dvakmv │ │ ├── AKMV.java │ │ ├── DVAKMV.java │ │ ├── DVAKMVAlgorithm.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvakmv │ └── DVAKMVTest.java ├── DVAMS ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvams │ │ ├── AMS.java │ │ ├── DVAMS.java │ │ ├── DVAMSAlgorithm.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvams │ └── DVCAMSest.java ├── DVBJKST ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvbjkst │ │ ├── BJKST.java │ │ ├── DVBJKST.java │ │ ├── DVBJKSTAlgorithm.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvbjkst │ └── DVBJKSTest.java ├── DVBloomFilter ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvbf │ │ ├── BloomCalculations.java │ │ ├── BloomFilter.java │ │ ├── DVBloomFilter.java │ │ ├── DVBloomFilterAlgorithm.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvbf │ └── DVBFTest.java ├── DVFM ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvfm │ │ ├── DVFM.java │ │ ├── DVFMAlgorithm.java │ │ ├── FlajoletMartin.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvfm │ └── DVFMTest.java ├── DVHyperLogLog ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvhyperloglog │ │ ├── DVHyperLogLog.java │ │ ├── DVHyperLogLogAlgorithm.java │ │ ├── HyperLogLog.java │ │ ├── MurmurHash.java │ │ └── RegisterSet.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvhyperloglog │ └── DVHyperLogLogTest.java ├── DVHyperLogLogPlus ├── 
.gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvhyperloglogplus │ │ ├── DVHyperLogLogAlgorithmplus.java │ │ ├── DVHyperLogLogPlus.java │ │ ├── HyperLogLogPlus.java │ │ ├── MurmurHash.java │ │ ├── RegisterSet.java │ │ └── Varint.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvhyperloglog │ └── DVHyperLogLogPlusTest.java ├── DVLC ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvlc │ │ ├── DVLC.java │ │ ├── DVLCAlgorithm.java │ │ ├── LinearCounting.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvlc │ └── DVCLTest.java ├── DVLogLog ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvloglog │ │ ├── DVLogLog.java │ │ ├── DVLogLogAlgorithm.java │ │ ├── LogLog.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvloglog │ └── DVLogLogTest.java ├── DVMinCount ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvmincount │ │ ├── DVMinCount.java │ │ ├── DVMinCountAlgorithm.java │ │ ├── MinCount.java │ │ └── MurmurHash.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvmincount │ └── DVMinCountTest.java ├── DVPCSA ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvpcsa │ │ ├── DVPCSA.java │ │ ├── DVPCSAAlgorithm.java │ │ ├── MurmurHash.java │ │ └── PCSA.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dvpcsa │ └── DVPCSATest.java ├── DVSuperLogLog ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dvsuperloglog │ │ ├── DVSuperLogLog.java │ │ ├── DVSuperLogLogAlgorithm.java │ │ ├── MurmurHash.java │ │ └── SuperLogLog.java │ └── test │ └── 
java │ └── de │ └── metanome │ └── algorithms │ └── dvsuperloglog │ └── DVSuperLogLogTest.java ├── FAIDA ├── FAIDAAlgorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── de │ │ │ │ └── hpi │ │ │ │ └── mpss2015n │ │ │ │ └── approxind │ │ │ │ ├── FAIDA.java │ │ │ │ ├── FAIDACore.java │ │ │ │ ├── InclusionTester.java │ │ │ │ ├── RowSampler.java │ │ │ │ ├── datastructures │ │ │ │ ├── BloomFilter.java │ │ │ │ ├── BottomKSketch.java │ │ │ │ ├── HyperLogLog.java │ │ │ │ ├── LongHeap.java │ │ │ │ ├── RegisterSet.java │ │ │ │ └── SampledInvertedIndex.java │ │ │ │ ├── inclusiontester │ │ │ │ ├── BloomFilterInclusionTester.java │ │ │ │ ├── BottomKSketchTester.java │ │ │ │ ├── CombinedHashSetInclusionTester.java │ │ │ │ ├── CombinedInclusionTester.java │ │ │ │ ├── HLLInclusionTester.java │ │ │ │ └── HashSetInclusionTester.java │ │ │ │ ├── sampler │ │ │ │ ├── IdentityRowSampler.java │ │ │ │ ├── RandomAccessSample.java │ │ │ │ ├── RandomAccessSampler.java │ │ │ │ ├── ReservoirRowSample.java │ │ │ │ ├── ReservoirRowSampler.java │ │ │ │ └── SampleGenerator.java │ │ │ │ └── utils │ │ │ │ ├── AOCacheMap.java │ │ │ │ ├── AbstractColumnStore.java │ │ │ │ ├── Arity.java │ │ │ │ ├── CandidateGenerator.java │ │ │ │ ├── ColumnIterator.java │ │ │ │ ├── DebugCounter.java │ │ │ │ ├── HLL │ │ │ │ └── HLLData.java │ │ │ │ ├── HashedColumnStore.java │ │ │ │ ├── IndConverter.java │ │ │ │ ├── ReservoirSampler.java │ │ │ │ ├── SimpleColumnCombination.java │ │ │ │ ├── SimpleInd.java │ │ │ │ └── VirtualColumnStore.java │ │ └── resources │ │ │ └── simplelogger.properties │ │ └── test │ │ ├── java │ │ └── de │ │ │ └── hpi │ │ │ └── mpss2015n │ │ │ └── approxind │ │ │ ├── FAIDACoreTest.java │ │ │ ├── inclusiontester │ │ │ └── InclusionTesterTest.java │ │ │ ├── mocks │ │ │ ├── RelationalInputBuilder.java │ │ │ ├── RelationalInputGeneratorMock.java │ │ │ └── RelationalInputMock.java │ │ │ ├── sampler │ │ │ └── ReservoirRowSamplerTest.java │ │ │ └── utils │ │ │ ├── 
CandidateGeneratorTest.java │ │ │ ├── HashedColumnStoreTest.java │ │ │ ├── SimpleColumnCombinationTest.java │ │ │ ├── SimpleIndTest.java │ │ │ └── VirtualColumnStoreTest.java │ │ └── resources │ │ └── simplelogger.properties └── pom.xml ├── HyFD ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── hyfd │ │ │ ├── HyFD.java │ │ │ ├── Inductor.java │ │ │ ├── MemoryGuardian.java │ │ │ ├── Sampler.java │ │ │ ├── Validator.java │ │ │ ├── deprecated │ │ │ └── HyFD.java │ │ │ ├── fdep │ │ │ └── FDEP.java │ │ │ ├── structures │ │ │ ├── ClusterIdentifier.java │ │ │ ├── ClusterIdentifierWithRecord.java │ │ │ ├── ClusterTree.java │ │ │ ├── ClusterTreeElement.java │ │ │ ├── FDList.java │ │ │ ├── FDSet.java │ │ │ ├── FDTree.java │ │ │ ├── FDTreeElement.java │ │ │ ├── FDTreeElementLhsPair.java │ │ │ ├── IntegerPair.java │ │ │ ├── LhsTrie.java │ │ │ ├── LhsTrieElement.java │ │ │ ├── NonFDTree.java │ │ │ ├── NonFDTreeElement.java │ │ │ ├── PLIBuilder.java │ │ │ └── PositionListIndex.java │ │ │ └── utils │ │ │ ├── Logger.java │ │ │ └── ValueComparator.java │ └── resources │ │ ├── abalone.csv │ │ └── bridges.csv │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── hyfd │ ├── FDAlgorithmTest.java │ ├── FDTreeInDepthTest.java │ ├── FDTreeTest.java │ ├── HyFDTest.java │ └── fixtures │ ├── AbaloneFixture.java │ ├── AbstractAlgorithmTestFixture.java │ ├── AlgorithmTestFixture.java │ ├── AlgorithmTestFixture1.java │ ├── AlgorithmTestFixture10.java │ ├── AlgorithmTestFixture11.java │ ├── AlgorithmTestFixture12.java │ ├── AlgorithmTestFixture13.java │ ├── AlgorithmTestFixture14.java │ ├── AlgorithmTestFixture15.java │ ├── AlgorithmTestFixture16.java │ ├── AlgorithmTestFixture17.java │ ├── AlgorithmTestFixture18.java │ ├── AlgorithmTestFixture19.java │ ├── AlgorithmTestFixture2.java │ ├── AlgorithmTestFixture20.java │ ├── AlgorithmTestFixture3.java │ ├── AlgorithmTestFixture4.java │ ├── AlgorithmTestFixture5.java │ ├── 
AlgorithmTestFixture6.java │ ├── AlgorithmTestFixture7.java │ ├── AlgorithmTestFixture8.java │ ├── AlgorithmTestFixture9.java │ └── BridgesFixture.java ├── HyMD ├── .gitignore ├── config │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── config │ │ ├── IndexBuilderConfiguration.java │ │ ├── MappingConfiguration.java │ │ ├── MinThresholdConfiguration.java │ │ ├── SimilarityComputerConfiguration.java │ │ ├── SimilarityMeasureConfiguration.java │ │ └── ThresholdFilterConfiguration.java ├── core │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ ├── ColumnMapping.java │ │ │ ├── Discoverer.java │ │ │ ├── MatchingDependency.java │ │ │ ├── MatchingDependencyResult.java │ │ │ ├── SupportCalculator.java │ │ │ ├── ThresholdFilter.java │ │ │ ├── ThresholdProvider.java │ │ │ ├── impl │ │ │ ├── ConstantSupportCalculator.java │ │ │ ├── SizeBasedSupportCalculator.java │ │ │ ├── result │ │ │ │ └── FileResultWriter.java │ │ │ └── threshold │ │ │ │ ├── ExactThresholdFilter.java │ │ │ │ ├── LimitSizeThresholdFilter.java │ │ │ │ ├── LimitSizeUtils.java │ │ │ │ ├── MultiThresholdProvider.java │ │ │ │ ├── RelativeLimitSizeThresholdFilter.java │ │ │ │ ├── SingleThresholdProvider.java │ │ │ │ ├── StepThresholdFilter.java │ │ │ │ ├── ThresholdFilterUtils.java │ │ │ │ ├── UniformDistributionThresholdProviderThresholdFilter.java │ │ │ │ └── UniformTrimmer.java │ │ │ ├── result │ │ │ ├── AbstractResultEmitter.java │ │ │ ├── ResultEmitter.java │ │ │ └── ResultListener.java │ │ │ └── util │ │ │ └── enforce │ │ │ ├── EnforceMatch.java │ │ │ ├── EnforcerBuilder.java │ │ │ └── MDEnforcer.java │ │ └── test │ │ └── java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ ├── impl │ │ ├── result │ │ │ └── FileResultWriterTest.java │ │ └── threshold │ │ │ ├── ExactThresholdProviderTest.java │ │ │ ├── MultiThresholdProviderTest.java │ │ │ ├── StepThresholdProviderTest.java │ │ │ └── 
UniformTrimmerTest.java │ │ └── result │ │ └── AbstractResultEmitterTest.java ├── db │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ └── relational │ │ │ ├── AbstractRow.java │ │ │ ├── Column.java │ │ │ ├── ColumnImpl.java │ │ │ ├── ColumnPair.java │ │ │ ├── HasName.java │ │ │ ├── HasSchema.java │ │ │ ├── InputCloseException.java │ │ │ ├── InputException.java │ │ │ ├── InputOpenException.java │ │ │ ├── Relation.java │ │ │ ├── RelationalInput.java │ │ │ ├── Row.java │ │ │ ├── RowImpl.java │ │ │ ├── Schema.java │ │ │ ├── SchemaImpl.java │ │ │ └── jdbc │ │ │ ├── ResultSetInput.java │ │ │ ├── ResultSetIterator.java │ │ │ ├── ResultSetRelation.java │ │ │ └── ResultSetSchemaFactory.java │ │ └── test │ │ ├── java │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ ├── relational │ │ │ ├── ColumnPairTest.java │ │ │ ├── RelationTest.java │ │ │ ├── RowImplTest.java │ │ │ └── jdbc │ │ │ │ ├── ResultSetInputTest.java │ │ │ │ ├── ResultSetRelationTest.java │ │ │ │ └── ResultSetSchemaFactoryTest.java │ │ │ └── util │ │ │ ├── ConnectionConfigurator.java │ │ │ └── JdbcTest.java │ │ └── resources │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── relational │ │ └── jdbc │ │ ├── dataset.xml │ │ └── schema.sql ├── demo │ ├── config │ │ ├── default.config.json │ │ ├── default_single.config.json │ │ ├── example.config.json │ │ └── fd.config.json │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── de │ │ │ │ └── hpi │ │ │ │ └── is │ │ │ │ └── md │ │ │ │ ├── demo │ │ │ │ ├── ContingencyTable.java │ │ │ │ ├── EnforceDemo.java │ │ │ │ ├── Jackson.java │ │ │ │ ├── MDDemo.java │ │ │ │ ├── MDParser.java │ │ │ │ ├── ResultProcessor.java │ │ │ │ ├── Runner.java │ │ │ │ ├── RunnerConfiguration.java │ │ │ │ └── input │ │ │ │ │ ├── IOProvider.java │ │ │ │ │ ├── IOReceiver.java │ │ │ │ │ └── InputLooper.java │ │ │ │ ├── eval │ │ │ │ ├── EvaluationTask.java │ │ │ │ └── Evaluator.java │ │ │ │ └── jcommander │ │ │ │ ├── 
Application.java │ │ │ │ ├── JCommanderJdbcConfiguration.java │ │ │ │ └── JCommanderRunner.java │ │ └── resources │ │ │ └── simplelogger.properties │ │ └── test │ │ └── java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── jcommander │ │ └── JCommanderRunnerTest.java ├── hybrid │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ ├── hybrid │ │ │ ├── Analyzer.java │ │ │ ├── ArrayDictionaryRecords.java │ │ │ ├── ColumnConfiguration.java │ │ │ ├── DictionaryRecords.java │ │ │ ├── DiscoveryConfiguration.java │ │ │ ├── HyMDProperties.java │ │ │ ├── HybridDiscoverer.java │ │ │ ├── HybridEnforcerBuilder.java │ │ │ ├── HybridExecutor.java │ │ │ ├── HybridExecutorBuilder.java │ │ │ ├── Lattice.java │ │ │ ├── LatticeHelper.java │ │ │ ├── LevelBundle.java │ │ │ ├── MDMapping.java │ │ │ ├── MDUtil.java │ │ │ ├── PositionListIndex.java │ │ │ ├── Preprocessed.java │ │ │ ├── PreprocessedColumnPair.java │ │ │ ├── PreprocessingColumnConfiguration.java │ │ │ ├── PreprocessingConfiguration.java │ │ │ ├── Preprocessor.java │ │ │ ├── ResultTransformer.java │ │ │ ├── Rhs.java │ │ │ ├── Sampler.java │ │ │ ├── SimilarityIndex.java │ │ │ ├── SimilaritySet.java │ │ │ ├── SupportedMD.java │ │ │ ├── ValidationResult.java │ │ │ ├── Validator.java │ │ │ ├── impl │ │ │ │ ├── ColumnPairWithThreshold.java │ │ │ │ ├── LhsSelector.java │ │ │ │ ├── RecordGrouper.java │ │ │ │ ├── RecordGrouperImpl.java │ │ │ │ ├── Selector.java │ │ │ │ ├── infer │ │ │ │ │ ├── FullLhsSpecializer.java │ │ │ │ │ ├── FullSpecializer.java │ │ │ │ │ ├── LhsModifier.java │ │ │ │ │ ├── LhsRhsDisjointnessFilter.java │ │ │ │ │ ├── LhsSpecializer.java │ │ │ │ │ ├── NonTrivialFilter.java │ │ │ │ │ ├── SpecializationFilter.java │ │ │ │ │ └── ThresholdLowerer.java │ │ │ │ ├── lattice │ │ │ │ │ ├── FullLattice.java │ │ │ │ │ ├── candidate │ │ │ │ │ │ ├── CandidateAddContext.java │ │ │ │ │ │ ├── CandidateContainsContext.java │ │ │ │ │ │ ├── CandidateLattice.java │ │ │ │ │ │ 
├── CandidateNode.java │ │ │ │ │ │ ├── CandidateRemoveContext.java │ │ │ │ │ │ ├── CandidateThresholdNode.java │ │ │ │ │ │ ├── LhsContext.java │ │ │ │ │ │ └── ValueHolder.java │ │ │ │ │ ├── lhs │ │ │ │ │ │ ├── LhsAddContext.java │ │ │ │ │ │ ├── LhsContainsContext.java │ │ │ │ │ │ ├── LhsContext.java │ │ │ │ │ │ ├── LhsLattice.java │ │ │ │ │ │ ├── LhsNode.java │ │ │ │ │ │ ├── LhsThresholdNode.java │ │ │ │ │ │ └── ValueHolder.java │ │ │ │ │ └── md │ │ │ │ │ │ ├── AddContext.java │ │ │ │ │ │ ├── AddIfMinimalContext.java │ │ │ │ │ │ ├── Cardinality.java │ │ │ │ │ │ ├── ContainsContext.java │ │ │ │ │ │ ├── Dimensions.java │ │ │ │ │ │ ├── LatticeImpl.java │ │ │ │ │ │ ├── LatticeMDImpl.java │ │ │ │ │ │ ├── LevelFunction.java │ │ │ │ │ │ ├── LevelRetriever.java │ │ │ │ │ │ ├── LhsRhsPair.java │ │ │ │ │ │ ├── MDContext.java │ │ │ │ │ │ ├── MDSiteContext.java │ │ │ │ │ │ ├── MaxContext.java │ │ │ │ │ │ ├── Node.java │ │ │ │ │ │ ├── ThresholdNode.java │ │ │ │ │ │ └── ViolatedMDFinder.java │ │ │ │ ├── level │ │ │ │ │ ├── AnalyzeTask.java │ │ │ │ │ ├── BatchValidator.java │ │ │ │ │ ├── Candidate.java │ │ │ │ │ ├── CandidateProcessor.java │ │ │ │ │ ├── LevelStrategy.java │ │ │ │ │ ├── LevelWiseExecutor.java │ │ │ │ │ ├── LevelWiseExecutorBuilder.java │ │ │ │ │ ├── Statistics.java │ │ │ │ │ ├── ValidationTask.java │ │ │ │ │ ├── ViolationHandler.java │ │ │ │ │ ├── analyze │ │ │ │ │ │ ├── AnalyzeStrategy.java │ │ │ │ │ │ ├── AnalyzerImpl.java │ │ │ │ │ │ ├── InferHandler.java │ │ │ │ │ │ ├── MDSpecializer.java │ │ │ │ │ │ ├── NotSupportedStrategy.java │ │ │ │ │ │ ├── SupportBasedFactory.java │ │ │ │ │ │ └── SupportedStrategy.java │ │ │ │ │ ├── minimal │ │ │ │ │ │ ├── AlreadyMinimalStrategy.java │ │ │ │ │ │ └── CandidateBuilder.java │ │ │ │ │ └── minimizing │ │ │ │ │ │ ├── CandidateBuilder.java │ │ │ │ │ │ ├── IntermediateCandidate.java │ │ │ │ │ │ ├── LatticeMinimizer.java │ │ │ │ │ │ ├── MinimalRhsFilter.java │ │ │ │ │ │ ├── Minimizer.java │ │ │ │ │ │ ├── 
MinimizingLevelStrategy.java │ │ │ │ │ │ └── SimpleMinimizer.java │ │ │ │ ├── md │ │ │ │ │ ├── MDElementImpl.java │ │ │ │ │ ├── MDImpl.java │ │ │ │ │ └── MDSiteImpl.java │ │ │ │ ├── preprocessed │ │ │ │ │ ├── ArrayPositionListIndex.java │ │ │ │ │ ├── ColumnCompressor.java │ │ │ │ │ ├── CompressedColumn.java │ │ │ │ │ ├── CompressedRelation.java │ │ │ │ │ ├── Compressor.java │ │ │ │ │ ├── CompressorBuilder.java │ │ │ │ │ ├── MapDictionaryRecords.java │ │ │ │ │ ├── MapPositionListIndex.java │ │ │ │ │ ├── PreprocessedColumnPairBuilder.java │ │ │ │ │ ├── PreprocessedColumnPairImpl.java │ │ │ │ │ └── PreprocessorImpl.java │ │ │ │ ├── sampling │ │ │ │ │ ├── Inferrer.java │ │ │ │ │ ├── MDSpecializer.java │ │ │ │ │ ├── SamplerImpl.java │ │ │ │ │ ├── SamplingExecutor.java │ │ │ │ │ ├── SamplingExecutorBuilder.java │ │ │ │ │ ├── SimilaritySetProcessor.java │ │ │ │ │ └── Statistics.java │ │ │ │ ├── sim │ │ │ │ │ ├── AbstractSimilarityIndexBuilder.java │ │ │ │ │ ├── PreprocessedSimilarity.java │ │ │ │ │ ├── SimilarityArrayRowBuilder.java │ │ │ │ │ ├── SimilarityArrayTableFactory.java │ │ │ │ │ ├── SimilarityHashTableFactory.java │ │ │ │ │ ├── SimilarityMapRowBuilder.java │ │ │ │ │ ├── SimilarityPreprocessor.java │ │ │ │ │ ├── SimilarityReceiver.java │ │ │ │ │ ├── SimilarityRowBuilder.java │ │ │ │ │ ├── SimilarityTableBuilder.java │ │ │ │ │ ├── SimilarityTableBuilderImpl.java │ │ │ │ │ ├── slim │ │ │ │ │ │ ├── SlimSimilarityIndex.java │ │ │ │ │ │ ├── SlimSimilarityIndexBuilder.java │ │ │ │ │ │ └── SlimSimilarityReceiver.java │ │ │ │ │ └── threshold │ │ │ │ │ │ ├── CollectingSimilarityIndexBuilder.java │ │ │ │ │ │ ├── CollectingSimilarityReceiver.java │ │ │ │ │ │ ├── CollectingThresholdMap.java │ │ │ │ │ │ ├── FastSimilarityIndexBuilder.java │ │ │ │ │ │ ├── FastSimilarityReceiver.java │ │ │ │ │ │ ├── FlatThresholdMap.java │ │ │ │ │ │ ├── ThresholdMap.java │ │ │ │ │ │ ├── ThresholdMapArrayFlattener.java │ │ │ │ │ │ ├── ThresholdMapBuilder.java │ │ │ │ │ │ ├── 
ThresholdMapFlattener.java │ │ │ │ │ │ ├── ThresholdMapHashFlattener.java │ │ │ │ │ │ ├── ThresholdSimilarityIndex.java │ │ │ │ │ │ └── ThresholdSimilarityReceiver.java │ │ │ │ └── validation │ │ │ │ │ ├── AbstractRhsValidationTask.java │ │ │ │ │ ├── Classifier.java │ │ │ │ │ ├── GroupingRhsValidationTask.java │ │ │ │ │ ├── GroupingRhsValidationTaskFactory.java │ │ │ │ │ ├── RhsValidationTask.java │ │ │ │ │ ├── RhsValidationTaskImpl.java │ │ │ │ │ ├── RhsValidationTaskImplFactory.java │ │ │ │ │ ├── TrivialRhsValidationTask.java │ │ │ │ │ ├── TrivialRhsValidationTaskFactory.java │ │ │ │ │ ├── ValidationTask.java │ │ │ │ │ ├── ValidatorBuilder.java │ │ │ │ │ ├── ValidatorImpl.java │ │ │ │ │ ├── arbitrary │ │ │ │ │ ├── ArbitraryValidationTask.java │ │ │ │ │ ├── ArbitraryValidationTaskFactory.java │ │ │ │ │ ├── ArbitraryValidationTaskFactoryBuilder.java │ │ │ │ │ └── LhsValidationTaskFactory.java │ │ │ │ │ ├── empty │ │ │ │ │ ├── EmptyValidationTask.java │ │ │ │ │ ├── EmptyValidationTaskFactory.java │ │ │ │ │ └── RhsValidationTask.java │ │ │ │ │ └── single │ │ │ │ │ ├── SingleValidationTask.java │ │ │ │ │ ├── SingleValidationTaskFactory.java │ │ │ │ │ └── SingleValidationTaskFactoryBuilder.java │ │ │ └── md │ │ │ │ ├── MD.java │ │ │ │ ├── MDElement.java │ │ │ │ ├── MDSite.java │ │ │ │ └── MDSiteIterator.java │ │ │ └── util │ │ │ └── enforce │ │ │ ├── ActualEnforcer.java │ │ │ ├── ArbitraryActualEnforcer.java │ │ │ ├── CompressedEnforceMatch.java │ │ │ ├── EmptyActualEnforcer.java │ │ │ ├── EnforcerFactory.java │ │ │ ├── HybridMDEnforcer.java │ │ │ ├── RecordInflater.java │ │ │ ├── RecordSelector.java │ │ │ └── SingleActualEnforcer.java │ │ └── test │ │ └── java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── hybrid │ │ ├── DictionaryRecordsTest.java │ │ ├── LatticeHelperTest.java │ │ ├── LatticeTest.java │ │ ├── MDTest.java │ │ ├── PositionListIndexTest.java │ │ ├── ResultTransformerTest.java │ │ ├── SimilaritySetTest.java │ │ ├── ValidatorTest.java │ │ └── impl │ 
│ ├── infer │ │ └── ThresholdLowererTest.java │ │ ├── lattice │ │ ├── FullLatticeTest.java │ │ ├── lhs │ │ │ └── LhsLatticeTest.java │ │ └── md │ │ │ └── LatticeImplTest.java │ │ ├── level │ │ ├── BatchValidatorTest.java │ │ ├── CandidateProcessorTest.java │ │ ├── StatisticsTest.java │ │ ├── ValidationTaskTest.java │ │ ├── ViolationHandlerTest.java │ │ ├── analyze │ │ │ ├── AnalyzerTest.java │ │ │ ├── MDSpecializerTest.java │ │ │ └── SupportBasedFactoryTest.java │ │ └── minimizing │ │ │ ├── CandidateBuilderTest.java │ │ │ ├── CandidateTest.java │ │ │ └── MinimalRhsFilterTest.java │ │ ├── md │ │ └── MDImplTest.java │ │ ├── preprocessed │ │ ├── ArrayDictionaryRecordsTest.java │ │ ├── ArrayPositionListIndexTest.java │ │ ├── CompressorTest.java │ │ ├── MapDictionaryRecordsTest.java │ │ ├── MapPositionListIndexTest.java │ │ └── PreprocessedColumnPairImplTest.java │ │ ├── sampling │ │ ├── InferrerTest.java │ │ ├── MDSpecializerTest.java │ │ ├── SamplerImplTest.java │ │ ├── SimilaritySetProcessorTest.java │ │ └── StatisticsTest.java │ │ ├── sim │ │ ├── SimilarityIndexTest.java │ │ ├── SimilarityTableBuilderTest.java │ │ ├── slim │ │ │ └── SlimSimilarityIndexTest.java │ │ └── threshold │ │ │ ├── CollectingThresholdMapTest.java │ │ │ ├── ThresholdMapArrayFlattenerTest.java │ │ │ ├── ThresholdMapFlattenerTest.java │ │ │ ├── ThresholdMapHashFlattenerTest.java │ │ │ └── ThresholdSimilarityIndexTest.java │ │ └── validation │ │ ├── ClassifierTest.java │ │ ├── GroupingRhsValidationTaskTest.java │ │ ├── RhsValidationTaskImplTest.java │ │ ├── RhsValidationTaskTest.java │ │ └── ValidatorImplTest.java ├── mapping │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ └── mapping │ │ │ ├── SchemaMapper.java │ │ │ ├── SchemaMapperHelper.java │ │ │ └── impl │ │ │ ├── ColumnMappingsConverter.java │ │ │ ├── FixedSchemaMapper.java │ │ │ ├── SelfSchemaMapper.java │ │ │ └── TypeSchemaMapper.java │ │ └── test │ │ └── java │ │ └── de │ │ 
└── hpi │ │ └── is │ │ └── md │ │ └── mapping │ │ └── impl │ │ ├── FixedSchemaMapperTest.java │ │ ├── SelfSchemaMapperTest.java │ │ └── TypeSchemaMapperTest.java ├── metanome │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── hymd │ │ │ ├── HyMD.java │ │ │ ├── Jackson.java │ │ │ ├── MetanomeRelation.java │ │ │ ├── MetanomeRelationalInput.java │ │ │ ├── MetanomeRelationalInputIterator.java │ │ │ ├── MetanomeResultListener.java │ │ │ └── MetanomeSchema.java │ │ └── resources │ │ └── simplelogger.properties ├── pom.xml ├── sim │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── hpi │ │ │ └── is │ │ │ └── md │ │ │ └── sim │ │ │ ├── CachedSimilarityMeasure.java │ │ │ ├── DefaultSimilarityClassifier.java │ │ │ ├── DistanceMetric.java │ │ │ ├── PairGenerator.java │ │ │ ├── Similarity.java │ │ │ ├── SimilarityClassifier.java │ │ │ ├── SimilarityComputer.java │ │ │ ├── SimilarityMeasure.java │ │ │ └── impl │ │ │ ├── CrossPairGenerator.java │ │ │ ├── DateSimilarity.java │ │ │ ├── DistanceMetricSimilarity.java │ │ │ ├── EqualPairGenerator.java │ │ │ ├── EqualsSimilarityMeasure.java │ │ │ ├── LevenshteinDistanceMetric.java │ │ │ ├── LevenshteinSimilarity.java │ │ │ ├── NullIsNotNullSemantics.java │ │ │ ├── NullIsNullSemantics.java │ │ │ ├── SimilarityCalculator.java │ │ │ ├── SimilarityComputerImpl.java │ │ │ ├── SortedNeighborhoodPairGenerator.java │ │ │ ├── SqlDateSimilarity.java │ │ │ └── StringMetricSimilarityMeasure.java │ │ └── test │ │ └── java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── sim │ │ ├── CachedSimilarityMeasureTest.java │ │ ├── DefaultSimilarityClassifierTest.java │ │ ├── SimilarityComputerTest.java │ │ ├── SimilarityMeasureTest.java │ │ └── impl │ │ ├── CrossPairGeneratorTest.java │ │ ├── EqualsSimilarityMeasureTest.java │ │ ├── LevenshteinSimilarityTest.java │ │ ├── SimilarityComputerImplTest.java │ │ └── SortedNeighborhoodPairGeneratorTest.java └── util │ ├── pom.xml 
│ └── src │ ├── main │ └── java │ │ ├── com │ │ └── bakdata │ │ │ └── util │ │ │ └── jackson │ │ │ ├── CPSBase.java │ │ │ ├── CPSType.java │ │ │ ├── CPSTypeIdResolver.java │ │ │ └── CPSTypes.java │ │ └── de │ │ └── hpi │ │ └── is │ │ └── md │ │ └── util │ │ ├── AbstractDictionary.java │ │ ├── AbstractInt2Double2ObjectSortedTable.java │ │ ├── AbstractPollCollection.java │ │ ├── BetterConsumer.java │ │ ├── BetterFunction.java │ │ ├── BetterMap.java │ │ ├── BetterMapDecorator.java │ │ ├── BetterSupplier.java │ │ ├── BigDecimalUtils.java │ │ ├── CacheableSupplier.java │ │ ├── CastUtils.java │ │ ├── CollectionUtils.java │ │ ├── DefaultDictionary.java │ │ ├── Dictionary.java │ │ ├── DictionaryInverter.java │ │ ├── Differ.java │ │ ├── DiskCache.java │ │ ├── DoubleObjectBiConsumer.java │ │ ├── FileUtils.java │ │ ├── Hashable.java │ │ ├── Hasher.java │ │ ├── HeadAndTailIntersector.java │ │ ├── Int2Double2ObjectSortedArrayTable.java │ │ ├── Int2Double2ObjectSortedMapTable.java │ │ ├── Int2Double2ObjectSortedTable.java │ │ ├── Int2DoubleArrayRow.java │ │ ├── Int2DoubleMapRow.java │ │ ├── Int2Int2DoubleArrayTable.java │ │ ├── Int2Int2DoubleHashTable.java │ │ ├── Int2Int2DoubleTable.java │ │ ├── Int2ObjectHashMultimap.java │ │ ├── Int2ObjectMultimap.java │ │ ├── IntArrayPair.java │ │ ├── IntObjectBiConsumer.java │ │ ├── IteratorUtils.java │ │ ├── JdbcUtils.java │ │ ├── LazyArray.java │ │ ├── LazyMap.java │ │ ├── LazyMapImpl.java │ │ ├── MathUtils.java │ │ ├── MetricsUtils.java │ │ ├── NullComparator.java │ │ ├── ObjectUtils.java │ │ ├── OptionalDouble.java │ │ ├── Optionals.java │ │ ├── PollCollection.java │ │ ├── PollSet.java │ │ ├── ReflectionUtils.java │ │ ├── Reporter.java │ │ ├── StoredObjectService.java │ │ ├── StreamUtils.java │ │ ├── StringUtils.java │ │ ├── Trimmer.java │ │ ├── TupleUtils.java │ │ ├── UnorderedPair.java │ │ ├── ValueWrapper.java │ │ └── jackson │ │ ├── Converters.java │ │ ├── Entry.java │ │ ├── EnumNameDeserializer.java │ │ └── 
SingletonDeserializer.java │ └── test │ └── java │ └── de │ └── hpi │ └── is │ └── md │ └── util │ ├── BetterConsumerTest.java │ ├── BetterFunctionTest.java │ ├── BigDecimalUtilsTest.java │ ├── CastUtilsTest.java │ ├── CollectionUtilsTest.java │ ├── DefaultDictionaryTest.java │ ├── DictionaryTest.java │ ├── DifferTest.java │ ├── DiskCacheTest.java │ ├── DoubleObjectBiConsumerTest.java │ ├── FileUtilsTest.java │ ├── HashableTest.java │ ├── HasherTest.java │ ├── Int2Double2ObjectSortedArrayTableTest.java │ ├── Int2Double2ObjectSortedMapTableTest.java │ ├── Int2Double2ObjectSortedTableTest.java │ ├── Int2DoubleArrayRowTest.java │ ├── Int2DoubleMapRowTest.java │ ├── Int2DoubleRowTest.java │ ├── Int2Int2DoubleArrayTableTest.java │ ├── Int2Int2DoubleHashTableTest.java │ ├── Int2Int2DoubleTableTest.java │ ├── Int2ObjectHashMultimapTest.java │ ├── Int2ObjectMultimapTest.java │ ├── IntegerObjectBiConsumerTest.java │ ├── IteratorUtilsTest.java │ ├── JdbcUtilsTest.java │ ├── LazyArrayTest.java │ ├── LazyMapImplTest.java │ ├── LazyMapTest.java │ ├── MathUtilsTest.java │ ├── MetricsUtilsTest.java │ ├── NullComparatorTest.java │ ├── ObjectUtilsTest.java │ ├── OptionalDoubleTest.java │ ├── OptionalsTest.java │ ├── PollSetTest.java │ ├── ReflectionUtilsTest.java │ ├── StoredObjectServiceTest.java │ ├── StringUtilsTest.java │ └── UnorderedPairTest.java ├── HyUCC ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── hyucc │ │ ├── HyUCC.java │ │ ├── Inductor.java │ │ ├── MemoryGuardian.java │ │ ├── Sampler.java │ │ ├── Validator.java │ │ ├── structures │ │ ├── ClusterIdentifier.java │ │ ├── ClusterIdentifierWithRecord.java │ │ ├── ClusterTree.java │ │ ├── ClusterTreeElement.java │ │ ├── IntegerPair.java │ │ ├── PLIBuilder.java │ │ ├── PositionListIndex.java │ │ ├── UCCList.java │ │ ├── UCCSet.java │ │ ├── UCCTree.java │ │ ├── UCCTreeElement.java │ │ └── UCCTreeElementUCCPair.java │ │ └── utils │ │ ├── Logger.java │ │ └── 
ValueComparator.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── hyucc │ └── HyUCCTest.java ├── LICENSE ├── MANY ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── many │ │ ├── INDDetectionWorker.java │ │ ├── LRUCache.java │ │ ├── MANY.java │ │ ├── bitvectors │ │ ├── BitVector.java │ │ ├── BitVectorFactory.java │ │ ├── LongArrayBitVector.java │ │ ├── LongArrayHierarchicalBitVector.java │ │ └── SynchronizedBitVector.java │ │ ├── bloom_filtering │ │ ├── BloomFilter.java │ │ └── HashFactory.java │ │ ├── driver │ │ ├── AnelosimusDriver.java │ │ ├── AnelosimusParameters.java │ │ └── SynchronizedDiscInclusionDependencyResultReceiver.java │ │ ├── filter │ │ └── ColumnFilter.java │ │ ├── helper │ │ └── PrintHelper.java │ │ └── io │ │ ├── FileInputIterator.java │ │ └── InputIterator.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── many │ └── test │ ├── BitVectorTest.java │ ├── BloomFilterTest.java │ ├── HierarchicalBitVectorTest.java │ ├── MANYIT.java │ ├── MANYTest.java │ ├── RelationalInputGeneratorMock.java │ └── RelationalInputMock.java ├── MvdDet ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── mvddet │ │ ├── ColumnBasedMvdFinder.java │ │ ├── MvD.java │ │ ├── MvDAlgorithmConfig.java │ │ ├── MvDDetector.java │ │ ├── MvDDetectorAlgorithm.java │ │ ├── MvdFinder.java │ │ ├── PositionListIndex.java │ │ ├── Relation.java │ │ ├── StringTuple.java │ │ └── Tuple.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── mvddet │ └── MvDDetectorTest.java ├── Normalize ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── normalize │ │ ├── Main.java │ │ ├── Normi.java │ │ ├── aspects │ │ ├── NormiConversion.java │ │ └── NormiPersistence.java │ │ ├── config │ │ └── Config.java │ │ ├── fddiscovery │ │ ├── FdDiscoverer.java │ │ └── HyFDFdDiscoverer.java │ │ ├── 
fdextension │ │ ├── FdExtender.java │ │ ├── NaiveFdExtender.java │ │ ├── PullingFdExtender.java │ │ └── PushingFdExtender.java │ │ ├── structures │ │ ├── FunctionalDependency.java │ │ ├── LhsNode.java │ │ ├── LhsTree.java │ │ └── Schema.java │ │ └── utils │ │ └── Utils.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── normi │ └── FunctionalDependencyTest.java ├── ORDER ├── pom.xml └── src │ ├── main │ ├── java │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── order │ │ │ ├── ORDER.java │ │ │ ├── ORDERLhsRhs.java │ │ │ ├── check │ │ │ ├── DependencyChecker.java │ │ │ └── DependencyResult.java │ │ │ ├── measurements │ │ │ └── Statistics.java │ │ │ ├── sorting │ │ │ └── partitions │ │ │ │ ├── RowIndexedDateValue.java │ │ │ │ ├── RowIndexedDoubleValue.java │ │ │ │ ├── RowIndexedLongValue.java │ │ │ │ ├── RowIndexedStringValue.java │ │ │ │ ├── RowIndexedValue.java │ │ │ │ ├── SortedPartition.java │ │ │ │ └── SortedPartitionCreator.java │ │ │ └── types │ │ │ ├── ByteArray.java │ │ │ ├── ByteArrayPermutations.java │ │ │ ├── Datatype.java │ │ │ ├── DatatypeDate.java │ │ │ ├── DatatypeDouble.java │ │ │ ├── DatatypeLong.java │ │ │ ├── DatatypeString.java │ │ │ ├── Permutation.java │ │ │ └── TypeInferrer.java │ └── resources │ │ └── simplelogger.properties │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── order │ ├── check │ └── DependencyCheckerTest.java │ ├── measurements │ └── StatisticsTest.java │ └── sorting │ └── partitions │ ├── DataLoaderTestHelper.java │ ├── SortedPartitionCreatorTest.java │ ├── SortedPartitionTest.java │ ├── TestCsvFileFixture.java │ ├── TestFileFixture.java │ ├── TestFileInputGenerator.java │ └── TestFileIterator.java ├── README.md ├── SCDP ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── singlecolumnprofiler │ │ ├── ColumnMainProfile.java │ │ ├── DataTypes.java │ │ ├── SingleColumnProfiler.java │ │ ├── 
SingleColumnProfilerAlgorithm.java │ │ └── Util.java │ └── test │ ├── java │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── singlecolumnprofiler │ │ └── SCDPTest.java │ └── resources │ └── small.csv ├── SPIDER ├── SPIDERAlgorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── spider │ │ │ ├── core │ │ │ └── SPIDER.java │ │ │ ├── sorting │ │ │ ├── TPMMS.java │ │ │ └── TPMMSTuple.java │ │ │ └── structures │ │ │ └── Attribute.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── spider │ │ └── SPIDERTest.java ├── SPIDERDatabase │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── spider │ │ └── SPIDERDatabase.java ├── SPIDERFile │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── spider │ │ └── SPIDERFile.java └── pom.xml ├── cody ├── README.md ├── cody-core │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ ├── ch │ │ └── javasoft │ │ │ └── bitset │ │ │ ├── BitSetFactory.java │ │ │ ├── IBitSet.java │ │ │ ├── LongBitSet.java │ │ │ └── search │ │ │ ├── SubSetSearch.java │ │ │ ├── SuperSetSearch.java │ │ │ ├── TreeSearch.java │ │ │ └── tree │ │ │ ├── InterNode.java │ │ │ ├── LeafNode.java │ │ │ └── Node.java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── cody │ │ └── codycore │ │ ├── App.java │ │ ├── Configuration.java │ │ ├── Preprocessor.java │ │ ├── Validator.java │ │ ├── candidate │ │ ├── CheckedColumnCombination.java │ │ ├── ColumnCombination.java │ │ └── ColumnCombinationUtils.java │ │ ├── pruning │ │ ├── CliquePruner.java │ │ ├── ComponentPruner.java │ │ └── PrunerFactory.java │ │ └── runner │ │ ├── ApproximateRunner.java │ │ ├── BaseRunner.java │ │ └── ExactRunner.java ├── cody-metanome │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── cody │ │ └── codymetanome │ │ └── 
CodyMetanome.java └── pom.xml ├── collect.bat ├── collect.sh ├── dao ├── pom.xml └── src │ └── main │ └── java │ └── de │ └── uni_potsdam │ └── hpi │ └── dao │ ├── DB2DataAccessObject.java │ ├── DataAccessObject.java │ ├── MySQLDataAccessObject.java │ └── PostgreSQLDataAccessObject.java ├── dcfinder ├── .gitignore ├── pom.xml └── src │ ├── main │ └── java │ │ ├── ch │ │ └── javasoft │ │ │ └── bitset │ │ │ ├── BitSetFactory.java │ │ │ ├── IBitSet.java │ │ │ ├── LongBitSet.java │ │ │ └── search │ │ │ ├── ISubsetBackend.java │ │ │ ├── ITreeSearch.java │ │ │ ├── NTreeSearch.java │ │ │ ├── SubSetSearch.java │ │ │ ├── SuperSetSearch.java │ │ │ ├── TreeSearch.java │ │ │ └── tree │ │ │ ├── InterNode.java │ │ │ ├── LeafNode.java │ │ │ └── Node.java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dcfinder │ │ ├── DCFinder.java │ │ ├── DCFinderMetanome.java │ │ ├── denialconstraints │ │ ├── DenialConstraint.java │ │ └── DenialConstraintSet.java │ │ ├── evidenceset │ │ ├── AtomicLongEvidenceSet.java │ │ ├── HashEvidenceSet.java │ │ ├── IEvidenceSet.java │ │ ├── TroveEvidenceSet.java │ │ └── builders │ │ │ ├── BufferedEvidenceSetBuilder.java │ │ │ └── SplitReconstructEvidenceSetBuilder.java │ │ ├── helpers │ │ ├── ArrayIndexComparator.java │ │ ├── BitSetTranslator.java │ │ ├── IndexProvider.java │ │ ├── LongArrayIndexComparator.java │ │ └── ParserHelper.java │ │ ├── input │ │ ├── Column.java │ │ ├── ColumnPair.java │ │ ├── Input.java │ │ ├── ParsedColumn.java │ │ └── partitions │ │ │ └── clusters │ │ │ ├── PLI.java │ │ │ ├── TupleIDProvider.java │ │ │ └── indexers │ │ │ ├── CategoricalTpIDsIndexer.java │ │ │ ├── ITPIDsIndexer.java │ │ │ └── NumericalTpIDsIndexer.java │ │ ├── predicates │ │ ├── PartitionRefiner.java │ │ ├── Predicate.java │ │ ├── PredicateBuilder.java │ │ ├── PredicatePair.java │ │ ├── PredicateProvider.java │ │ ├── operands │ │ │ └── ColumnOperand.java │ │ └── sets │ │ │ ├── Closure.java │ │ │ ├── PredicateSet.java │ │ │ └── PredicateSetFactory.java │ │ 
└── setcover │ │ ├── IMinimalCoverSearch.java │ │ └── partial │ │ ├── MinimalCoverCandidate.java │ │ └── MinimalCoverSearch.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── dcfinder │ └── dcfinder │ └── AppTest.java ├── dcucc ├── README.md ├── dcucc │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── de │ │ │ │ └── metanome │ │ │ │ └── algorithms │ │ │ │ └── dcucc │ │ │ │ ├── AndConditionTraverser.java │ │ │ │ ├── AndOrConditionTraverser.java │ │ │ │ ├── Condition.java │ │ │ │ ├── ConditionEntry.java │ │ │ │ ├── ConditionLatticeTraverser.java │ │ │ │ ├── ConditionalPositionListIndex.java │ │ │ │ ├── Dcucc.java │ │ │ │ ├── NotAndConditionTraverser.java │ │ │ │ ├── OrConditionTraverser.java │ │ │ │ ├── ResultSingleton.java │ │ │ │ ├── SelfConditionFinder.java │ │ │ │ ├── SimpleConditionTraverser.java │ │ │ │ └── SingleCondition.java │ │ └── resources │ │ │ ├── abalone.csv │ │ │ ├── breast-cancer-wisconsin.csv │ │ │ ├── bridges.csv │ │ │ └── hepatitis.csv │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dcucc │ │ ├── AndConditionTraverserTest.java │ │ ├── ConditionalPositionListIndexFixture.java │ │ ├── ConditionalPositionListIndexTest.java │ │ ├── DcuccAndOrTest.java │ │ ├── DcuccAndTest.java │ │ ├── DcuccOrTest.java │ │ ├── DcuccSimpleTest.java │ │ ├── OrConditionFixture.java │ │ └── PartialUniqueTest.java ├── dcucc_test_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── test_helper │ │ └── fixtures │ │ ├── AbaloneFixture.java │ │ ├── AlgorithmTestFixture.java │ │ ├── BreastCancerFixture.java │ │ ├── BridgesFixture.java │ │ ├── ConditionalUniqueAndOrFixture.java │ │ ├── ConditionalUniqueNotFixture.java │ │ └── HepatitisFixture.java └── pom.xml ├── depminer ├── depminer_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── depminer │ 
│ └── depminer_algorithm │ │ ├── AlgorithmGroup2DepMiner.java │ │ ├── AlgorithmGroup2DepMiner10Kerne.java │ │ ├── AlgorithmGroup2DepMiner12Kerne.java │ │ ├── AlgorithmGroup2DepMiner14Kerne.java │ │ ├── AlgorithmGroup2DepMiner16Kerne.java │ │ ├── AlgorithmGroup2DepMiner18Kerne.java │ │ ├── AlgorithmGroup2DepMiner2Kerne.java │ │ ├── AlgorithmGroup2DepMiner4Kerne.java │ │ ├── AlgorithmGroup2DepMiner6Kerne.java │ │ ├── AlgorithmGroup2DepMiner8Kerne.java │ │ ├── AlgorithmGroup2DepMinerAuto.java │ │ ├── DepMiner.java │ │ ├── TestRunner │ │ └── RunnerDepMiner.java │ │ └── modules │ │ ├── CMAX_SET_Generator.java │ │ ├── FunctionalDependencyGenerator.java │ │ └── LeftHandSideGenerator.java ├── depminer_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── depminer │ │ └── depminer_helper │ │ ├── AlgorithmMetaGroup2.java │ │ ├── modules │ │ ├── AgreeSetGenerator.java │ │ ├── Algorithm_Group2_Modul.java │ │ ├── StrippedPartitionGenerator.java │ │ ├── TupleEquivalenceClassRelation.java │ │ └── container │ │ │ ├── AgreeSet.java │ │ │ ├── CMAX_SET.java │ │ │ ├── FunctionalDependencyGroup2.java │ │ │ ├── MAX_SET.java │ │ │ ├── StorageSet.java │ │ │ └── StrippedPartition.java │ │ ├── testRunner │ │ ├── AlgorithExecuteThread.java │ │ ├── CSVTestCase.java │ │ └── MemorySniffingThread.java │ │ └── util │ │ └── BitSetUtil.java └── pom.xml ├── dfd ├── dfdAlgorithm │ ├── pom.xml │ └── src │ │ └── fdiscovery │ │ ├── approach │ │ ├── ColumnOrder.java │ │ ├── equivalence │ │ │ ├── EquivalenceManagedComposedPartition.java │ │ │ ├── EquivalenceManagedFileBasedPartition.java │ │ │ ├── EquivalenceManagedFileBasedPartitions.java │ │ │ ├── EquivalenceManagedJoinedPartitions.java │ │ │ ├── EquivalenceManagedPartition.java │ │ │ └── PartitionEquivalences.java │ │ └── runner │ │ │ └── DFDMiner.java │ │ ├── columns │ │ ├── AgreeSet.java │ │ ├── AgreeSets.java │ │ ├── ColumnCollection.java │ │ ├── DifferenceSet.java │ │ ├── 
DifferenceSets.java │ │ ├── Path.java │ │ ├── Seed.java │ │ └── Seeds.java │ │ ├── equivalence │ │ ├── Equivalence.java │ │ ├── EquivalenceGroupHashSet.java │ │ ├── EquivalenceGroupTIntHashSet.java │ │ ├── EquivalenceGroupTreeSet.java │ │ └── TEquivalence.java │ │ ├── fastfds │ │ ├── CoverOrder.java │ │ ├── EquivalenceClass.java │ │ ├── EquivalenceClasses.java │ │ ├── MaximalEquivalenceClasses.java │ │ ├── PartialOrder.java │ │ └── runner │ │ │ └── FastFDs.java │ │ ├── general │ │ ├── Benchmarker.java │ │ ├── CLIParserBenchmarker.java │ │ ├── CLIParserMiner.java │ │ ├── CollectionSet.java │ │ ├── ColumnFile.java │ │ ├── ColumnFiles.java │ │ ├── FunctionalDependencies.java │ │ └── Miner.java │ │ ├── partitions │ │ ├── ComposedPartition.java │ │ ├── FileBasedPartition.java │ │ ├── FileBasedPartitions.java │ │ ├── JoinedPartitions.java │ │ ├── MemoryManagedJoinedPartitions.java │ │ ├── Partition.java │ │ ├── PartitionStatistics.java │ │ ├── ProbeTable.java │ │ ├── StrippedPartition.java │ │ └── StrippedPartitions.java │ │ ├── preprocessing │ │ ├── InputFileProcessor.java │ │ └── SVFileProcessor.java │ │ ├── pruning │ │ ├── Dependencies.java │ │ ├── Holes.java │ │ ├── NonDependencies.java │ │ ├── Observation.java │ │ ├── Observations.java │ │ ├── PruneHashSet.java │ │ ├── PruneInterface.java │ │ ├── PruneTable.java │ │ └── Seed.java │ │ └── tane │ │ ├── AprioriGeneration.java │ │ └── runner │ │ └── Tane.java ├── dfdMetanome │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── dfd │ │ └── DFDMetanome.java └── pom.xml ├── ducc ├── .gitignore ├── ducc_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── ducc │ │ │ ├── DuccAlgorithm.java │ │ │ └── UccGraphTraverser.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── ducc │ │ ├── DuccAlgorithmTest.java │ │ ├── DuccTestFixtureWithHoles.java │ │ ├── 
UccGraphTraverserFixture.java │ │ └── UccGraphTraverserTest.java ├── ducc_algorithm_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── ma2013n2 │ │ │ └── algorithm_helper │ │ │ └── data_structures │ │ │ ├── GraphTraverser.java │ │ │ ├── HoleFinder.java │ │ │ └── PruningGraph.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── ma2013n2 │ │ └── algorithm_helper │ │ └── data_structures │ │ ├── HoleFinderTest.java │ │ ├── PruningGraphFixture.java │ │ └── PruningGraphTest.java ├── ducc_for_metanome │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── ducc │ │ │ └── Ducc.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── ducc │ │ └── DuccTest.java ├── ducc_test_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── ducc │ │ │ └── test_helper │ │ │ └── fixtures │ │ │ ├── AbaloneFixture.java │ │ │ ├── AbaloneFixtureFixedUCC.java │ │ │ ├── AlgorithmTestFixture.java │ │ │ ├── BridgesFixture.java │ │ │ ├── FDmineFixture.java │ │ │ ├── FDminimizerShadowedFDFixture.java │ │ │ ├── FunFastCountFixture.java │ │ │ └── ShadowedSuperSetFixture.java │ │ └── resources │ │ ├── abalone.csv │ │ └── bridges.csv └── pom.xml ├── fastfds ├── fastfds_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── fastfds │ │ ├── AlgorithmGroup2FastFD.java │ │ ├── AlgorithmGroup2FastFD10Kerne.java │ │ ├── AlgorithmGroup2FastFD12Kerne.java │ │ ├── AlgorithmGroup2FastFD14Kerne.java │ │ ├── AlgorithmGroup2FastFD16Kerne.java │ │ ├── AlgorithmGroup2FastFD18Kerne.java │ │ ├── AlgorithmGroup2FastFD2Kerne.java │ │ ├── AlgorithmGroup2FastFD4Kerne.java │ │ ├── AlgorithmGroup2FastFD6Kerne.java │ │ ├── 
AlgorithmGroup2FastFD8Kerne.java │ │ ├── AlgorithmGroup2FastFDAuto.java │ │ ├── FastFD.java │ │ ├── TestRunner │ │ └── RunnerFastFD.java │ │ └── modules │ │ ├── DifferenceSetGenerator.java │ │ ├── FindCoversGenerator.java │ │ └── container │ │ └── DifferenceSet.java ├── fastfds_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── fastfds │ │ └── fastfds_helper │ │ ├── AlgorithmMetaGroup2.java │ │ ├── modules │ │ ├── AgreeSetGenerator.java │ │ ├── Algorithm_Group2_Modul.java │ │ ├── StrippedPartitionGenerator.java │ │ ├── TupleEquivalenceClassRelation.java │ │ └── container │ │ │ ├── AgreeSet.java │ │ │ ├── CMAX_SET.java │ │ │ ├── FunctionalDependencyGroup2.java │ │ │ ├── MAX_SET.java │ │ │ ├── StorageSet.java │ │ │ └── StrippedPartition.java │ │ ├── testRunner │ │ ├── AlgorithExecuteThread.java │ │ ├── CSVTestCase.java │ │ └── MemorySniffingThread.java │ │ └── util │ │ └── BitSetUtil.java └── pom.xml ├── fdep ├── fdep_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── fdep │ │ │ └── FdepAlgorithm.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── fdep │ │ └── FdepAlgorithmTest.java ├── fdep_algorithm_improved │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── fdep │ │ │ └── FdepAlgorithmHashValues.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── fdep │ │ └── FdepAlgorithmHashValuesTest.java └── pom.xml ├── fdmine ├── fdmine_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── uni_potsdam │ │ │ └── hpi │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── fdmine │ │ │ └── FdMine.java │ │ └── test │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── algorithms │ │ └── fdmine │ │ └── 
FdMineTest.java ├── fdmine_test_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── algorithms │ │ └── test_helper │ │ └── fixtures │ │ ├── AbaloneFixture.java │ │ ├── AbaloneFixtureFixedUCC.java │ │ ├── AlgorithmTestFixture.java │ │ ├── BridgesFixture.java │ │ ├── FDmineFixture.java │ │ ├── FDminimizerShadowedFDFixture.java │ │ ├── FunFastCountFixture.java │ │ └── ShadowedSuperSetFixture.java └── pom.xml ├── fun ├── .gitignore ├── fun_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── uni_potsdam │ │ │ └── hpi │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── fun │ │ │ ├── FunAlgorithm.java │ │ │ └── FunQuadruple.java │ │ └── test │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── algorithms │ │ └── fun │ │ ├── FixtureCandidateGeneration.java │ │ └── FunAlgorithmTest.java ├── fun_algorithm_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── de │ │ │ │ └── uni_potsdam │ │ │ │ └── hpi │ │ │ │ └── metanome │ │ │ │ └── ma2013n2 │ │ │ │ └── algorithm_helper │ │ │ │ └── data_structures │ │ │ │ ├── GraphTraverser.java │ │ │ │ ├── HoleFinder.java │ │ │ │ └── PruningGraph.java │ │ └── resources │ │ │ ├── abalone.csv │ │ │ └── bridges.csv │ │ └── test │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── ma2013n2 │ │ └── algorithm_helper │ │ └── data_structures │ │ ├── HoleFinderTest.java │ │ ├── PruningGraphFixture.java │ │ └── PruningGraphTest.java ├── fun_for_metanome │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── de │ │ │ │ └── uni_potsdam │ │ │ │ └── hpi │ │ │ │ └── metanome │ │ │ │ └── algorithms │ │ │ │ └── fun │ │ │ │ └── Fun.java │ │ └── resources │ │ │ ├── abalone.csv │ │ │ └── bridges.csv │ │ └── test │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── algorithms │ │ └── fun │ │ 
└── FunTest.java ├── fun_test_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ └── main │ │ └── java │ │ └── de │ │ └── uni_potsdam │ │ └── hpi │ │ └── metanome │ │ └── algorithms │ │ └── test_helper │ │ └── fixtures │ │ ├── AbaloneFixture.java │ │ ├── AbaloneFixtureFixedUCC.java │ │ ├── AlgorithmTestFixture.java │ │ ├── BridgesFixture.java │ │ ├── FDmineFixture.java │ │ ├── FDminimizerShadowedFDFixture.java │ │ ├── FunFastCountFixture.java │ │ └── ShadowedSuperSetFixture.java └── pom.xml ├── hydra ├── pom.xml └── src │ ├── main │ └── java │ │ ├── ch │ │ └── javasoft │ │ │ └── bitset │ │ │ ├── BitSetFactory.java │ │ │ ├── IBitSet.java │ │ │ ├── LongBitSet.java │ │ │ └── search │ │ │ ├── ISubsetBackend.java │ │ │ ├── ITreeSearch.java │ │ │ ├── NTreeSearch.java │ │ │ ├── SubSetSearch.java │ │ │ ├── SuperSetSearch.java │ │ │ ├── TranslatingTreeSearch.java │ │ │ ├── TreeSearch.java │ │ │ └── tree │ │ │ ├── InterNode.java │ │ │ ├── LeafNode.java │ │ │ └── Node.java │ │ └── de │ │ └── hpi │ │ └── naumann │ │ └── dc │ │ ├── algorithms │ │ └── hybrid │ │ │ ├── Hydra.java │ │ │ ├── HydraMetanome.java │ │ │ └── ResultCompletion.java │ │ ├── cover │ │ └── PrefixMinimalCoverSearch.java │ │ ├── denialcontraints │ │ ├── DenialConstraint.java │ │ └── DenialConstraintSet.java │ │ ├── evidenceset │ │ ├── HashEvidenceSet.java │ │ ├── IEvidenceSet.java │ │ ├── TroveEvidenceSet.java │ │ └── build │ │ │ ├── EvidenceSetBuilder.java │ │ │ ├── PartitionEvidenceSetBuilder.java │ │ │ └── sampling │ │ │ ├── ColumnAwareEvidenceSetBuilder.java │ │ │ ├── OrderedCluster.java │ │ │ ├── SystematicLinearEvidenceSetBuilder.java │ │ │ └── WeightedRandomPicker.java │ │ ├── helpers │ │ ├── ArrayIndexComparator.java │ │ ├── BitSetTranslator.java │ │ ├── IndexProvider.java │ │ ├── ParserHelper.java │ │ └── SuperSetWalker.java │ │ ├── input │ │ ├── Column.java │ │ ├── ColumnPair.java │ │ ├── Input.java │ │ └── ParsedColumn.java │ │ ├── paritions │ │ ├── Cluster.java │ │ ├── ClusterPair.java │ │ ├── 
IEJoin.java │ │ ├── LinePair.java │ │ └── StrippedPartition.java │ │ └── predicates │ │ ├── PartitionRefiner.java │ │ ├── Predicate.java │ │ ├── PredicateBuilder.java │ │ ├── PredicatePair.java │ │ ├── PredicateProvider.java │ │ ├── operands │ │ └── ColumnOperand.java │ │ └── sets │ │ ├── Closure.java │ │ ├── PredicateBitSet.java │ │ └── PredicateSetFactory.java │ └── test │ └── java │ └── de │ └── hpi │ └── naumann │ └── dc │ ├── DenialConstraintSetTest.java │ ├── fastdc │ └── IEJoinTest.java │ ├── helpers │ └── ParserHelperTest.java │ ├── input │ └── ColumnTest.java │ ├── paritions │ └── RefiningTest.java │ └── predicates │ ├── OperatorTest.java │ ├── PredicateSetTest.java │ └── PredicateTest.java ├── pom.xml ├── set-version.sh ├── tane ├── pom.xml ├── tane_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── tane │ │ │ ├── CombinationHelper.java │ │ │ ├── StrippedPartition.java │ │ │ └── TaneAlgorithm.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tane │ │ └── TaneAlgorithmTest.java ├── tane_algorithm_helper │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── tane │ │ │ └── algorithm_helper │ │ │ ├── FDTree.java │ │ │ ├── FDTreeElement.java │ │ │ └── test_helper │ │ │ ├── AlgorithmTester.java │ │ │ └── fixtures │ │ │ ├── AbstractAlgorithmTestFixture.java │ │ │ ├── AlgorithmTestFixture1.java │ │ │ ├── AlgorithmTestFixture10.java │ │ │ ├── AlgorithmTestFixture11.java │ │ │ ├── AlgorithmTestFixture12.java │ │ │ ├── AlgorithmTestFixture13.java │ │ │ ├── AlgorithmTestFixture14.java │ │ │ ├── AlgorithmTestFixture15.java │ │ │ ├── AlgorithmTestFixture16.java │ │ │ ├── AlgorithmTestFixture17.java │ │ │ ├── AlgorithmTestFixture18.java │ │ │ ├── AlgorithmTestFixture19.java │ │ │ ├── AlgorithmTestFixture2.java │ │ │ ├── AlgorithmTestFixture3.java │ │ │ ├── 
AlgorithmTestFixture4.java │ │ │ ├── AlgorithmTestFixture5.java │ │ │ ├── AlgorithmTestFixture6.java │ │ │ ├── AlgorithmTestFixture7.java │ │ │ ├── AlgorithmTestFixture8.java │ │ │ └── AlgorithmTestFixture9.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tane │ │ └── algorithm_helper │ │ └── FDTreeTest.java ├── tane_tree_dir_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── de │ │ │ └── metanome │ │ │ └── algorithms │ │ │ └── tane │ │ │ ├── CombinationHelper.java │ │ │ ├── StrippedPartition.java │ │ │ └── TaneAlgorithmFilterTreeDirect.java │ │ └── test │ │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tane │ │ └── TaneAlgorithmFilterTreeDirectTest.java └── tane_tree_end_algorithm │ ├── .gitignore │ ├── pom.xml │ └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tane │ │ ├── CombinationHelper.java │ │ ├── StrippedPartition.java │ │ └── TaneAlgorithmFilterTreeEnd.java │ └── test │ └── java │ └── de │ └── metanome │ └── algorithms │ └── tane │ └── TaneAlgorithmFilterTreeEndTest.java ├── tireless ├── pom.xml └── src │ ├── main │ └── java │ │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tireless │ │ ├── Main.java │ │ ├── Tireless.java │ │ ├── TirelessAlgorithm.java │ │ ├── TirelessAlgorithmLocal.java │ │ ├── algorithm │ │ ├── Alignment.java │ │ ├── NeedlemanWunschAlignment.java │ │ ├── NeedlemanWunschAlignmentPunishElongation.java │ │ ├── OutlierDetection.java │ │ └── RecursiveSubgroupDisjunctionAlgorithm.java │ │ ├── postprocessing │ │ ├── CombineSimilarCharClasses.java │ │ ├── CombinedPostprocessor.java │ │ ├── GeneralizeCharClasses.java │ │ ├── GeneralizeQuantifiers.java │ │ └── ReduceElementCount.java │ │ ├── preprocessing │ │ ├── AlgorithmConfiguration.java │ │ ├── CharClasses.java │ │ ├── InputReader.java │ │ ├── StatisticsCollector.java │ │ └── alphabet │ │ │ ├── Alphabet.java │ │ │ ├── AlphabetLeaf.java │ │ │ ├── AlphabetNode.java 
│ │ │ └── DefaultAlphabet.java │ │ └── regularexpression │ │ ├── RegularExpressionComparator.java │ │ ├── containerclasses │ │ ├── ExpressionType.java │ │ ├── RegularExpression.java │ │ ├── RegularExpressionCharacterClass.java │ │ ├── RegularExpressionConjunction.java │ │ ├── RegularExpressionDisjunctionOfTokens.java │ │ └── RegularExpressionToken.java │ │ └── matcherclasses │ │ ├── MatcherFactory.java │ │ ├── PseudoMatcher.java │ │ ├── RegularExpressionMatcher.java │ │ ├── RegularExpressionMatcherClassClass.java │ │ ├── RegularExpressionMatcherDisjunctionOTToken.java │ │ ├── RegularExpressionMatcherTokenDisjunctionOT.java │ │ └── RegularExpressionMatcherTokenToken.java │ └── test │ ├── java │ └── de │ │ └── metanome │ │ └── algorithms │ │ └── tireless │ │ ├── algorithm │ │ ├── CombinedAlgorithmTest.java │ │ ├── RecursiveSubgroupDisjunctionAlgorithmTestAlignment.java │ │ ├── RecursiveSubgroupDisjunctionAlgorithmTestLengthDeviation.java │ │ └── RecursiveSubgroupDisjunctionAlgorithmTestTokenSplitting.java │ │ ├── postprocessing │ │ ├── CombineSimilarCharClassesTest.java │ │ ├── CombineSimilarCharClassesTestIsMergeable.java │ │ ├── GeneralizeCharClassesTest.java │ │ └── GeneralizeQuantifiersTest.java │ │ └── preprocessing │ │ ├── AlgorithmConfigurationTest.java │ │ ├── CharClassesTest.java │ │ ├── InputReaderTest.java │ │ ├── StatisticsCollectorTest.java │ │ └── alphabet │ │ ├── AlphabetLeafTest.java │ │ ├── AlphabetNodeTest.java │ │ ├── AlphabetTest.java │ │ └── DefaultAlphabetTest.java │ └── resources │ ├── test_input_with_duplicates.csv │ └── test_input_without_header.csv └── utils ├── .gitignore ├── pom.xml └── src └── main └── java └── de └── uni_potsdam └── hpi └── utils ├── CollectionUtils.java ├── DatabaseUtils.java ├── FileUtils.java ├── LoggingUtils.java └── MeasurementUtils.java /.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | .classpath 3 | .settings 4 | /.settings 5 | *.releaseBackup 6 | 
release.properties 7 | .idea 8 | /.idea 9 | *.iml 10 | *.class 11 | data/ 12 | bin/ 13 | io/ 14 | temp/ 15 | target/ 16 | datasets/ 17 | output/ 18 | _COLLECTION_ 19 | ORDER/order.log 20 | .DS_Store 21 | *.versionsBackup 22 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "pyro"] 2 | path = pyro 3 | url = https://github.com/HPI-Information-Systems/pyro.git 4 | [submodule "sindy"] 5 | path = sindy 6 | url = https://github.com/HPI-Information-Systems/sindy.git 7 | -------------------------------------------------------------------------------- /AIDFD/src/main/java/de/metanome/algorithms/aidfd/helpers/Cluster.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.aidfd.helpers; 2 | 3 | import gnu.trove.iterator.TIntIterator; 4 | import gnu.trove.list.array.TIntArrayList; 5 | 6 | public class Cluster { 7 | private TIntArrayList array; 8 | private int column; 9 | 10 | public Cluster(int column) { 11 | this.column = column; 12 | this.array = new TIntArrayList(); 13 | } 14 | 15 | public int getColumn() { 16 | return column; 17 | } 18 | 19 | public TIntArrayList getArray() { 20 | return array; 21 | } 22 | 23 | 24 | public void add(int value) { 25 | this.array.add(value); 26 | } 27 | 28 | public boolean contains(int value) { 29 | return array.binarySearch(value) >= 0; 30 | } 31 | 32 | public TIntIterator iterator() { 33 | return array.iterator(); 34 | } 35 | 36 | public int size() { 37 | return array.size(); 38 | } 39 | 40 | public int get(int index) { 41 | return array.get(index); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /AIDFD/src/main/java/de/metanome/algorithms/aidfd/helpers/FD.java: -------------------------------------------------------------------------------- 1 | package 
de.metanome.algorithms.aidfd.helpers; 2 | 3 | import ch.javasoft.bitset.IBitSet; 4 | import ch.javasoft.bitset.LongBitSet; 5 | 6 | public class FD implements Comparable { 7 | public final int rhs; 8 | 9 | public final IBitSet lhs; 10 | public IBitSet lhsSort; 11 | 12 | public FD(int rhs, IBitSet lhs) { 13 | this.rhs = rhs; 14 | this.lhs = lhs; 15 | this.lhsSort = lhs; 16 | } 17 | 18 | @Override 19 | public int compareTo(FD o) { 20 | return o.lhsSort.compareTo(lhsSort); 21 | } 22 | 23 | @Override 24 | public String toString() { 25 | return lhs + "->" + rhs; 26 | } 27 | 28 | public void setSort(Integer[] indexes) { 29 | lhsSort = LongBitSet.FACTORY.create(); 30 | for (Integer i : indexes) { 31 | if (lhs.get(indexes[i.intValue()].intValue())) { 32 | lhsSort.set(i.intValue()); 33 | } 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /BINDER/BINDERAlgorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /BINDER/BINDERAlgorithm/src/main/java/de/metanome/algorithms/binder/io/InputIterator.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.binder.io; 2 | 3 | import java.util.List; 4 | 5 | import de.metanome.algorithm_integration.input.InputIterationException; 6 | 7 | public interface InputIterator extends AutoCloseable { 8 | 9 | public boolean next() throws InputIterationException; 10 | public String getValue(int columnIndex) throws InputIterationException; 11 | public List getValues() throws InputIterationException; 12 | } 13 | -------------------------------------------------------------------------------- /BINDER/BINDERAlgorithm/src/main/java/de/metanome/algorithms/binder/util/LruCache.java: -------------------------------------------------------------------------------- 1 | package 
de.metanome.algorithms.binder.util; 2 | 3 | import java.util.LinkedHashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * Simple LRU cache implementation. 8 | */ 9 | public class LruCache extends LinkedHashMap { 10 | 11 | private static final long serialVersionUID = 2109386626216996633L; 12 | 13 | private int capacity; 14 | 15 | public LruCache(int capacity) { 16 | super(capacity); 17 | this.capacity = capacity; 18 | } 19 | 20 | @Override 21 | protected boolean removeEldestEntry(Map.Entry eldest) { 22 | return this.size() > this.capacity; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /BINDER/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | de.metanome.algorithms.binder 5 | BINDERModules 6 | pom 7 | 8 | BINDERModules 9 | 10 | 11 | UTF-8 12 | 13 | 14 | 15 | de.metanome.algorithms 16 | algorithms 17 | 1.2-SNAPSHOT 18 | ../pom.xml 19 | 20 | 21 | 22 | BINDERAlgorithm 23 | BINDERDatabase 24 | BINDERFile 25 | 26 | 27 | -------------------------------------------------------------------------------- /CFDFinder/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | *.iml 4 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/expansion/ExpansionStrategy.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.expansion; 2 | 3 | import java.util.BitSet; 4 | import java.util.List; 5 | 6 | import de.metanome.algorithms.cfdfinder.pattern.Pattern; 7 | 8 | public abstract class ExpansionStrategy { 9 | 10 | protected int[][] values; 11 | 12 | public ExpansionStrategy(int[][] values) { 13 | this.values = values; 14 | } 15 | 16 | public abstract Pattern generateNullPattern(BitSet attributes); 17 | public abstract List getChildPatterns(Pattern 
currentPattern); 18 | } 19 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/pattern/NegativeConstantPatternEntry.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.pattern; 2 | 3 | public class NegativeConstantPatternEntry extends ConstantPatternEntry { 4 | 5 | public NegativeConstantPatternEntry(int constant) { 6 | super(constant); 7 | } 8 | 9 | @Override 10 | public String toString() { 11 | return "¬" + super.toString(); 12 | } 13 | 14 | @Override 15 | boolean matches(int value) { 16 | return this.getConstant() != value; 17 | } 18 | 19 | @Override 20 | public int hashCode() { 21 | return -331 * super.hashCode(); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/pattern/PatternEntry.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.pattern; 2 | 3 | public abstract class PatternEntry { 4 | 5 | abstract boolean matches(final int value); 6 | abstract boolean isVariable(); 7 | 8 | } 9 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/pattern/VariablePatternEntry.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.pattern; 2 | 3 | public class VariablePatternEntry extends PatternEntry { 4 | 5 | @Override 6 | public String toString() { 7 | return "_"; 8 | } 9 | 10 | @Override 11 | boolean matches(final int value) { 12 | return true; 13 | } 14 | 15 | @Override 16 | boolean isVariable() { 17 | return true; 18 | } 19 | 20 | @Override 21 | public boolean equals(Object o) { 22 | if (this == o) return true; 23 | if (o == null || getClass() != 
o.getClass()) return false; 24 | 25 | PatternEntry that = (PatternEntry) o; 26 | 27 | return that.isVariable(); 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return -1; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/result/DirectOutputResultStrategy.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.result; 2 | 3 | import de.metanome.algorithm_integration.ColumnIdentifier; 4 | import de.metanome.algorithm_integration.result_receiver.ConditionalFunctionalDependencyResultReceiver; 5 | import it.unimi.dsi.fastutil.objects.ObjectArrayList; 6 | 7 | public class DirectOutputResultStrategy extends ResultStrategy { 8 | 9 | public DirectOutputResultStrategy(ConditionalFunctionalDependencyResultReceiver resultReceiver, ObjectArrayList columnIdentifiers) { 10 | super(resultReceiver, columnIdentifiers); 11 | } 12 | 13 | public static String getIdentifier() { 14 | return "DirectOutputStrategy"; 15 | } 16 | 17 | @Override 18 | public void receiveResult(Result result) { 19 | super.sendToMetanome(result); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /CFDFinder/src/main/java/de/metanome/algorithms/cfdfinder/structures/ClusterIdentifierWithRecord.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cfdfinder.structures; 2 | 3 | public class ClusterIdentifierWithRecord extends ClusterIdentifier { 4 | 5 | private final int record; 6 | 7 | public ClusterIdentifierWithRecord(final int[] cluster, final int record) { 8 | super(cluster); 9 | this.record = record; 10 | } 11 | 12 | public int getRecord() { 13 | return this.record; 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- 
/**
 * Immutable pair of two {@code int} values, exposed through {@link #a()} and
 * {@link #b()}.
 */
public class IntegerPair {

    private final int first;
    private final int second;

    /**
     * @param a value later returned by {@link #a()}
     * @param b value later returned by {@link #b()}
     */
    public IntegerPair(final int a, final int b) {
        first = a;
        second = b;
    }

    /** @return the first component */
    public int a() {
        return first;
    }

    /** @return the second component */
    public int b() {
        return second;
    }
}
/**
 * Equality checks for column values with configurable null handling.
 *
 * <p>The object overload treats two {@code null}s according to the
 * {@code isNullEqualNull} flag. The {@code int} overloads never consider a
 * negative value equal to anything, including itself.
 */
public class ValueComparator {

    private boolean isNullEqualNull;

    /**
     * @param isNullEqualNull result of comparing {@code null} with {@code null}
     *                        in {@link #isEqual(Object, Object)}
     */
    public ValueComparator(boolean isNullEqualNull) {
        this.isNullEqualNull = isNullEqualNull;
    }

    /** @return the flag supplied at construction */
    public boolean isNullEqualNull() {
        return this.isNullEqualNull;
    }

    /**
     * Compares two objects. Two {@code null}s yield the configured flag; a
     * single {@code null} first argument yields {@code false}; otherwise the
     * result is {@code val1.equals(val2)}.
     */
    public boolean isEqual(Object val1, Object val2) {
        if (val1 == null) {
            if (val2 == null) {
                return this.isNullEqualNull;
            }
            return false;
        }
        return val1.equals(val2);
    }

    /** @return {@code true} only if both values are non-negative and identical */
    public boolean isEqual(int val1, int val2) {
        if (val1 < 0 || val2 < 0) {
            return false;
        }
        return val1 == val2;
    }

    /** @return {@code true} if either value is negative or the values differ */
    public boolean isDifferent(int val1, int val2) {
        final boolean bothNonNegative = (val1 >= 0) && (val2 >= 0);
        return !(bothNonNegative && (val1 == val2));
    }
}
-------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvakmv; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVAKMVTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVAMS/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVAMS/src/test/java/de/metanome/algorithms/dvams/DVCAMSest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvams; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVCAMSest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | 
public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVBJKST/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVBJKST/src/test/java/de/metanome/algorithms/dvbjkst/DVBJKSTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvbjkst; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVBJKSTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVBloomFilter/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVBloomFilter/src/test/java/de/metanome/algorithms/dvbf/DVBFTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvbf; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVBFTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 
| 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVFM/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVFM/src/test/java/de/metanome/algorithms/dvfm/DVFMTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvfm; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVFMTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVHyperLogLog/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | 
-------------------------------------------------------------------------------- /DVHyperLogLog/src/test/java/de/metanome/algorithms/dvhyperloglog/DVHyperLogLogTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvhyperloglog; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVHyperLogLogTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVHyperLogLogPlus/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVHyperLogLogPlus/src/test/java/de/metanome/algorithms/dvhyperloglog/DVHyperLogLogPlusTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvhyperloglog; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVHyperLogLogPlusTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | 
@Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVLC/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVLC/src/test/java/de/metanome/algorithms/dvlc/DVCLTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvlc; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVCLTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVLogLog/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVLogLog/src/test/java/de/metanome/algorithms/dvloglog/DVLogLogTest.java: -------------------------------------------------------------------------------- 
1 | package de.metanome.algorithms.dvloglog; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVLogLogTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVMinCount/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVMinCount/src/test/java/de/metanome/algorithms/dvmincount/DVMinCountTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvmincount; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVMinCountTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void 
testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVPCSA/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVPCSA/src/test/java/de/metanome/algorithms/dvpcsa/DVPCSATest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvpcsa; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVPCSATest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /DVSuperLogLog/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /DVSuperLogLog/src/test/java/de/metanome/algorithms/dvsuperloglog/DVSuperLogLogTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dvsuperloglog; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class DVSuperLogLogTest { 8 | 9 | @Before 10 | public void setUp() throws 
Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /FAIDA/FAIDAAlgorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /temp 2 | -------------------------------------------------------------------------------- /FAIDA/FAIDAAlgorithm/src/main/java/de/hpi/mpss2015n/approxind/RowSampler.java: -------------------------------------------------------------------------------- 1 | package de.hpi.mpss2015n.approxind; 2 | 3 | import de.metanome.algorithm_integration.AlgorithmConfigurationException; 4 | import de.metanome.algorithm_integration.input.RelationalInputGenerator; 5 | 6 | 7 | public interface RowSampler { 8 | RelationalInputGenerator[] createSample(RelationalInputGenerator[] fileInputGenerators) throws AlgorithmConfigurationException; 9 | } 10 | -------------------------------------------------------------------------------- /FAIDA/FAIDAAlgorithm/src/main/java/de/hpi/mpss2015n/approxind/inclusiontester/CombinedHashSetInclusionTester.java: -------------------------------------------------------------------------------- 1 | package de.hpi.mpss2015n.approxind.inclusiontester; 2 | 3 | import de.hpi.mpss2015n.approxind.utils.SimpleColumnCombination; 4 | import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet; 5 | import it.unimi.dsi.fastutil.longs.LongSet; 6 | 7 | public final class CombinedHashSetInclusionTester extends 
CombinedInclusionTester { 8 | 9 | @Override 10 | protected LongSet createApproximateDatastructures(SimpleColumnCombination combination) { 11 | return new LongOpenHashBigSet(); 12 | } 13 | 14 | @Override 15 | protected void insertRowIntoAD(SimpleColumnCombination combination, long hash, LongSet longSet) { 16 | longSet.add(hash); 17 | } 18 | 19 | @Override 20 | protected boolean testWithAds(LongSet longSet2, LongSet longSet1) { 21 | return longSet1.containsAll(longSet2); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /FAIDA/FAIDAAlgorithm/src/main/java/de/hpi/mpss2015n/approxind/sampler/IdentityRowSampler.java: -------------------------------------------------------------------------------- 1 | package de.hpi.mpss2015n.approxind.sampler; 2 | 3 | import de.hpi.mpss2015n.approxind.RowSampler; 4 | import de.metanome.algorithm_integration.input.RelationalInputGenerator; 5 | 6 | public final class IdentityRowSampler implements RowSampler { 7 | @Override 8 | public RelationalInputGenerator[] createSample(RelationalInputGenerator[] fileInputGenerators) { 9 | return fileInputGenerators; 10 | } 11 | 12 | @Override 13 | public String toString() { 14 | return "IdentityRowSampler"; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /FAIDA/FAIDAAlgorithm/src/main/java/de/hpi/mpss2015n/approxind/sampler/SampleGenerator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.mpss2015n.approxind.sampler; 2 | 3 | import de.metanome.algorithm_integration.input.InputGenerationException; 4 | import de.metanome.algorithm_integration.input.RelationalInput; 5 | import de.metanome.algorithm_integration.input.RelationalInputGenerator; 6 | 7 | public class SampleGenerator implements RelationalInputGenerator { 8 | 9 | RelationalInput input; 10 | 11 | public SampleGenerator(RelationalInput input){ 12 | this.input = input; 13 | } 14 | 15 | 
/**
 * Key-value cache with LRU semantics.
 *
 * <p>Backed by an access-ordered {@link LinkedHashMap}: reads and writes move
 * an entry to the most-recently-used position, and the least recently used
 * entry is dropped once more than {@code maxCapacity} entries are stored.
 *
 * <p>Not thread-safe. In access order {@code get} is a structural
 * modification, so do not call it while iterating over the map.
 *
 * @param <K> key type
 * @param <V> value type
 */
public class AOCacheMap<K, V> extends LinkedHashMap<K, V> {

    private static final long serialVersionUID = 2130793095028407165L;

    /** Maximum number of entries kept after an insertion completes. */
    private final int maxCapacity;

    /**
     * @param maxCapacity maximum number of entries to retain; also used as the
     *                    initial table capacity
     * @throws IllegalArgumentException if {@code maxCapacity} is negative
     */
    public AOCacheMap(int maxCapacity) {
        // Third argument 'true' selects access order, which is what yields LRU eviction.
        super(maxCapacity, 0.7f, true);
        this.maxCapacity = maxCapacity;
    }

    /** Tells the map to drop its eldest entry whenever the size limit is exceeded. */
    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > maxCapacity;
    }
}
9 | 10 | } -------------------------------------------------------------------------------- /HyFD/src/main/java/de/metanome/algorithms/hyfd/structures/ClusterIdentifierWithRecord.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyfd.structures; 2 | 3 | public class ClusterIdentifierWithRecord extends ClusterIdentifier { 4 | 5 | private final int record; 6 | 7 | public ClusterIdentifierWithRecord(final int[] cluster, final int record) { 8 | super(cluster); 9 | this.record = record; 10 | } 11 | 12 | public int getRecord() { 13 | return this.record; 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /HyFD/src/main/java/de/metanome/algorithms/hyfd/structures/FDTreeElementLhsPair.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyfd.structures; 2 | 3 | import java.util.BitSet; 4 | 5 | public class FDTreeElementLhsPair { 6 | 7 | private final FDTreeElement element; 8 | private final BitSet lhs; 9 | 10 | public FDTreeElement getElement() { 11 | return this.element; 12 | } 13 | 14 | public BitSet getLhs() { 15 | return this.lhs; 16 | } 17 | 18 | public FDTreeElementLhsPair(FDTreeElement element, BitSet lhs) { 19 | this.element = element; 20 | this.lhs = lhs; 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /HyFD/src/main/java/de/metanome/algorithms/hyfd/structures/IntegerPair.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyfd.structures; 2 | 3 | public class IntegerPair { 4 | 5 | private final int a; 6 | private final int b; 7 | 8 | public IntegerPair(final int a, final int b) { 9 | this.a = a; 10 | this.b = b; 11 | } 12 | 13 | public int a() { 14 | return this.a; 15 | } 16 | 17 | public int b() { 18 | return this.b; 19 | } 20 | } 21 | 
/**
 * Process-wide logger that echoes every message to {@code System.out} and
 * keeps a copy of everything written in memory, retrievable via {@link #read()}.
 *
 * <p>The singleton itself is published safely, but the internal buffer is a
 * plain {@link StringBuilder}; concurrent writers are not synchronized.
 */
public class Logger {

    /**
     * Eagerly created singleton. Class initialization guarantees that exactly
     * one instance exists, which the former unsynchronized lazy check did not.
     */
    private static final Logger INSTANCE = new Logger();

    /** Accumulates everything written so far. */
    private final StringBuilder log = new StringBuilder();

    private Logger() {
    }

    /** @return the single shared logger instance */
    public static Logger getInstance() {
        return INSTANCE;
    }

    /**
     * Appends the message to the buffer and prints it without a line break.
     *
     * @param message text to log; {@code null} is logged as {@code "null"}
     */
    public void write(String message) {
        this.log.append(message);
        System.out.print(message);
    }

    /**
     * Appends the message followed by {@code "\r\n"} to the buffer and prints
     * it as a line.
     *
     * @param message text to log; {@code null} is logged as {@code "null"}
     */
    public void writeln(String message) {
        this.log.append(message).append("\r\n");
        System.out.println(message);
    }

    /**
     * Logs the string form of an arbitrary object.
     *
     * @param message object to log; {@code null} is logged as {@code "null"}
     *                instead of raising a {@link NullPointerException}
     */
    public void write(Object message) {
        this.write(String.valueOf(message));
    }

    /**
     * Logs the string form of an arbitrary object followed by a line break.
     *
     * @param message object to log; {@code null} is logged as {@code "null"}
     *                instead of raising a {@link NullPointerException}
     */
    public void writeln(Object message) {
        this.writeln(String.valueOf(message));
    }

    /** @return everything written so far, in order */
    public String read() {
        return this.log.toString();
    }
}
isDifferent(int val1, int val2) { 27 | return (val1 < 0) || (val2 < 0) || (val1 != val2); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /HyMD/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | target/ -------------------------------------------------------------------------------- /HyMD/config/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | matching-dependencies 7 | de.hpi.is 8 | 1.2-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | md-config 13 | 14 | 15 | 16 | ${project.groupId} 17 | md-hybrid 18 | ${project.version} 19 | 20 | 21 | ${project.groupId} 22 | md-mapping 23 | ${project.version} 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /HyMD/core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | matching-dependencies 7 | de.hpi.is 8 | 1.2-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | md-core 13 | 14 | 15 | 16 | ${project.groupId} 17 | md-db 18 | ${project.version} 19 | 20 | 21 | ${project.groupId} 22 | md-sim 23 | ${project.version} 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/ColumnMapping.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import de.hpi.is.md.relational.ColumnPair; 4 | import de.hpi.is.md.sim.SimilarityMeasure; 5 | import de.hpi.is.md.util.Hashable; 6 | import de.hpi.is.md.util.Hasher; 7 | import java.io.Serializable; 8 | import lombok.Data; 9 | import lombok.NonNull; 10 | 11 | @Data 12 | public class ColumnMapping implements Serializable, Hashable { 13 | 14 | private static final long serialVersionUID = -158891076140735670L; 15 | @NonNull 16 | private final ColumnPair columns; 17 | @NonNull 18 | private final 
SimilarityMeasure similarityMeasure; 19 | 20 | @Override 21 | public void hash(Hasher hasher) { 22 | hasher 23 | .put(columns) 24 | .put(similarityMeasure); 25 | } 26 | 27 | @Override 28 | public String toString() { 29 | return columns.toString() + "(" + similarityMeasure.toString() + ")"; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/Discoverer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import de.hpi.is.md.relational.Relation; 4 | import de.hpi.is.md.result.ResultEmitter; 5 | 6 | public interface Discoverer extends ResultEmitter { 7 | 8 | default void discover(Relation relation) { 9 | discover(relation, relation); 10 | } 11 | 12 | void discover(Relation r, Relation s); 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/MatchingDependency.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import de.hpi.is.md.util.StreamUtils; 4 | import java.util.Collection; 5 | import lombok.Data; 6 | import lombok.NonNull; 7 | 8 | @Data 9 | public class MatchingDependency { 10 | 11 | @NonNull 12 | private final Collection> lhs; 13 | @NonNull 14 | private final ColumnMatchWithThreshold rhs; 15 | 16 | @Override 17 | public String toString() { 18 | return "[" + StreamUtils.seq(lhs).toString(",") + "]->" + rhs; 19 | } 20 | 21 | @Data 22 | public static class ColumnMatchWithThreshold { 23 | 24 | @NonNull 25 | private final ColumnMapping match; 26 | private final double threshold; 27 | 28 | @Override 29 | public String toString() { 30 | return match.toString() + "@" + threshold; 31 | } 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/MatchingDependencyResult.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import lombok.Data; 4 | import lombok.NonNull; 5 | 6 | @Data 7 | public class MatchingDependencyResult { 8 | 9 | @NonNull 10 | private final MatchingDependency dependency; 11 | private final long support; 12 | 13 | @Override 14 | public String toString() { 15 | return dependency + " (support=" + support + ")"; 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/SupportCalculator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import com.bakdata.util.jackson.CPSBase; 4 | import com.bakdata.util.jackson.CPSTypeIdResolver; 5 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; 7 | import de.hpi.is.md.relational.Relation; 8 | 9 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "type") 10 | @JsonTypeIdResolver(CPSTypeIdResolver.class) 11 | @CPSBase 12 | public interface SupportCalculator { 13 | 14 | long calculateSupport(Relation relation); 15 | 16 | long calculateSupport(Relation r, Relation s); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/ThresholdFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import com.bakdata.util.jackson.CPSBase; 4 | import com.bakdata.util.jackson.CPSTypeIdResolver; 5 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; 7 | import de.hpi.is.md.util.Hashable; 8 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 9 | 10 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "type") 11 
| @JsonTypeIdResolver(CPSTypeIdResolver.class) 12 | @CPSBase 13 | public interface ThresholdFilter extends Hashable { 14 | 15 | Iterable filter(DoubleSet similarities); 16 | } 17 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/ThresholdProvider.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md; 2 | 3 | import de.hpi.is.md.util.OptionalDouble; 4 | import it.unimi.dsi.fastutil.doubles.DoubleSortedSet; 5 | import java.util.List; 6 | 7 | public interface ThresholdProvider { 8 | 9 | List getAll(); 10 | 11 | OptionalDouble getNext(int attr, double threshold); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/ConstantSupportCalculator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import de.hpi.is.md.SupportCalculator; 5 | import de.hpi.is.md.relational.Relation; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @CPSType(id = "constant", base = SupportCalculator.class) 9 | @RequiredArgsConstructor 10 | public class ConstantSupportCalculator implements SupportCalculator { 11 | 12 | private final long support; 13 | 14 | @Override 15 | public long calculateSupport(Relation relation) { 16 | return support; 17 | } 18 | 19 | @Override 20 | public long calculateSupport(Relation r, Relation s) { 21 | return support; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/result/FileResultWriter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.result; 2 | 3 | import de.hpi.is.md.result.ResultListener; 4 | import java.io.BufferedWriter; 5 | import java.io.Closeable; 6 | import 
java.io.File; 7 | import java.io.FileWriter; 8 | import java.io.IOException; 9 | import java.io.PrintWriter; 10 | import lombok.NonNull; 11 | import lombok.RequiredArgsConstructor; 12 | 13 | @RequiredArgsConstructor 14 | public class FileResultWriter implements ResultListener, Closeable { 15 | 16 | @NonNull 17 | private final PrintWriter out; 18 | 19 | public FileResultWriter(File file) throws IOException { 20 | this(new PrintWriter(new BufferedWriter(new FileWriter(file)))); 21 | } 22 | 23 | @Override 24 | public void close() { 25 | out.close(); 26 | } 27 | 28 | @Override 29 | public void receiveResult(T result) { 30 | out.println(result); 31 | out.flush(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/threshold/ExactThresholdFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.threshold; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import de.hpi.is.md.ThresholdFilter; 5 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 6 | 7 | @CPSType(id = "exact", base = ThresholdFilter.class) 8 | public class ExactThresholdFilter implements ThresholdFilter { 9 | 10 | @Override 11 | public DoubleSet filter(DoubleSet similarities) { 12 | return similarities; 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/threshold/LimitSizeThresholdFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.threshold; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import de.hpi.is.md.ThresholdFilter; 5 | import de.hpi.is.md.util.Hasher; 6 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 7 | import lombok.NonNull; 8 | import lombok.RequiredArgsConstructor; 9 | 10 | @CPSType(id = "limit", base = ThresholdFilter.class) 11 | @RequiredArgsConstructor 12 | public 
class LimitSizeThresholdFilter implements ThresholdFilter { 13 | 14 | private final int size; 15 | @NonNull 16 | private final ThresholdFilter underlying; 17 | 18 | @Override 19 | public Iterable filter(DoubleSet similarities) { 20 | return LimitSizeUtils.limitSize(similarities, size); 21 | } 22 | 23 | @Override 24 | public void hash(Hasher hasher) { 25 | hasher 26 | .putClass(LimitSizeThresholdFilter.class) 27 | .putInt(size) 28 | .put(underlying); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/threshold/LimitSizeUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.threshold; 2 | 3 | import de.hpi.is.md.util.Trimmer; 4 | import it.unimi.dsi.fastutil.doubles.DoubleCollection; 5 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 6 | import lombok.AccessLevel; 7 | import lombok.NoArgsConstructor; 8 | 9 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 10 | final class LimitSizeUtils { 11 | 12 | public static Iterable limitSize(DoubleCollection similarities, int size) { 13 | DoubleSet filtered = ThresholdFilterUtils.sorted(similarities); 14 | Trimmer trimmer = new Trimmer(size); 15 | trimmer.trim(filtered); 16 | return filtered; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/threshold/StepThresholdFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.threshold; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import de.hpi.is.md.ThresholdFilter; 5 | import de.hpi.is.md.util.Hasher; 6 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 7 | import java.util.Collection; 8 | import lombok.NonNull; 9 | import lombok.RequiredArgsConstructor; 10 | 11 | @CPSType(id = "step", base = ThresholdFilter.class) 12 | @RequiredArgsConstructor 13 | 
class StepThresholdFilter implements ThresholdFilter { 14 | 15 | @NonNull 16 | private final Collection steps; 17 | 18 | @Override 19 | public Iterable filter(DoubleSet similarities) { 20 | return steps; 21 | } 22 | 23 | @Override 24 | public void hash(Hasher hasher) { 25 | hasher.putClass(StepThresholdFilter.class); 26 | steps.forEach(hasher::putDouble); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/impl/threshold/ThresholdFilterUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.impl.threshold; 2 | 3 | import it.unimi.dsi.fastutil.doubles.DoubleCollection; 4 | import it.unimi.dsi.fastutil.doubles.DoubleComparators; 5 | import it.unimi.dsi.fastutil.doubles.DoubleRBTreeSet; 6 | import it.unimi.dsi.fastutil.doubles.DoubleSortedSet; 7 | import lombok.AccessLevel; 8 | import lombok.NoArgsConstructor; 9 | 10 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 11 | final class ThresholdFilterUtils { 12 | 13 | public static DoubleSortedSet sorted(DoubleCollection values) { 14 | //iterate backwards to ensure we retain the maximal threshold 15 | DoubleSortedSet filtered = new DoubleRBTreeSet(DoubleComparators.OPPOSITE_COMPARATOR); 16 | filtered.addAll(values); 17 | return filtered; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/result/AbstractResultEmitter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.result; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | 6 | public abstract class AbstractResultEmitter implements ResultEmitter { 7 | 8 | private final Collection> resultListeners = new ArrayList<>(); 9 | 10 | @Override 11 | public ResultEmitter register(ResultListener resultListener) { 12 | resultListeners.add(resultListener); 13 | return 
this; 14 | } 15 | 16 | @Override 17 | public void unregisterAll() { 18 | resultListeners.clear(); 19 | } 20 | 21 | protected void emitResult(T result) { 22 | resultListeners.forEach(listener -> listener.receiveResult(result)); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/result/ResultEmitter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.result; 2 | 3 | public interface ResultEmitter { 4 | 5 | ResultEmitter register(ResultListener resultListener); 6 | 7 | void unregisterAll(); 8 | } 9 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/result/ResultListener.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.result; 2 | 3 | public interface ResultListener { 4 | 5 | void receiveResult(T result); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/util/enforce/EnforceMatch.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import de.hpi.is.md.util.StreamUtils; 4 | import java.util.Arrays; 5 | import java.util.List; 6 | import lombok.Data; 7 | 8 | @Data 9 | public class EnforceMatch { 10 | 11 | private final Iterable left; 12 | private final Iterable right; 13 | 14 | private static String toString(Iterable objects) { 15 | List strings = StreamUtils.seq(objects) 16 | .map(Arrays::toString) 17 | .toList(); 18 | return strings.toString(); 19 | } 20 | 21 | @Override 22 | public String toString() { 23 | return toString(left) + "," + toString(right); 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/util/enforce/EnforcerBuilder.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import de.hpi.is.md.relational.Relation; 4 | 5 | public interface EnforcerBuilder { 6 | 7 | MDEnforcer create(Relation r, Relation s); 8 | 9 | default MDEnforcer create(Relation r) { 10 | return create(r, r); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /HyMD/core/src/main/java/de/hpi/is/md/util/enforce/MDEnforcer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import de.hpi.is.md.MatchingDependency.ColumnMatchWithThreshold; 4 | import java.util.Collection; 5 | 6 | public interface MDEnforcer { 7 | 8 | Collection enforce(Collection> lhs); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/AbstractRow.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import lombok.Getter; 4 | import lombok.NonNull; 5 | import lombok.RequiredArgsConstructor; 6 | 7 | @RequiredArgsConstructor 8 | @Getter 9 | public abstract class AbstractRow implements Row { 10 | 11 | @NonNull 12 | private final Schema schema; 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/ColumnPair.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import de.hpi.is.md.util.Hashable; 4 | import de.hpi.is.md.util.Hasher; 5 | import java.io.Serializable; 6 | import lombok.Data; 7 | import lombok.NonNull; 8 | import org.jooq.lambda.Seq; 9 | 10 | @Data 11 | public class ColumnPair implements Serializable, Hashable { 12 | 13 | private static final long serialVersionUID = 8470259831636852058L; 14 | @NonNull 15 | private 
final Column left; 16 | @NonNull 17 | private final Column right; 18 | 19 | public Class getType() { 20 | return left.getType(); 21 | } 22 | 23 | @Override 24 | public void hash(Hasher hasher) { 25 | hasher 26 | .put(left) 27 | .put(right); 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "[" + Seq.of(left, right) 33 | .distinct() 34 | .toString(", ") + "]"; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/HasName.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public interface HasName { 4 | 5 | String getName(); 6 | 7 | default boolean hasName(String name) { 8 | return getName().equalsIgnoreCase(name); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/HasSchema.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public interface HasSchema { 4 | 5 | Schema getSchema(); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/InputCloseException.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public class InputCloseException extends InputException { 4 | 5 | private static final long serialVersionUID = -5364416885339689164L; 6 | 7 | public InputCloseException(String message, Throwable cause) { 8 | super(message, cause); 9 | } 10 | 11 | public InputCloseException(Throwable cause) { 12 | super(cause); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/InputException.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public abstract class InputException extends RuntimeException { 4 | 5 | private static final long serialVersionUID = 1451389975923156317L; 6 | 7 | InputException(Throwable cause) { 8 | super(cause); 9 | } 10 | 11 | InputException(String message, Throwable cause) { 12 | super(message, cause); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/InputOpenException.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public class InputOpenException extends InputException { 4 | 5 | private static final long serialVersionUID = 4192133650399169296L; 6 | 7 | public InputOpenException(Throwable cause) { 8 | super(cause); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/Relation.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import de.hpi.is.md.util.Hashable; 4 | 5 | public interface Relation extends HasSchema, Hashable { 6 | 7 | @Override 8 | default Schema getSchema() { 9 | try (RelationalInput input = open()) { 10 | return input.getSchema(); 11 | } catch (InputException e) { 12 | throw new RuntimeException(e); 13 | } 14 | } 15 | 16 | long getSize() throws InputException; 17 | 18 | RelationalInput open() throws InputOpenException; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/RelationalInput.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | public interface RelationalInput extends AutoCloseable, Iterable, HasSchema { 
4 | 5 | @Override 6 | void close() throws InputCloseException; 7 | } 8 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/Row.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import java.util.Map; 4 | import java.util.Optional; 5 | 6 | public interface Row extends HasSchema { 7 | 8 | static Row create(Schema schema, Map, Object> values) { 9 | return RowImpl.create(schema, values); 10 | } 11 | 12 | Optional get(Column column); 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/RowImpl.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | 4 | import de.hpi.is.md.util.BetterMap; 5 | import de.hpi.is.md.util.BetterMapDecorator; 6 | import de.hpi.is.md.util.CastUtils; 7 | import java.util.Map; 8 | import java.util.Optional; 9 | import lombok.ToString; 10 | 11 | @ToString 12 | final class RowImpl extends AbstractRow { 13 | 14 | private final BetterMap, Object> values; 15 | 16 | private RowImpl(Schema schema, Map, Object> values) { 17 | super(schema); 18 | this.values = new BetterMapDecorator<>(values); 19 | } 20 | 21 | static Row create(Schema schema, Map, Object> values) { 22 | return new RowImpl(schema, values); 23 | } 24 | 25 | @Override 26 | public Optional get(Column column) { 27 | return values.get(column) 28 | .map(CastUtils::as); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/Schema.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import de.hpi.is.md.util.CastUtils; 4 | import java.util.List; 5 | import java.util.Optional; 6 | 7 | public interface Schema { 
8 | 9 | static Schema of(List> columns) { 10 | return new SchemaImpl(columns); 11 | } 12 | 13 | default Optional> getColumn(String columnName) { 14 | return getColumns().stream() 15 | .filter(column -> column.hasName(columnName)) 16 | .findFirst() 17 | .map(CastUtils::as); 18 | } 19 | 20 | List> getColumns(); 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/db/src/main/java/de/hpi/is/md/relational/SchemaImpl.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import java.util.List; 4 | import lombok.Data; 5 | import lombok.NonNull; 6 | 7 | @Data 8 | class SchemaImpl implements Schema { 9 | 10 | @NonNull 11 | private final List> columns; 12 | 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/db/src/test/java/de/hpi/is/md/relational/ColumnPairTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.relational; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.junit.Test; 6 | 7 | public class ColumnPairTest { 8 | 9 | @Test 10 | public void test() { 11 | Column left = Column.of("a", Integer.class); 12 | Column right = Column.of("a", Integer.class); 13 | ColumnPair pair = new ColumnPair<>(left, right); 14 | assertThat(pair.getType()).isEqualTo(Integer.class); 15 | assertThat(pair.getType()).isEqualTo(left.getType()); 16 | assertThat(pair.getType()).isEqualTo(right.getType()); 17 | } 18 | 19 | } -------------------------------------------------------------------------------- /HyMD/db/src/test/resources/de/hpi/is/md/relational/jdbc/dataset.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /HyMD/db/src/test/resources/de/hpi/is/md/relational/jdbc/schema.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS PERSON ( 2 | ID INT IDENTITY PRIMARY KEY, 3 | NAME VARCHAR, 4 | LAST_NAME VARCHAR, 5 | AGE SMALLINT 6 | ) -------------------------------------------------------------------------------- /HyMD/demo/config/default.config.json: -------------------------------------------------------------------------------- 1 | {} -------------------------------------------------------------------------------- /HyMD/demo/config/default_single.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "mapping": { 3 | "schema_mapper": { 4 | "type": "self" 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /HyMD/demo/config/fd.config.json: -------------------------------------------------------------------------------- 1 | { 2 | "discovery": { 3 | "support_calculator": { 4 | "type": "constant", 5 | "support": 0 6 | } 7 | }, 8 | "mapping": { 9 | "similarity_measures": { 10 | "default": { 11 | "type": "equals" 12 | }, 13 | "by_type": {} 14 | }, 15 | "schema_mapper": { 16 | "type": "self" 17 | } 18 | } 19 | } -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/demo/ContingencyTable.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.demo; 2 | 3 | import de.hpi.is.md.util.MathUtils; 4 | import lombok.AccessLevel; 5 | import lombok.Data; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @Data 9 | @RequiredArgsConstructor(access = AccessLevel.PRIVATE) 10 | public final class ContingencyTable { 11 | 12 | private final double recall; 13 | private final double precision; 14 | private final double fmeasure; 15 | 16 | static ContingencyTable create(double tp, double fn, double fp) { 17 | double recall = tp / (tp + fn); 18 | double precision = tp / (tp + fp); 19 | double 
fmeasure = MathUtils.divide(2 * precision * recall, precision + recall); 20 | return new ContingencyTable(recall, precision, fmeasure); 21 | } 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/demo/RunnerConfiguration.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.demo; 2 | 3 | import de.hpi.is.md.config.MappingConfiguration; 4 | import de.hpi.is.md.hybrid.DiscoveryConfiguration; 5 | import lombok.Data; 6 | import lombok.NonNull; 7 | 8 | @Data 9 | class RunnerConfiguration { 10 | 11 | @NonNull 12 | private MappingConfiguration mapping = new MappingConfiguration(); 13 | @NonNull 14 | private DiscoveryConfiguration discovery = new DiscoveryConfiguration(); 15 | 16 | } 17 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/demo/input/IOProvider.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.demo.input; 2 | 3 | import java.util.Optional; 4 | 5 | public interface IOProvider { 6 | 7 | Optional readLine(); 8 | 9 | default Optional readLine(String s) { 10 | writeLine(s); 11 | return readLine(); 12 | } 13 | 14 | void writeLine(String s); 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/demo/input/IOReceiver.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.demo.input; 2 | 3 | public interface IOReceiver { 4 | 5 | String getContinueKeyword(); 6 | 7 | String getEndKeyword(); 8 | 9 | void runOnce(IOProvider provider); 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/jcommander/Application.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.jcommander; 2 | 3 | public interface Application { 4 | 5 | void run(); 6 | } 7 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/java/de/hpi/is/md/jcommander/JCommanderJdbcConfiguration.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.jcommander; 2 | 3 | import com.beust.jcommander.Parameter; 4 | import java.sql.Connection; 5 | import java.sql.DriverManager; 6 | import java.sql.SQLException; 7 | import lombok.Data; 8 | 9 | @Data 10 | public class JCommanderJdbcConfiguration { 11 | 12 | @Parameter(names = {"--password", 13 | "-pw"}, description = "connection password", password = true, required = true) 14 | private String password; 15 | @Parameter(names = {"--user", "-u"}, required = true) 16 | private String user; 17 | @Parameter(names = {"--connection", "-c"}, required = true) 18 | private String url; 19 | @Parameter(names = {"--driver", "-d"}, required = true) 20 | private String driverName; 21 | 22 | public Connection createConnection() throws SQLException, ClassNotFoundException { 23 | Class.forName(driverName); 24 | return DriverManager.getConnection(url, user, password); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /HyMD/demo/src/main/resources/simplelogger.properties: -------------------------------------------------------------------------------- 1 | # suppress inspection "UnusedProperty" for whole file 2 | org.slf4j.simpleLogger.defaultLogLevel=info 3 | org.slf4j.simpleLogger.showDateTime=true 4 | org.slf4j.simpleLogger.levelInBrackets=true -------------------------------------------------------------------------------- /HyMD/hybrid/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | matching-dependencies 7 | de.hpi.is 8 | 
1.2-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | md-hybrid 13 | 14 | 15 | 16 | ${project.groupId} 17 | md-core 18 | ${project.version} 19 | 20 | 21 | ${project.groupId} 22 | md-util 23 | ${project.version} 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Analyzer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.impl.level.AnalyzeTask; 4 | import de.hpi.is.md.hybrid.impl.level.Statistics; 5 | 6 | public interface Analyzer { 7 | 8 | Statistics analyze(Iterable results); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/ColumnConfiguration.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.ColumnMapping; 4 | import de.hpi.is.md.ThresholdFilter; 5 | import de.hpi.is.md.util.Hashable; 6 | import de.hpi.is.md.util.Hasher; 7 | import lombok.Data; 8 | import lombok.NonNull; 9 | 10 | @Data 11 | public class ColumnConfiguration implements Hashable { 12 | 13 | @NonNull 14 | private final ThresholdFilter thresholdFilter; 15 | @NonNull 16 | private final PreprocessingColumnConfiguration preprocessingConfiguration; 17 | 18 | @Override 19 | public void hash(Hasher hasher) { 20 | ColumnMapping mapping = preprocessingConfiguration.getMapping(); 21 | hasher 22 | .put(mapping) 23 | .put(thresholdFilter); 24 | } 25 | 26 | @Override 27 | public String toString() { 28 | return preprocessingConfiguration.toString(); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/DictionaryRecords.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import 
de.hpi.is.md.util.Hashable; 4 | import it.unimi.dsi.fastutil.ints.IntSet; 5 | import java.io.Serializable; 6 | 7 | public interface DictionaryRecords extends Iterable, Serializable { 8 | 9 | int[] get(int id); 10 | 11 | IntSet getAll(); 12 | 13 | interface Builder extends Hashable { 14 | 15 | Builder add(int recordId, int[] record); 16 | 17 | DictionaryRecords build(); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/MDUtil.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.md.MDElement; 4 | import de.hpi.is.md.hybrid.md.MDSite; 5 | import lombok.AccessLevel; 6 | import lombok.NoArgsConstructor; 7 | 8 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 9 | public final class MDUtil { 10 | 11 | public static boolean isNonTrivial(MDSite lhs, MDElement rhs) { 12 | int rhsAttr = rhs.getId(); 13 | return lhs.getOrDefault(rhsAttr) < rhs.getThreshold(); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/PositionListIndex.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.PositionListIndex.Cluster; 4 | import it.unimi.dsi.fastutil.ints.IntSet; 5 | import java.io.Serializable; 6 | import lombok.Data; 7 | 8 | public interface PositionListIndex extends Serializable, Iterable { 9 | 10 | IntSet get(int valueId); 11 | 12 | interface Builder { 13 | 14 | void add(int recordId, int value); 15 | 16 | PositionListIndex build(); 17 | } 18 | 19 | @Data 20 | class Cluster { 21 | 22 | private final int value; 23 | private final IntSet records; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
/HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Preprocessed.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.ColumnMapping; 4 | import de.hpi.is.md.util.Dictionary; 5 | import java.io.Serializable; 6 | import java.util.List; 7 | import lombok.Builder; 8 | import lombok.Data; 9 | import lombok.NonNull; 10 | 11 | @Data 12 | @Builder 13 | public class Preprocessed implements Serializable { 14 | 15 | private static final long serialVersionUID = 7398108455873873674L; 16 | @NonNull 17 | private final List columnPairs; 18 | @NonNull 19 | private final DictionaryRecords leftRecords; 20 | @NonNull 21 | private final DictionaryRecords rightRecords; 22 | @NonNull 23 | private final List> mappings; 24 | @NonNull 25 | private final List> leftDictionaries; 26 | @NonNull 27 | private final List> rightDictionaries; 28 | } 29 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Preprocessor.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.util.CacheableSupplier; 4 | 5 | public interface Preprocessor extends CacheableSupplier { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Rhs.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import lombok.Builder; 4 | import lombok.Data; 5 | 6 | @Data 7 | @Builder 8 | public class Rhs { 9 | 10 | private final int rhsAttr; 11 | private final double threshold; 12 | private final double lowerBound; 13 | 14 | @Override 15 | public String toString() { 16 | return rhsAttr + "@(" + lowerBound + "," + threshold + "]"; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- 
/HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Sampler.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.util.IntArrayPair; 4 | import java.util.Collection; 5 | import java.util.Optional; 6 | import java.util.Set; 7 | 8 | public interface Sampler { 9 | 10 | Set processRecommendations(Collection recommendations); 11 | 12 | Optional> sample(); 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/SimilaritySet.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.md.MDElement; 4 | import java.util.Arrays; 5 | import lombok.EqualsAndHashCode; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | @EqualsAndHashCode 11 | public class SimilaritySet { 12 | 13 | @NonNull 14 | private final double[] similaritySet; 15 | 16 | public double get(int attr) { 17 | return similaritySet[attr]; 18 | } 19 | 20 | public boolean isViolated(MDElement element) { 21 | int attr = element.getId(); 22 | return element.getThreshold() > similaritySet[attr]; 23 | } 24 | 25 | public int size() { 26 | return similaritySet.length; 27 | } 28 | 29 | @Override 30 | public String toString() { 31 | return Arrays.toString(similaritySet); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/SupportedMD.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.md.MD; 4 | import lombok.Data; 5 | import lombok.NonNull; 6 | 7 | @Data 8 | public class SupportedMD { 9 | 10 | @NonNull 11 | private final MD md; 12 | private final long support; 13 | } 14 | 
-------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/Validator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import de.hpi.is.md.hybrid.md.MDSite; 4 | import java.util.Collection; 5 | 6 | public interface Validator { 7 | 8 | ValidationResult validate(MDSite lhs, Collection rhs); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/ColumnPairWithThreshold.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl; 2 | 3 | import de.hpi.is.md.hybrid.PreprocessedColumnPair; 4 | import it.unimi.dsi.fastutil.ints.IntCollection; 5 | import lombok.NonNull; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @RequiredArgsConstructor 9 | public class ColumnPairWithThreshold { 10 | 11 | @NonNull 12 | private final PreprocessedColumnPair columnPair; 13 | private final double threshold; 14 | 15 | IntCollection getMatching(int value) { 16 | return columnPair.getAllSimilarRightRecords(value, threshold); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/Selector.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl; 2 | 3 | import lombok.EqualsAndHashCode; 4 | import lombok.RequiredArgsConstructor; 5 | 6 | @RequiredArgsConstructor 7 | @EqualsAndHashCode 8 | public class Selector { 9 | 10 | private final int[] values; 11 | 12 | int get(int i) { 13 | return values[i]; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/infer/LhsModifier.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.infer; 2 | 3 | import de.hpi.is.md.hybrid.md.MDSite; 4 | import lombok.NonNull; 5 | import lombok.RequiredArgsConstructor; 6 | 7 | @RequiredArgsConstructor 8 | class LhsModifier { 9 | 10 | @NonNull 11 | private final MDSite lhs; 12 | 13 | MDSite newLhs(int attr, double threshold) { 14 | MDSite clone = lhs.clone(); 15 | return clone.set(attr, threshold); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/infer/LhsRhsDisjointnessFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.infer; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 5 | import de.hpi.is.md.hybrid.md.MDElement; 6 | import de.hpi.is.md.hybrid.md.MDSite; 7 | import de.hpi.is.md.util.jackson.SingletonDeserializer; 8 | 9 | @JsonDeserialize(using = SingletonDeserializer.class) 10 | @CPSType(id = "disjoint", base = SpecializationFilter.class) 11 | public enum LhsRhsDisjointnessFilter implements SpecializationFilter { 12 | 13 | INSTANCE; 14 | 15 | @Override 16 | public boolean filter(MDSite lhs, MDElement rhs) { 17 | int rhsAttr = rhs.getId(); 18 | return !lhs.get(rhsAttr).isPresent(); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/infer/NonTrivialFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.infer; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 5 | import de.hpi.is.md.hybrid.MDUtil; 6 | import de.hpi.is.md.hybrid.md.MDElement; 7 | import de.hpi.is.md.hybrid.md.MDSite; 8 | import 
de.hpi.is.md.util.jackson.SingletonDeserializer; 9 | 10 | @JsonDeserialize(using = SingletonDeserializer.class) 11 | @CPSType(id = "non-trivial", base = SpecializationFilter.class) 12 | public enum NonTrivialFilter implements SpecializationFilter { 13 | 14 | INSTANCE; 15 | 16 | @Override 17 | public boolean filter(MDSite lhs, MDElement rhs) { 18 | return MDUtil.isNonTrivial(lhs, rhs); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/infer/SpecializationFilter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.infer; 2 | 3 | import com.bakdata.util.jackson.CPSBase; 4 | import com.bakdata.util.jackson.CPSTypeIdResolver; 5 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; 7 | import de.hpi.is.md.hybrid.md.MDElement; 8 | import de.hpi.is.md.hybrid.md.MDSite; 9 | import de.hpi.is.md.util.Hashable; 10 | 11 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "type") 12 | @JsonTypeIdResolver(CPSTypeIdResolver.class) 13 | @CPSBase 14 | public interface SpecializationFilter extends Hashable { 15 | 16 | boolean filter(MDSite lhs, MDElement rhs); 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/candidate/LhsContext.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.candidate; 2 | 3 | import de.hpi.is.md.hybrid.md.MDElement; 4 | import de.hpi.is.md.hybrid.md.MDSite; 5 | import java.util.Optional; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | abstract class LhsContext { 11 | 12 | @NonNull 13 | final MDSite lhs; 14 | 15 | Optional getNext(int 
currentLhsAttr) { 16 | return lhs.nextElement(currentLhsAttr); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/candidate/ValueHolder.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.candidate; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 6 | @Data 7 | @AllArgsConstructor 8 | class ValueHolder { 9 | 10 | private T value; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/lhs/LhsContext.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.lhs; 2 | 3 | import de.hpi.is.md.hybrid.md.MDElement; 4 | import de.hpi.is.md.hybrid.md.MDSite; 5 | import java.util.Optional; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | abstract class LhsContext { 11 | 12 | @NonNull 13 | final MDSite lhs; 14 | 15 | Optional getNext(int currentLhsAttr) { 16 | return lhs.nextElement(currentLhsAttr); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/lhs/LhsLattice.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.lhs; 2 | 3 | import com.codahale.metrics.annotation.Timed; 4 | import de.hpi.is.md.hybrid.md.MDSite; 5 | 6 | //@Metrics 7 | public class LhsLattice { 8 | 9 | private final LhsNode root; 10 | private int depth = 0; 11 | 12 | public LhsLattice(int columnPairs) { 13 | this.root = new LhsNode(columnPairs); 14 | } 15 | 16 | public void addIfMinimal(MDSite lhs) { 17 | if (!containsMdOrGeneralization(lhs)) { 18 | add(lhs); 19 | } 20 | } 21 
| 22 | @Timed 23 | public boolean containsMdOrGeneralization(MDSite lhs) { 24 | return root.containsMdOrGeneralization(lhs, 0); 25 | } 26 | 27 | @Timed 28 | private void add(MDSite lhs) { 29 | depth = Math.max(lhs.cardinality(), depth); 30 | root.add(lhs, 0); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/lhs/ValueHolder.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.lhs; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | 6 | @Data 7 | @AllArgsConstructor 8 | class ValueHolder { 9 | 10 | private T value; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/md/LevelFunction.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.md; 2 | 3 | import de.hpi.is.md.hybrid.md.MDSite; 4 | import de.hpi.is.md.util.StreamUtils; 5 | import it.unimi.dsi.fastutil.doubles.DoubleSortedSet; 6 | import java.util.List; 7 | 8 | public interface LevelFunction { 9 | 10 | default int getDistance(MDSite lhs) { 11 | return StreamUtils.seq(lhs) 12 | .mapToInt(lhsElem -> getSingleDistance(lhsElem.getId(), lhsElem.getThreshold())) 13 | .sum(); 14 | } 15 | 16 | int getSingleDistance(int lhsAttr, double threshold); 17 | 18 | int size(); 19 | 20 | interface Factory { 21 | 22 | LevelFunction create(List thresholds); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/md/LhsRhsPair.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.md; 2 | 3 | import de.hpi.is.md.hybrid.md.MDSite; 4 | import lombok.Data; 5 | import 
lombok.NonNull; 6 | 7 | @Data 8 | class LhsRhsPair { 9 | 10 | @NonNull 11 | private final MDSite lhs; 12 | @NonNull 13 | private final MDSite rhs; 14 | } 15 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/lattice/md/MDContext.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.md; 2 | 3 | import de.hpi.is.md.hybrid.md.MD; 4 | import de.hpi.is.md.hybrid.md.MDElement; 5 | import de.hpi.is.md.hybrid.md.MDSite; 6 | import java.util.Optional; 7 | import lombok.NonNull; 8 | import lombok.RequiredArgsConstructor; 9 | 10 | @RequiredArgsConstructor 11 | abstract class MDContext { 12 | 13 | @NonNull 14 | final MD md; 15 | 16 | Optional getNext(int currentLhsAttr) { 17 | MDSite lhs = md.getLhs(); 18 | return lhs.nextElement(currentLhsAttr); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/LevelStrategy.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level; 2 | 3 | import de.hpi.is.md.hybrid.impl.lattice.FullLattice; 4 | import java.util.Collection; 5 | 6 | public interface LevelStrategy { 7 | 8 | boolean areLevelsLeft(); 9 | 10 | Collection getCurrentLevel(); 11 | 12 | interface Factory { 13 | 14 | LevelStrategy create(FullLattice fullLattice, double minThreshold); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/ValidationTask.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level; 2 | 3 | 4 | import de.hpi.is.md.hybrid.Rhs; 5 | import de.hpi.is.md.hybrid.ValidationResult; 6 | import de.hpi.is.md.hybrid.Validator; 7 | import 
de.hpi.is.md.hybrid.impl.infer.ThresholdLowerer; 8 | import de.hpi.is.md.hybrid.md.MDSite; 9 | import java.util.Collection; 10 | import lombok.Builder; 11 | import lombok.NonNull; 12 | 13 | @Builder 14 | class ValidationTask { 15 | 16 | @NonNull 17 | private final Validator validator; 18 | @NonNull 19 | private final MDSite lhs; 20 | @NonNull 21 | private final ThresholdLowerer lowerer; 22 | @NonNull 23 | private final Collection rhs; 24 | 25 | AnalyzeTask validate() { 26 | ValidationResult results = validator.validate(lhs, rhs); 27 | return new AnalyzeTask(results, lowerer); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/analyze/AnalyzeStrategy.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level.analyze; 2 | 3 | import de.hpi.is.md.hybrid.ValidationResult.LhsResult; 4 | import de.hpi.is.md.hybrid.ValidationResult.RhsResult; 5 | import de.hpi.is.md.hybrid.impl.level.Statistics; 6 | 7 | public interface AnalyzeStrategy { 8 | 9 | void deduce(RhsResult rhsResult); 10 | 11 | Statistics getStatistics(); 12 | 13 | interface Factory { 14 | 15 | AnalyzeStrategy create(LhsResult lhsResult); 16 | 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/analyze/MDSpecializer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level.analyze; 2 | 3 | import de.hpi.is.md.hybrid.impl.infer.FullSpecializer; 4 | import de.hpi.is.md.hybrid.md.MD; 5 | import de.hpi.is.md.hybrid.md.MDElement; 6 | import de.hpi.is.md.hybrid.md.MDSite; 7 | import java.util.Collection; 8 | import lombok.RequiredArgsConstructor; 9 | 10 | @RequiredArgsConstructor 11 | public class MDSpecializer { 12 | 13 | private final FullSpecializer specializer; 14 
| 15 | Collection specialize(MD md) { 16 | MDSite lhs = md.getLhs(); 17 | MDElement rhs = md.getRhs(); 18 | return specializer.specialize(lhs, rhs, lhs::getOrDefault); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/analyze/SupportBasedFactory.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level.analyze; 2 | 3 | import de.hpi.is.md.hybrid.ValidationResult.LhsResult; 4 | import de.hpi.is.md.hybrid.impl.level.analyze.AnalyzeStrategy.Factory; 5 | import lombok.Builder; 6 | import lombok.NonNull; 7 | 8 | @Builder 9 | public class SupportBasedFactory implements Factory { 10 | 11 | private final long minSupport; 12 | @NonNull 13 | private final Factory supportedFactory; 14 | @NonNull 15 | private final Factory notSupportedFactory; 16 | 17 | @Override 18 | public AnalyzeStrategy create(LhsResult lhsResult) { 19 | if (isSupported(lhsResult)) { 20 | return supportedFactory.create(lhsResult); 21 | } 22 | return notSupportedFactory.create(lhsResult); 23 | } 24 | 25 | private boolean isSupported(LhsResult lhsResult) { 26 | long support = lhsResult.getSupport(); 27 | return support >= minSupport; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/minimizing/IntermediateCandidate.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level.minimizing; 2 | 3 | import de.hpi.is.md.hybrid.Lattice.LatticeMD; 4 | import de.hpi.is.md.hybrid.md.MDElement; 5 | import java.util.Collection; 6 | import java.util.function.BiConsumer; 7 | import lombok.Data; 8 | import lombok.NonNull; 9 | 10 | @Data 11 | class IntermediateCandidate { 12 | 13 | @NonNull 14 | private final LatticeMD latticeMd; 15 | @NonNull 16 | private final 
Collection rhs; 17 | 18 | public void forEach(BiConsumer action) { 19 | rhs.forEach(element -> action.accept(latticeMd, element)); 20 | } 21 | 22 | boolean isNotEmpty() { 23 | return !rhs.isEmpty(); 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/level/minimizing/Minimizer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.level.minimizing; 2 | 3 | import de.hpi.is.md.hybrid.impl.level.Candidate; 4 | import java.util.Collection; 5 | 6 | interface Minimizer { 7 | 8 | Collection toCandidates(Iterable preCandidates); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/md/MDElementImpl.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.md; 2 | 3 | import de.hpi.is.md.hybrid.md.MDElement; 4 | import lombok.Data; 5 | 6 | @Data 7 | public class MDElementImpl implements MDElement { 8 | 9 | private final int id; 10 | private final double threshold; 11 | 12 | @Override 13 | public String toString() { 14 | return id + "@" + threshold; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/md/MDImpl.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.md; 2 | 3 | import de.hpi.is.md.hybrid.md.MD; 4 | import de.hpi.is.md.hybrid.md.MDElement; 5 | import de.hpi.is.md.hybrid.md.MDSite; 6 | import lombok.Data; 7 | import lombok.NonNull; 8 | 9 | @Data 10 | public class MDImpl implements MD { 11 | 12 | @NonNull 13 | private final MDSite lhs; 14 | @NonNull 15 | private final MDElement rhs; 16 | 17 | @Override 18 | public String toString() { 19 | return lhs + "->" + rhs; 20 | } 21 
| 22 | 23 | } 24 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/preprocessed/CompressedColumn.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.preprocessed; 2 | 3 | import de.hpi.is.md.hybrid.PositionListIndex; 4 | import de.hpi.is.md.util.Dictionary; 5 | import lombok.Getter; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | @Getter 11 | class CompressedColumn { 12 | 13 | @NonNull 14 | private final Dictionary dictionary; 15 | @NonNull 16 | private final PositionListIndex pli; 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sampling/MDSpecializer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sampling; 2 | 3 | import de.hpi.is.md.hybrid.SimilaritySet; 4 | import de.hpi.is.md.hybrid.impl.infer.FullSpecializer; 5 | import de.hpi.is.md.hybrid.md.MD; 6 | import de.hpi.is.md.hybrid.md.MDElement; 7 | import de.hpi.is.md.hybrid.md.MDSite; 8 | import java.util.Collection; 9 | import lombok.RequiredArgsConstructor; 10 | 11 | @RequiredArgsConstructor 12 | class MDSpecializer { 13 | 14 | private final FullSpecializer specializer; 15 | 16 | Collection specialize(MDSite lhs, MDElement rhs, SimilaritySet similaritySet) { 17 | return specializer.specialize(lhs, rhs, similaritySet::get); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sampling/Statistics.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sampling; 2 | 3 | import lombok.Getter; 4 | import lombok.ToString; 5 | 6 | @Getter 7 | @ToString 8 | public 
class Statistics { 9 | 10 | private int processed = 0; 11 | private int count = 0; 12 | private int recommendations = 0; 13 | private int newDeduced = 0; 14 | 15 | void add(Statistics statistics) { 16 | this.count += statistics.count; 17 | this.processed += statistics.processed; 18 | this.newDeduced += statistics.newDeduced; 19 | this.recommendations += statistics.recommendations; 20 | } 21 | 22 | void count() { 23 | count++; 24 | } 25 | 26 | void newDeduced() { 27 | newDeduced++; 28 | } 29 | 30 | void processed() { 31 | processed++; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/PreprocessedSimilarity.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim; 2 | 3 | import it.unimi.dsi.fastutil.ints.IntSet; 4 | import java.util.Collection; 5 | import lombok.Builder; 6 | import lombok.Data; 7 | import lombok.NonNull; 8 | 9 | @Data 10 | @Builder 11 | public class PreprocessedSimilarity { 12 | 13 | private final int left; 14 | private final Collection similarities; 15 | private final double minSimilarity; 16 | 17 | boolean isNotEmpty() { 18 | return !similarities.isEmpty(); 19 | } 20 | 21 | @Builder 22 | @Data 23 | public static class To { 24 | 25 | private final int right; 26 | private final double similarity; 27 | @NonNull 28 | private final IntSet records; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/SimilarityArrayTableFactory.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim; 2 | 3 | import de.hpi.is.md.hybrid.impl.sim.SimilarityTableBuilderImpl.SimilarityTableFactory; 4 | import de.hpi.is.md.util.Int2Int2DoubleArrayTable; 5 | import de.hpi.is.md.util.Int2Int2DoubleTable; 6 | 7 | public class 
SimilarityArrayTableFactory implements SimilarityTableFactory { 8 | 9 | @Override 10 | public Int2Int2DoubleTable create(int height) { 11 | return Int2Int2DoubleArrayTable.create(height); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/SimilarityHashTableFactory.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim; 2 | 3 | import de.hpi.is.md.hybrid.impl.sim.SimilarityTableBuilderImpl.SimilarityTableFactory; 4 | import de.hpi.is.md.util.Int2Int2DoubleHashTable; 5 | import de.hpi.is.md.util.Int2Int2DoubleTable; 6 | 7 | public class SimilarityHashTableFactory implements SimilarityTableFactory { 8 | 9 | @Override 10 | public Int2Int2DoubleTable create(int height) { 11 | return Int2Int2DoubleHashTable.create(height); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/SimilarityReceiver.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim; 2 | 3 | import de.hpi.is.md.hybrid.SimilarityIndex; 4 | 5 | public interface SimilarityReceiver { 6 | 7 | void addSimilarity(PreprocessedSimilarity similarity); 8 | 9 | SimilarityIndex build(double minSimilarity); 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/SimilarityRowBuilder.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim; 2 | 3 | import de.hpi.is.md.hybrid.impl.sim.PreprocessedSimilarity.To; 4 | import de.hpi.is.md.util.Int2Int2DoubleTable.Int2DoubleRow; 5 | import java.util.Collection; 6 | 7 | public interface SimilarityRowBuilder { 8 | 9 | Int2DoubleRow create(Collection similarities); 10 | 
} 11 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/threshold/CollectingSimilarityReceiver.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | import de.hpi.is.md.hybrid.SimilarityIndex; 4 | import de.hpi.is.md.hybrid.impl.sim.SimilarityTableBuilder; 5 | import de.hpi.is.md.util.Int2Int2DoubleTable; 6 | 7 | class CollectingSimilarityReceiver extends ThresholdSimilarityReceiver { 8 | 9 | CollectingSimilarityReceiver(SimilarityTableBuilder similarityTableBuilder) { 10 | super(similarityTableBuilder); 11 | } 12 | 13 | @Override 14 | protected SimilarityIndex build(ThresholdMapBuilder thresholdMapBuilder, 15 | Int2Int2DoubleTable similarityTable, double minSimilarity) { 16 | ThresholdMap thresholdMap = thresholdMapBuilder.build(); 17 | return ThresholdSimilarityIndex.builder() 18 | .thresholdMap(thresholdMap) 19 | .similarityTable(similarityTable) 20 | .minSimilarity(minSimilarity) 21 | .build(); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/threshold/FlatThresholdMap.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | import de.hpi.is.md.util.Int2Double2ObjectSortedTable; 4 | import it.unimi.dsi.fastutil.ints.IntCollection; 5 | import it.unimi.dsi.fastutil.ints.IntLists; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | public class FlatThresholdMap implements ThresholdMap { 11 | 12 | private static final long serialVersionUID = -2517678664969781344L; 13 | @NonNull 14 | private final Int2Double2ObjectSortedTable table; 15 | 16 | @Override 17 | public IntCollection greaterOrEqual(int valueId, double max) { 18 | return 
table.getCeilingValue(valueId, max) 19 | .orElse(IntLists.EMPTY_LIST); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/threshold/ThresholdMap.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | import it.unimi.dsi.fastutil.ints.IntCollection; 4 | import java.io.Serializable; 5 | 6 | public interface ThresholdMap extends Serializable { 7 | 8 | IntCollection greaterOrEqual(int valueId, double max); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/sim/threshold/ThresholdMapFlattener.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | import com.bakdata.util.jackson.CPSBase; 4 | import com.bakdata.util.jackson.CPSTypeIdResolver; 5 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; 7 | import de.hpi.is.md.util.Hashable; 8 | import it.unimi.dsi.fastutil.doubles.Double2ObjectMap; 9 | import it.unimi.dsi.fastutil.ints.IntSet; 10 | 11 | public interface ThresholdMapFlattener { 12 | 13 | ThresholdMap build(); 14 | 15 | void flatten(int valueId, Double2ObjectMap sortedMap); 16 | 17 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "type") 18 | @JsonTypeIdResolver(CPSTypeIdResolver.class) 19 | @CPSBase 20 | interface Factory extends Hashable { 21 | 22 | ThresholdMapFlattener create(int leftSize); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/validation/Classifier.java: -------------------------------------------------------------------------------- 1 | package 
de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import lombok.RequiredArgsConstructor; 4 | 5 | @RequiredArgsConstructor 6 | public class Classifier { 7 | 8 | private final double minThreshold; 9 | private final double lowerBound; 10 | 11 | public boolean isValidAndMinimal(double similarity) { 12 | return isValid(similarity) && isMinimal(similarity); 13 | } 14 | 15 | private boolean isMinimal(double similarity) { 16 | return similarity > lowerBound; 17 | } 18 | 19 | private boolean isValid(double similarity) { 20 | return similarity >= minThreshold; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/validation/RhsValidationTask.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import de.hpi.is.md.hybrid.PreprocessedColumnPair; 4 | import de.hpi.is.md.hybrid.Rhs; 5 | import de.hpi.is.md.hybrid.ValidationResult.RhsResult; 6 | import it.unimi.dsi.fastutil.ints.IntIterable; 7 | 8 | public interface RhsValidationTask { 9 | 10 | RhsResult createResult(); 11 | 12 | boolean shouldUpdate(); 13 | 14 | void validate(Iterable left, IntIterable right); 15 | 16 | void validate(int[] record, IntIterable right); 17 | 18 | interface Factory { 19 | 20 | RhsValidationTask create(Rhs rhs, PreprocessedColumnPair columnPair, double lhsSimilarity); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/impl/validation/ValidationTask.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import de.hpi.is.md.hybrid.ValidationResult; 4 | 5 | public interface ValidationTask { 6 | 7 | ValidationResult validate(); 8 | } 9 | -------------------------------------------------------------------------------- 
/HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/md/MD.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.md; 2 | 3 | public interface MD { 4 | 5 | MDSite getLhs(); 6 | 7 | MDElement getRhs(); 8 | 9 | default boolean isInLhs(int attr) { 10 | return getLhs().isSet(attr); 11 | } 12 | 13 | default boolean isRhs(int attr) { 14 | return getRhs().getId() == attr; 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/md/MDElement.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.md; 2 | 3 | public interface MDElement { 4 | 5 | int getId(); 6 | 7 | double getThreshold(); 8 | } 9 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/hybrid/md/MDSiteIterator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.md; 2 | 3 | import java.util.Iterator; 4 | import java.util.NoSuchElementException; 5 | import java.util.Optional; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | class MDSiteIterator implements Iterator { 11 | 12 | @NonNull 13 | private final MDSite site; 14 | private int currentAttr = 0; 15 | 16 | @Override 17 | public boolean hasNext() { 18 | return nextElement().isPresent(); 19 | } 20 | 21 | @Override 22 | public MDElement next() { 23 | return nextElement() 24 | .map(this::shift) 25 | .orElseThrow(NoSuchElementException::new); 26 | } 27 | 28 | private Optional nextElement() { 29 | return site.nextElement(currentAttr); 30 | } 31 | 32 | private MDElement shift(MDElement element) { 33 | currentAttr = element.getId() + 1; 34 | return element; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- 
/HyMD/hybrid/src/main/java/de/hpi/is/md/util/enforce/ActualEnforcer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import java.util.Collection; 4 | 5 | interface ActualEnforcer { 6 | 7 | Collection enforce(); 8 | } 9 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/util/enforce/CompressedEnforceMatch.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import lombok.Data; 4 | 5 | @Data 6 | public class CompressedEnforceMatch { 7 | 8 | private final Iterable left; 9 | private final Iterable right; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/util/enforce/EmptyActualEnforcer.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import de.hpi.is.md.hybrid.DictionaryRecords; 4 | import de.hpi.is.md.util.StreamUtils; 5 | import java.util.Collection; 6 | import java.util.Collections; 7 | import lombok.Builder; 8 | import lombok.NonNull; 9 | 10 | @Builder 11 | class EmptyActualEnforcer implements ActualEnforcer { 12 | 13 | @NonNull 14 | private final DictionaryRecords leftRecords; 15 | @NonNull 16 | private final DictionaryRecords rightRecords; 17 | 18 | @Override 19 | public Collection enforce() { 20 | Iterable left = StreamUtils.seq(leftRecords).toList(); 21 | Iterable right = StreamUtils.seq(rightRecords).toList(); 22 | CompressedEnforceMatch match = new CompressedEnforceMatch(left, right); 23 | return Collections.singletonList(match); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/util/enforce/RecordInflater.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import it.unimi.dsi.fastutil.ints.Int2ObjectMap; 4 | import java.util.List; 5 | import lombok.RequiredArgsConstructor; 6 | 7 | @RequiredArgsConstructor 8 | public class RecordInflater { 9 | 10 | private final List> dictionaries; 11 | 12 | Object[] inflate(int[] record) { 13 | Object[] inflated = new Object[record.length]; 14 | for (int i = 0; i < record.length; i++) { 15 | int id = record[i]; 16 | Int2ObjectMap dictionary = dictionaries.get(i); 17 | inflated[i] = dictionary.get(id); 18 | } 19 | return inflated; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/main/java/de/hpi/is/md/util/enforce/RecordSelector.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.enforce; 2 | 3 | import de.hpi.is.md.hybrid.DictionaryRecords; 4 | import it.unimi.dsi.fastutil.ints.IntCollection; 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | import java.util.PrimitiveIterator.OfInt; 8 | import lombok.NonNull; 9 | import lombok.RequiredArgsConstructor; 10 | 11 | @RequiredArgsConstructor 12 | class RecordSelector { 13 | 14 | @NonNull 15 | private final DictionaryRecords records; 16 | 17 | Iterable getRecords(IntCollection ids) { 18 | int size = ids.size(); 19 | Collection result = new ArrayList<>(size); 20 | OfInt it = ids.iterator(); 21 | while (it.hasNext()) { 22 | int id = it.nextInt(); 23 | int[] record = records.get(id); 24 | result.add(record); 25 | } 26 | return result; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/SimilaritySetTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid; 2 | 3 | import static 
org.assertj.core.api.Assertions.assertThat; 4 | 5 | import de.hpi.is.md.hybrid.impl.md.MDElementImpl; 6 | import org.junit.Test; 7 | 8 | public class SimilaritySetTest { 9 | 10 | @Test 11 | public void testGet() { 12 | SimilaritySet similaritySet = new SimilaritySet(new double[]{0.2, 0.3}); 13 | assertThat(similaritySet.get(0)).isEqualTo(0.2); 14 | assertThat(similaritySet.get(1)).isEqualTo(0.3); 15 | } 16 | 17 | @Test 18 | public void testIsViolated() { 19 | SimilaritySet similaritySet = new SimilaritySet(new double[]{0.2, 0.3}); 20 | assertThat(similaritySet.isViolated(new MDElementImpl(0, 0.1))).isFalse(); 21 | assertThat(similaritySet.isViolated(new MDElementImpl(0, 0.2))).isFalse(); 22 | assertThat(similaritySet.isViolated(new MDElementImpl(0, 0.3))).isTrue(); 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/lattice/md/LatticeImplTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.lattice.md; 2 | 3 | import de.hpi.is.md.hybrid.Lattice; 4 | import de.hpi.is.md.hybrid.LatticeTest; 5 | 6 | public class LatticeImplTest extends LatticeTest { 7 | 8 | @Override 9 | protected Lattice createLattice(int columnPairs) { 10 | return new LatticeImpl(new Cardinality(columnPairs)); 11 | } 12 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/md/MDImplTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.md; 2 | 3 | import de.hpi.is.md.hybrid.MDTest; 4 | import de.hpi.is.md.hybrid.md.MD; 5 | import de.hpi.is.md.hybrid.md.MDElement; 6 | import de.hpi.is.md.hybrid.md.MDSite; 7 | 8 | public class MDImplTest extends MDTest { 9 | 10 | @Override 11 | protected MD createMD(MDSite lhs, MDElement rhs) { 12 | return new MDImpl(lhs, rhs); 13 | } 14 | 15 
| @Override 16 | protected MDElement createMDElement(int attr, double threshold) { 17 | return new MDElementImpl(attr, threshold); 18 | } 19 | 20 | @Override 21 | protected MDSite createMDSite(int columnPairs) { 22 | return new MDSiteImpl(columnPairs); 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/preprocessed/ArrayDictionaryRecordsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.preprocessed; 2 | 3 | import de.hpi.is.md.hybrid.ArrayDictionaryRecords; 4 | import de.hpi.is.md.hybrid.DictionaryRecords; 5 | import de.hpi.is.md.hybrid.DictionaryRecords.Builder; 6 | import de.hpi.is.md.hybrid.DictionaryRecordsTest; 7 | import java.util.Collection; 8 | 9 | public class ArrayDictionaryRecordsTest extends DictionaryRecordsTest { 10 | 11 | @Override 12 | protected DictionaryRecords createRecords(Collection records) { 13 | int id = 0; 14 | Builder builder = ArrayDictionaryRecords.builder(); 15 | for (int[] record : records) { 16 | builder.add(id++, record); 17 | } 18 | return builder.build(); 19 | } 20 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/preprocessed/ArrayPositionListIndexTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.preprocessed; 2 | 3 | import de.hpi.is.md.hybrid.PositionListIndex; 4 | import de.hpi.is.md.hybrid.PositionListIndexTest; 5 | import java.util.Map; 6 | 7 | public class ArrayPositionListIndexTest extends PositionListIndexTest { 8 | 9 | @Override 10 | protected PositionListIndex createPli(Map values) { 11 | PositionListIndex.Builder builder = ArrayPositionListIndex.builder(); 12 | values.forEach(builder::add); 13 | return builder.build(); 14 | } 15 | } 
-------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/preprocessed/MapDictionaryRecordsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.preprocessed; 2 | 3 | import de.hpi.is.md.hybrid.DictionaryRecords; 4 | import de.hpi.is.md.hybrid.DictionaryRecords.Builder; 5 | import de.hpi.is.md.hybrid.DictionaryRecordsTest; 6 | import java.util.Collection; 7 | 8 | public class MapDictionaryRecordsTest extends DictionaryRecordsTest { 9 | 10 | @Override 11 | protected DictionaryRecords createRecords(Collection records) { 12 | int id = 0; 13 | Builder builder = MapDictionaryRecords.builder(); 14 | for (int[] record : records) { 15 | builder.add(id++, record); 16 | } 17 | return builder.build(); 18 | } 19 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/preprocessed/MapPositionListIndexTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.preprocessed; 2 | 3 | import de.hpi.is.md.hybrid.PositionListIndex; 4 | import de.hpi.is.md.hybrid.PositionListIndexTest; 5 | import java.util.Map; 6 | 7 | public class MapPositionListIndexTest extends PositionListIndexTest { 8 | 9 | @Override 10 | protected PositionListIndex createPli(Map values) { 11 | PositionListIndex.Builder builder = MapPositionListIndex.builder(); 12 | values.forEach(builder::add); 13 | return builder.build(); 14 | } 15 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/sim/slim/SlimSimilarityIndexTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.slim; 2 | 3 | import de.hpi.is.md.hybrid.SimilarityIndex.SimilarityIndexBuilder; 4 | import 
de.hpi.is.md.hybrid.impl.sim.SimilarityIndexTest; 5 | 6 | public class SlimSimilarityIndexTest extends SimilarityIndexTest { 7 | 8 | @Override 9 | protected SimilarityIndexBuilder createBuilder() { 10 | return SlimSimilarityIndexBuilder.builder().build(); 11 | } 12 | 13 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/sim/threshold/ThresholdMapArrayFlattenerTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | public class ThresholdMapArrayFlattenerTest extends ThresholdMapFlattenerTest { 4 | 5 | @Override 6 | protected ThresholdMapFlattener createFlattener(int size) { 7 | return ThresholdMapArrayFlattener.factory().create(size); 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/sim/threshold/ThresholdMapHashFlattenerTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.sim.threshold; 2 | 3 | public class ThresholdMapHashFlattenerTest extends ThresholdMapFlattenerTest { 4 | 5 | @Override 6 | protected ThresholdMapFlattener createFlattener(int size) { 7 | return ThresholdMapHashFlattener.factory().create(size); 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/validation/GroupingRhsValidationTaskTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import de.hpi.is.md.hybrid.PreprocessedColumnPair; 4 | import de.hpi.is.md.hybrid.Rhs; 5 | import org.junit.runner.RunWith; 6 | import org.mockito.junit.MockitoJUnitRunner; 7 | 8 | @RunWith(MockitoJUnitRunner.StrictStubs.class) 9 | public class GroupingRhsValidationTaskTest 
extends RhsValidationTaskTest { 10 | 11 | @Override 12 | protected RhsValidationTask create(Rhs rhs, PreprocessedColumnPair columnPair) { 13 | RhsValidationTask.Factory factory = GroupingRhsValidationTask.factoryBuilder() 14 | .minThreshold(minThreshold) 15 | .rightRecords(rightRecords) 16 | .shouldUpdate(shouldUpdate) 17 | .build(); 18 | return factory.create(rhs, columnPair, 0.0); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/validation/RhsValidationTaskImplTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import de.hpi.is.md.hybrid.PreprocessedColumnPair; 4 | import de.hpi.is.md.hybrid.Rhs; 5 | import org.junit.runner.RunWith; 6 | import org.mockito.junit.MockitoJUnitRunner; 7 | 8 | @RunWith(MockitoJUnitRunner.StrictStubs.class) 9 | public class RhsValidationTaskImplTest extends RhsValidationTaskTest { 10 | 11 | @Override 12 | protected RhsValidationTask create(Rhs rhs, PreprocessedColumnPair columnPair) { 13 | RhsValidationTask.Factory factory = RhsValidationTaskImpl.factoryBuilder() 14 | .minThreshold(minThreshold) 15 | .rightRecords(rightRecords) 16 | .shouldUpdate(shouldUpdate) 17 | .build(); 18 | return factory.create(rhs, columnPair, 0.0); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /HyMD/hybrid/src/test/java/de/hpi/is/md/hybrid/impl/validation/ValidatorImplTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.hybrid.impl.validation; 2 | 3 | import static de.hpi.is.md.util.CollectionUtils.sizeBelow; 4 | 5 | import de.hpi.is.md.hybrid.DictionaryRecords; 6 | import de.hpi.is.md.hybrid.PreprocessedColumnPair; 7 | import de.hpi.is.md.hybrid.Validator; 8 | import de.hpi.is.md.hybrid.ValidatorTest; 9 | import java.util.List; 10 | 11 | public 
class ValidatorImplTest extends ValidatorTest { 12 | 13 | @Override 14 | protected Validator createValidator(List columnPairs, 15 | DictionaryRecords left, DictionaryRecords right, double minThreshold) { 16 | return ValidatorImpl.builder() 17 | .columnPairs(columnPairs) 18 | .leftRecords(left) 19 | .rightRecords(right) 20 | .minThreshold(minThreshold) 21 | .shouldUpdate(sizeBelow(5)) 22 | .minSupport(0) 23 | .build(); 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /HyMD/mapping/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | matching-dependencies 7 | de.hpi.is 8 | 1.2-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | md-mapping 13 | 14 | 15 | 16 | ${project.groupId} 17 | md-db 18 | ${project.version} 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /HyMD/mapping/src/main/java/de/hpi/is/md/mapping/SchemaMapperHelper.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.mapping; 2 | 3 | import de.hpi.is.md.relational.Column; 4 | import de.hpi.is.md.relational.ColumnPair; 5 | import java.util.Optional; 6 | import lombok.AccessLevel; 7 | import lombok.NoArgsConstructor; 8 | 9 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 10 | public final class SchemaMapperHelper { 11 | 12 | @SuppressWarnings("unchecked") 13 | public static Optional> toPair(Column left, Column right) { 14 | if (ofSameType(left, right)) { 15 | Column rightT = (Column) right; 16 | ColumnPair pair = new ColumnPair<>(left, rightT); 17 | return Optional.of(pair); 18 | } 19 | return Optional.empty(); 20 | } 21 | 22 | private static boolean ofSameType(Column left, Column right) { 23 | Class rightType = right.getType(); 24 | Class leftType = left.getType(); 25 | return leftType.equals(rightType); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- 
/HyMD/mapping/src/main/java/de/hpi/is/md/mapping/impl/ColumnMappingsConverter.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.mapping.impl; 2 | 3 | import com.fasterxml.jackson.databind.util.StdConverter; 4 | import com.google.common.collect.Multimap; 5 | import de.hpi.is.md.relational.Column; 6 | import de.hpi.is.md.util.jackson.Converters; 7 | import de.hpi.is.md.util.jackson.Entry; 8 | import java.util.List; 9 | 10 | class ColumnMappingsConverter extends 11 | StdConverter, Column>>, Multimap, Column>> { 12 | 13 | @Override 14 | public Multimap, Column> convert(List, Column>> value) { 15 | return Converters.toMultimap(value); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /HyMD/metanome/src/main/resources/simplelogger.properties: -------------------------------------------------------------------------------- 1 | # suppress inspection "UnusedProperty" for whole file 2 | org.slf4j.simpleLogger.defaultLogLevel=info 3 | org.slf4j.simpleLogger.showDateTime=true 4 | org.slf4j.simpleLogger.levelInBrackets=true -------------------------------------------------------------------------------- /HyMD/sim/src/main/java/de/hpi/is/md/sim/DistanceMetric.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim; 2 | 3 | import com.bakdata.util.jackson.CPSBase; 4 | import com.bakdata.util.jackson.CPSTypeIdResolver; 5 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 6 | import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver; 7 | import de.hpi.is.md.util.Hashable; 8 | import java.io.Serializable; 9 | 10 | @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, include = JsonTypeInfo.As.PROPERTY, property = "type") 11 | @JsonTypeIdResolver(CPSTypeIdResolver.class) 12 | @CPSBase 13 | public interface DistanceMetric extends Hashable, Serializable { 14 | 15 | long computeDistance(T obj1, T obj2); 16 | 17 | 
} 18 | -------------------------------------------------------------------------------- /HyMD/sim/src/main/java/de/hpi/is/md/sim/Similarity.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim; 2 | 3 | import java.util.Collection; 4 | import lombok.Data; 5 | 6 | @Data 7 | public class Similarity { 8 | 9 | private final T left; 10 | private final Collection> similarities; 11 | 12 | @Data 13 | public static class To { 14 | 15 | private final T right; 16 | private final double similarity; 17 | 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/sim/src/main/java/de/hpi/is/md/sim/SimilarityClassifier.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim; 2 | 3 | public interface SimilarityClassifier { 4 | 5 | boolean areSimilar(T obj1, T obj2); 6 | 7 | SimilarityClassifier asClassifier(double threshold); 8 | 9 | double getThreshold(); 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/sim/src/main/java/de/hpi/is/md/sim/impl/DateSimilarity.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim.impl; 2 | 3 | import de.hpi.is.md.sim.DistanceMetric; 4 | import java.time.temporal.ChronoUnit; 5 | import java.time.temporal.Temporal; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @RequiredArgsConstructor 9 | public class DateSimilarity implements DistanceMetric { 10 | 11 | private static final long serialVersionUID = -1193708279189367428L; 12 | private final ChronoUnit unit; 13 | 14 | @Override 15 | public long computeDistance(Temporal temporal1, Temporal temporal2) { 16 | return unit.between(temporal1, temporal2); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- 
/HyMD/sim/src/main/java/de/hpi/is/md/sim/impl/LevenshteinDistanceMetric.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim.impl; 2 | 3 | import com.bakdata.util.jackson.CPSType; 4 | import com.fasterxml.jackson.databind.annotation.JsonDeserialize; 5 | import de.hpi.is.md.sim.DistanceMetric; 6 | import de.hpi.is.md.util.jackson.SingletonDeserializer; 7 | import org.apache.commons.text.similarity.LevenshteinDistance; 8 | 9 | @JsonDeserialize(using = SingletonDeserializer.class) 10 | @CPSType(id = "levenshtein", base = DistanceMetric.class) 11 | public enum LevenshteinDistanceMetric implements DistanceMetric { 12 | 13 | INSTANCE; 14 | 15 | private static final LevenshteinDistance LEVENSHTEIN = LevenshteinDistance.getDefaultInstance(); 16 | 17 | @Override 18 | public long computeDistance(String obj1, String obj2) { 19 | return LEVENSHTEIN.apply(obj1, obj2).longValue(); 20 | } 21 | 22 | 23 | @Override 24 | public String toString() { 25 | return "Levenshtein"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /HyMD/sim/src/main/java/de/hpi/is/md/sim/impl/SqlDateSimilarity.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.sim.impl; 2 | 3 | import de.hpi.is.md.sim.DistanceMetric; 4 | import java.sql.Date; 5 | import java.time.temporal.ChronoUnit; 6 | 7 | public class SqlDateSimilarity implements DistanceMetric { 8 | 9 | private static final long serialVersionUID = 1210121832046884960L; 10 | private final DateSimilarity dateSimilarity; 11 | 12 | public SqlDateSimilarity(ChronoUnit unit) { 13 | dateSimilarity = new DateSimilarity(unit); 14 | } 15 | 16 | @Override 17 | public long computeDistance(Date obj1, Date obj2) { 18 | return dateSimilarity.computeDistance(obj1.toLocalDate(), obj2.toLocalDate()); 19 | } 20 | 21 | } 22 | 
-------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/AbstractDictionary.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import it.unimi.dsi.fastutil.objects.Object2IntMap; 4 | import java.util.Collection; 5 | import lombok.NonNull; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @RequiredArgsConstructor 9 | public abstract class AbstractDictionary implements Dictionary { 10 | 11 | private static final long serialVersionUID = -9007173080471006422L; 12 | @NonNull 13 | private final Object2IntMap values; 14 | 15 | @Override 16 | public int getOrAdd(T value) { 17 | return values.computeIntIfAbsent(value, this::encode); 18 | } 19 | 20 | @Override 21 | public Collection values() { 22 | return values.keySet(); 23 | } 24 | 25 | protected abstract int encode(T value); 26 | 27 | } 28 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/AbstractPollCollection.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import lombok.NonNull; 6 | import lombok.RequiredArgsConstructor; 7 | 8 | @RequiredArgsConstructor 9 | public abstract class AbstractPollCollection implements PollCollection { 10 | 11 | @NonNull 12 | private final Collection collection; 13 | 14 | @Override 15 | public void add(T value) { 16 | collection.add(value); 17 | } 18 | 19 | @Override 20 | public void addAll(Collection values) { 21 | collection.addAll(values); 22 | } 23 | 24 | @Override 25 | public Collection poll() { 26 | Collection values = new ArrayList<>(collection); 27 | collection.clear(); 28 | return values; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- 
/**
 * A {@link Function} that is also {@link Serializable} and can be chained
 * directly into a {@link Consumer}.
 *
 * @param <T> input type
 * @param <R> result type
 */
public interface BetterFunction<T, R> extends Function<T, R>, Serializable {

    /**
     * Builds a consumer that first applies this function and then passes the
     * result to {@code consumer}.
     *
     * @param consumer receiver of the function results
     * @return a consumer of this function's input type
     */
    default Consumer<T> thenConsume(Consumer<R> consumer) {
        return input -> {
            R mapped = apply(input);
            consumer.accept(mapped);
        };
    }

}
@RequiredArgsConstructor 11 | @Data 12 | public class BetterMapDecorator implements BetterMap { 13 | 14 | private static final long serialVersionUID = -7785903165560099613L; 15 | @NonNull 16 | protected final Map map; 17 | 18 | @Override 19 | public void forEach(BiConsumer action) { 20 | map.forEach(action); 21 | } 22 | 23 | @Override 24 | public Optional get(K key) { 25 | V value = map.get(key); 26 | return Optional.ofNullable(value); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/BetterSupplier.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.io.Serializable; 4 | import java.util.function.Supplier; 5 | 6 | public interface BetterSupplier extends Supplier, Serializable { 7 | 8 | } 9 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/BigDecimalUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.math.BigDecimal; 4 | import lombok.AccessLevel; 5 | import lombok.NoArgsConstructor; 6 | 7 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 8 | public final class BigDecimalUtils { 9 | 10 | private static final BigDecimal[] NUMBERS = new BigDecimal[500]; 11 | 12 | static { 13 | for (int i = 0; i < NUMBERS.length; i++) { 14 | NUMBERS[i] = BigDecimal.valueOf(i); 15 | } 16 | } 17 | 18 | public static BigDecimal valueOf(int number) { 19 | return 0 <= number && number < NUMBERS.length ? 
NUMBERS[number] 20 | : BigDecimal.valueOf(number); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/CacheableSupplier.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import com.google.common.hash.HashCode; 4 | import java.util.function.Supplier; 5 | 6 | public interface CacheableSupplier extends Supplier { 7 | 8 | /** 9 | * HashCode to uniquely identify the object create by this supplier 10 | * 11 | * @return hash code 12 | */ 13 | HashCode hash(); 14 | 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/CastUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import lombok.AccessLevel; 4 | import lombok.NoArgsConstructor; 5 | 6 | @SuppressWarnings("unchecked") 7 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 8 | public final class CastUtils { 9 | 10 | public static T as(Object obj) { 11 | return (T) obj; 12 | } 13 | } -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/DefaultDictionary.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; 4 | 5 | public class DefaultDictionary extends AbstractDictionary { 6 | 7 | private static final long serialVersionUID = -5333970395893162389L; 8 | private int nextValue = 0; 9 | 10 | public DefaultDictionary() { 11 | super(new Object2IntOpenHashMap<>()); 12 | } 13 | 14 | @Override 15 | public int size() { 16 | return nextValue; 17 | } 18 | 19 | @Override 20 | protected int encode(T value) { 21 | return nextValue++; 22 | } 23 | } 24 | 
/**
 * A {@link BiConsumer} whose first argument is a primitive {@code double}.
 *
 * @param <T> type of the second argument
 */
public interface DoubleObjectBiConsumer<T> extends BiConsumer<Double, T> {

    /**
     * Performs the operation on a primitive double and an object.
     *
     * @param d the double argument
     * @param t the object argument
     */
    void accept(double d, T t);

    /**
     * Boxed bridge required by {@link BiConsumer}; unboxes {@code d} and
     * forwards to {@link #accept(double, Object)}.
     *
     * @throws NullPointerException if {@code d} is {@code null}
     */
    @Deprecated
    @Override
    default void accept(Double d, T t) {
        double unboxed = d;
        accept(unboxed, t);
    }

}
AbstractInt2Double2ObjectSortedTable { 10 | 11 | private static final long serialVersionUID = 3705813803498950508L; 12 | @NonNull 13 | private final Double2ObjectSortedMap[] array; 14 | 15 | @Override 16 | protected Optional> get(int rowKey) { 17 | Double2ObjectSortedMap row = array[rowKey]; 18 | return Optional.ofNullable(row); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/Int2Double2ObjectSortedMapTable.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import it.unimi.dsi.fastutil.doubles.Double2ObjectSortedMap; 4 | import it.unimi.dsi.fastutil.ints.Int2ObjectMap; 5 | import java.util.Optional; 6 | import lombok.NonNull; 7 | import lombok.RequiredArgsConstructor; 8 | 9 | @RequiredArgsConstructor 10 | public class Int2Double2ObjectSortedMapTable extends AbstractInt2Double2ObjectSortedTable { 11 | 12 | private static final long serialVersionUID = 2019129869920332801L; 13 | @NonNull 14 | private final Int2ObjectMap> map; 15 | 16 | @Override 17 | protected Optional> get(int rowKey) { 18 | Double2ObjectSortedMap row = map.get(rowKey); 19 | return Optional.ofNullable(row); 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/Int2Double2ObjectSortedTable.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.io.Serializable; 4 | import java.util.Optional; 5 | 6 | public interface Int2Double2ObjectSortedTable extends Serializable { 7 | 8 | Optional getCeilingValue(int rowKey, double columnKey); 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/Int2DoubleMapRow.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static de.hpi.is.md.util.Int2Int2DoubleTable.DEFAULT; 4 | 5 | import de.hpi.is.md.util.Int2Int2DoubleTable.Int2DoubleRow; 6 | import it.unimi.dsi.fastutil.doubles.DoubleCollection; 7 | import it.unimi.dsi.fastutil.ints.Int2DoubleMap; 8 | import lombok.RequiredArgsConstructor; 9 | 10 | @RequiredArgsConstructor 11 | public class Int2DoubleMapRow implements Int2DoubleRow { 12 | 13 | private static final long serialVersionUID = -7326716695357407771L; 14 | private final Int2DoubleMap map; 15 | 16 | @Override 17 | public DoubleCollection values() { 18 | return map.values(); 19 | } 20 | 21 | @Override 22 | public double getOrDefault(int columnKey) { 23 | return map.getOrDefault(columnKey, DEFAULT); 24 | } 25 | 26 | @Override 27 | public Int2DoubleMap asMap() { 28 | return map; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/Int2Int2DoubleTable.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import it.unimi.dsi.fastutil.doubles.DoubleCollection; 4 | import it.unimi.dsi.fastutil.doubles.DoubleSet; 5 | import it.unimi.dsi.fastutil.ints.Int2DoubleMap; 6 | import it.unimi.dsi.fastutil.ints.Int2DoubleMaps; 7 | import java.io.Serializable; 8 | 9 | public interface Int2Int2DoubleTable extends Serializable { 10 | 11 | double DEFAULT = 0.0; 12 | 13 | double getOrDefault(int rowKey, int columnKey); 14 | 15 | void putRow(int rowKey, Int2DoubleRow row); 16 | 17 | Int2DoubleRow row(int rowKey); 18 | 19 | DoubleSet values(); 20 | 21 | interface Int2DoubleRow extends Serializable { 22 | 23 | Int2DoubleRow EMPTY = new Int2DoubleMapRow(Int2DoubleMaps.EMPTY_MAP); 24 | 25 | Int2DoubleMap asMap(); 26 | 27 | double getOrDefault(int columnKey); 28 | 29 | DoubleCollection values(); 30 | } 31 | } 32 | 
/**
 * A {@link BiConsumer} whose first argument is a primitive {@code int}.
 *
 * <p>Implementations (typically lambdas or method references) provide
 * {@link #accept(int, Object)}. The boxed {@link #accept(Integer, Object)}
 * inherited from {@link BiConsumer} is bridged to the primitive variant so an
 * instance can still be passed wherever a {@code BiConsumer<Integer, T>} is
 * expected.
 *
 * @param <T> type of the second (object) argument
 */
@FunctionalInterface
public interface IntObjectBiConsumer<T> extends BiConsumer<Integer, T> {

	/**
	 * Performs this operation on the given arguments.
	 *
	 * @param i the primitive first argument
	 * @param t the second argument
	 */
	void accept(int i, T t);

	/**
	 * Boxed bridge that unboxes {@code i} and delegates to
	 * {@link #accept(int, Object)}.
	 *
	 * @param i the boxed first argument; must not be {@code null}
	 * @param t the second argument
	 * @throws NullPointerException if {@code i} is {@code null}
	 * @deprecated call {@link #accept(int, Object)} to avoid boxing
	 */
	@Deprecated
	@Override
	default void accept(Integer i, T t) {
		accept(i.intValue(), t);
	}
}
{ 11 | 12 | public static Optional next(Iterator it) { 13 | if (it.hasNext()) { 14 | T entry = it.next(); 15 | return Optional.of(entry); 16 | } 17 | return Optional.empty(); 18 | } 19 | 20 | public static OptionalDouble next(OfDouble it) { 21 | if (it.hasNext()) { 22 | double higher = it.nextDouble(); 23 | return OptionalDouble.of(higher); 24 | } 25 | return OptionalDouble.empty(); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/JdbcUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.sql.ResultSetMetaData; 4 | import java.sql.SQLException; 5 | import java.util.Optional; 6 | import lombok.AccessLevel; 7 | import lombok.NoArgsConstructor; 8 | 9 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 10 | public final class JdbcUtils { 11 | 12 | public static Optional> getColumnClass(ResultSetMetaData metaData, int column) 13 | throws SQLException { 14 | String className = metaData.getColumnClassName(column); 15 | try { 16 | Class clazz = Class.forName(className); 17 | return Optional.of(clazz); 18 | } catch (ClassNotFoundException e) { 19 | return Optional.empty(); 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/LazyMap.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | public interface LazyMap extends BetterMap { 4 | 5 | V getOrCreate(K key); 6 | } 7 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/LazyMapImpl.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Map; 4 | 5 | public class LazyMapImpl extends BetterMapDecorator implements LazyMap { 6 | 7 | 
private static final long serialVersionUID = -4794332794682053652L; 8 | private final BetterSupplier factory; 9 | 10 | public LazyMapImpl(Map map, BetterSupplier factory) { 11 | super(map); 12 | this.factory = factory; 13 | } 14 | 15 | @Override 16 | public V getOrCreate(K key) { 17 | return map.computeIfAbsent(key, __ -> factory.get()); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/NullComparator.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Comparator; 4 | import lombok.NonNull; 5 | import lombok.RequiredArgsConstructor; 6 | 7 | @RequiredArgsConstructor 8 | public class NullComparator implements Comparator { 9 | 10 | @NonNull 11 | private final Comparator underlying; 12 | 13 | @Override 14 | public int compare(T o1, T o2) { 15 | if (o1 == null) { 16 | return o2 == null ? 0 : -1; 17 | } 18 | if (o2 == null) { 19 | return 1; 20 | } 21 | return underlying.compare(o1, o2); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/ObjectUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Objects; 4 | import lombok.AccessLevel; 5 | import lombok.NoArgsConstructor; 6 | 7 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 8 | public final class ObjectUtils { 9 | 10 | public static boolean bothNull(Object obj1, Object obj2) { 11 | return obj1 == null && obj2 == null; 12 | } 13 | 14 | public static boolean eitherNull(Object obj1, Object obj2) { 15 | return obj1 == null || obj2 == null; 16 | } 17 | 18 | public static boolean notEquals(Object obj1, Object obj2) { 19 | return !Objects.equals(obj1, obj2); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- 
/HyMD/util/src/main/java/de/hpi/is/md/util/Optionals.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Collection; 4 | import java.util.Optional; 5 | import java.util.stream.Stream; 6 | import lombok.AccessLevel; 7 | import lombok.NoArgsConstructor; 8 | 9 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 10 | public final class Optionals { 11 | 12 | public static Optional> of(Collection collection) { 13 | return collection.isEmpty() ? Optional.empty() : Optional.of(collection); 14 | } 15 | 16 | public static Stream stream(Optional optional) { 17 | return optional.map(Stream::of) 18 | .orElseGet(Stream::empty); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/PollCollection.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Collection; 4 | 5 | public interface PollCollection { 6 | 7 | void add(T value); 8 | 9 | void addAll(Collection values); 10 | 11 | Collection poll(); 12 | } 13 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/PollSet.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.HashSet; 4 | 5 | public class PollSet extends AbstractPollCollection { 6 | 7 | public PollSet() { 8 | super(new HashSet<>()); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/StringUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Optional; 4 | import lombok.AccessLevel; 5 | import lombok.NoArgsConstructor; 6 | 7 | @NoArgsConstructor(access = 
AccessLevel.PRIVATE) 8 | public final class StringUtils { 9 | 10 | public static String toLowerCase(String s) { 11 | return Optional.ofNullable(s) 12 | .map(String::toLowerCase) 13 | .orElse(null); 14 | } 15 | 16 | public static String join(CharSequence delimiter, Iterable elements) { 17 | return StreamUtils.seq(elements).toString(delimiter); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/TupleUtils.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.Map.Entry; 4 | import lombok.AccessLevel; 5 | import lombok.NoArgsConstructor; 6 | import org.jooq.lambda.tuple.Tuple2; 7 | 8 | @NoArgsConstructor(access = AccessLevel.PRIVATE) 9 | public final class TupleUtils { 10 | 11 | public static Tuple2 toTuple(Entry entry) { 12 | return new Tuple2<>(entry.getKey(), entry.getValue()); 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/ValueWrapper.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import lombok.Data; 4 | 5 | @Data 6 | public class ValueWrapper { 7 | 8 | private final T value; 9 | } 10 | -------------------------------------------------------------------------------- /HyMD/util/src/main/java/de/hpi/is/md/util/jackson/Entry.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util.jackson; 2 | 3 | import com.google.common.collect.Maps; 4 | import lombok.Data; 5 | 6 | @Data 7 | public class Entry { 8 | 9 | private final K key; 10 | private final V value; 11 | 12 | public java.util.Map.Entry toEntry() { 13 | return Maps.immutableEntry(key, value); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- 
/HyMD/util/src/test/java/de/hpi/is/md/util/BetterConsumerTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | import java.util.function.Consumer; 8 | import org.junit.Test; 9 | 10 | public class BetterConsumerTest { 11 | 12 | @Test 13 | public void testCompose() { 14 | Collection results = new ArrayList<>(); 15 | BetterConsumer consumer = results::add; 16 | Consumer composed = consumer.compose(String::length); 17 | composed.accept("foo"); 18 | assertThat(results).hasSize(1); 19 | assertThat(results).contains(3); 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/BetterFunctionTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Collection; 7 | import java.util.function.Consumer; 8 | import org.junit.Test; 9 | 10 | public class BetterFunctionTest { 11 | 12 | @Test 13 | public void testThenConsume() { 14 | Collection results = new ArrayList<>(); 15 | BetterFunction function = String::length; 16 | Consumer composed = function.thenConsume(results::add); 17 | composed.accept("foo"); 18 | assertThat(results).hasSize(1); 19 | assertThat(results).contains(3); 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/BigDecimalUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.junit.Test; 6 | 7 | public class BigDecimalUtilsTest { 8 | 9 | @Test 10 
| public void test() { 11 | assertThat(BigDecimalUtils.valueOf(0).intValue()).isEqualTo(0); 12 | assertThat(BigDecimalUtils.valueOf(100).intValue()).isEqualTo(100); 13 | assertThat(BigDecimalUtils.valueOf(1000).intValue()).isEqualTo(1000); 14 | } 15 | 16 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/CastUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | import static org.junit.Assert.fail; 5 | 6 | import org.junit.Test; 7 | 8 | public class CastUtilsTest { 9 | 10 | @Test 11 | public void test() { 12 | Object obj = "foo"; 13 | String casted = CastUtils.as(obj); 14 | assertThat(casted).isEqualTo("foo"); 15 | } 16 | 17 | @SuppressWarnings("unused") 18 | @Test(expected = ClassCastException.class) 19 | public void testFailedCast() { 20 | Object obj = "1"; 21 | Integer ignored = CastUtils.as(obj); 22 | fail(); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/DefaultDictionaryTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | public class DefaultDictionaryTest extends DictionaryTest { 4 | 5 | @Override 6 | protected Dictionary createDictionary() { 7 | return new DefaultDictionary<>(); 8 | } 9 | 10 | } 11 | -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/DoubleObjectBiConsumerTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import org.junit.Test; 8 | 9 | public class DoubleObjectBiConsumerTest { 10 
| 11 | @SuppressWarnings("deprecation") 12 | @Test 13 | public void test() { 14 | Map map = new HashMap<>(); 15 | DoubleObjectBiConsumer consumer = map::put; 16 | consumer.accept(Double.valueOf(1.0), "foo"); 17 | assertThat(map).hasSize(1); 18 | assertThat(map).containsEntry(1.0, "foo"); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/HashableTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import com.google.common.hash.HashFunction; 6 | import com.google.common.hash.Hashing; 7 | import org.junit.Test; 8 | 9 | public class HashableTest { 10 | 11 | @Test 12 | public void test() { 13 | HashFunction function = Hashing.goodFastHash(10); 14 | assertThat(Hasher.of(function).put(new TestHashable()).hash()) 15 | .isEqualTo(function.newHasher().putUnencodedChars(TestHashable.class.getName()).hash()); 16 | } 17 | 18 | private static class TestHashable implements Hashable { 19 | 20 | } 21 | 22 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2Double2ObjectSortedMapTableTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import it.unimi.dsi.fastutil.doubles.Double2ObjectSortedMap; 4 | import it.unimi.dsi.fastutil.ints.Int2ObjectMap; 5 | 6 | public class Int2Double2ObjectSortedMapTableTest extends Int2Double2ObjectSortedTableTest { 7 | 8 | @Override 9 | protected Int2Double2ObjectSortedTable create( 10 | Int2ObjectMap> map, int size) { 11 | return new Int2Double2ObjectSortedMapTable<>(map); 12 | } 13 | 14 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2DoubleArrayRowTest.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import de.hpi.is.md.util.Int2Int2DoubleTable.Int2DoubleRow; 4 | import it.unimi.dsi.fastutil.ints.Int2DoubleMap; 5 | import it.unimi.dsi.fastutil.ints.Int2DoubleMap.Entry; 6 | 7 | public class Int2DoubleArrayRowTest extends Int2DoubleRowTest { 8 | 9 | @Override 10 | protected Int2DoubleRow createRow(Int2DoubleMap map, int size) { 11 | double[] row = new double[size]; 12 | for (Entry entry : map.int2DoubleEntrySet()) { 13 | int columnKey = entry.getIntKey(); 14 | row[columnKey] = entry.getDoubleValue(); 15 | } 16 | return new Int2DoubleArrayRow(row); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2DoubleMapRowTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import de.hpi.is.md.util.Int2Int2DoubleTable.Int2DoubleRow; 4 | import it.unimi.dsi.fastutil.ints.Int2DoubleMap; 5 | 6 | public class Int2DoubleMapRowTest extends Int2DoubleRowTest { 7 | 8 | @Override 9 | protected Int2DoubleRow createRow(Int2DoubleMap map, int size) { 10 | return new Int2DoubleMapRow(map); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2Int2DoubleArrayTableTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | public class Int2Int2DoubleArrayTableTest extends Int2Int2DoubleTableTest { 4 | 5 | @Override 6 | public Int2Int2DoubleTable createTable(int height) { 7 | return Int2Int2DoubleArrayTable.create(height); 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2Int2DoubleHashTableTest.java: 
-------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | public class Int2Int2DoubleHashTableTest extends Int2Int2DoubleTableTest { 4 | 5 | @Override 6 | public Int2Int2DoubleTable createTable(int height) { 7 | return Int2Int2DoubleHashTable.create(height); 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/Int2ObjectHashMultimapTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | public class Int2ObjectHashMultimapTest extends Int2ObjectMultimapTest { 4 | 5 | @Override 6 | protected Int2ObjectMultimap create() { 7 | return new Int2ObjectHashMultimap<>(); 8 | } 9 | 10 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/IntegerObjectBiConsumerTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import org.junit.Test; 8 | 9 | public class IntegerObjectBiConsumerTest { 10 | 11 | @SuppressWarnings("deprecation") 12 | @Test 13 | public void test() { 14 | Map map = new HashMap<>(); 15 | IntObjectBiConsumer consumer = map::put; 16 | consumer.accept(Integer.valueOf(1), "foo"); 17 | assertThat(map).hasSize(1); 18 | assertThat(map).containsEntry(1, "foo"); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/LazyArrayTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.junit.Assert.fail; 4 | 5 | import org.junit.Test; 6 | 7 | public class LazyArrayTest extends LazyMapTest { 8 | 9 | 
@Test(expected = IndexOutOfBoundsException.class) 10 | public void testGetElementOutOfBounds() { 11 | LazyMap array = create(() -> null, 3); 12 | array.get(4); 13 | fail(); 14 | } 15 | 16 | @Test(expected = IndexOutOfBoundsException.class) 17 | public void testGetOrCreateElementOutOfBounds() { 18 | LazyMap array = create(() -> null, 3); 19 | array.getOrCreate(4); 20 | fail(); 21 | } 22 | 23 | @Override 24 | protected LazyMap create(BetterSupplier factory, int size) { 25 | return new LazyArray<>(new String[size], factory); 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/LazyMapImplTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import java.util.HashMap; 4 | 5 | public class LazyMapImplTest extends LazyMapTest { 6 | 7 | @Override 8 | protected LazyMap create(BetterSupplier factory, int size) { 9 | return new LazyMapImpl<>(new HashMap<>(), factory); 10 | } 11 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/MathUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.junit.Test; 6 | 7 | public class MathUtilsTest { 8 | 9 | @Test 10 | public void testDivide() { 11 | assertThat(MathUtils.divide(8.0, 2.0)).isEqualTo(4.0); 12 | assertThat(MathUtils.divide(0.0, 2.0)).isEqualTo(0.0); 13 | assertThat(MathUtils.divide(8.0, 0.0)).isEqualTo(0.0); 14 | } 15 | 16 | @Test 17 | public void testIncrement() { 18 | assertThat(MathUtils.increment(8L)).isEqualTo(9L); 19 | assertThat(MathUtils.increment(0L)).isEqualTo(1L); 20 | assertThat(MathUtils.increment(-8L)).isEqualTo(-7L); 21 | } 22 | 23 | @Test 24 | public void testMultiply() { 25 | assertThat(MathUtils.multiply(8L, 
2L)).isEqualTo(16L); 26 | assertThat(MathUtils.multiply(0L, 2L)).isEqualTo(0L); 27 | assertThat(MathUtils.multiply(8L, 0L)).isEqualTo(0L); 28 | } 29 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/MetricsUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static com.codahale.metrics.MetricRegistry.name; 4 | import static org.assertj.core.api.Assertions.assertThat; 5 | 6 | import com.codahale.metrics.Timer; 7 | import com.codahale.metrics.Timer.Context; 8 | import org.junit.Test; 9 | 10 | public class MetricsUtilsTest { 11 | 12 | @Test 13 | public void testDefaultRegistry() { 14 | assertThat(MetricsUtils.getDefaultRegistry()).isNotNull(); 15 | } 16 | 17 | @SuppressWarnings("EmptyTryBlock") 18 | @Test 19 | public void testTimer() { 20 | try (Context ignored = MetricsUtils.timer(MetricsUtilsTest.class, "foo")) { 21 | } 22 | Timer timer = MetricsUtils.getDefaultRegistry().timer(name(MetricsUtilsTest.class, "foo")); 23 | assertThat(timer.getCount()).isGreaterThanOrEqualTo(1L); 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/NullComparatorTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import java.util.Comparator; 6 | import org.junit.Test; 7 | 8 | public class NullComparatorTest { 9 | 10 | @Test 11 | public void test() { 12 | Comparator comparator = new NullComparator<>(Integer::compare); 13 | assertThat(comparator.compare(1, 2)).isEqualTo(Integer.compare(1, 2)); 14 | assertThat(comparator.compare(null, null)).isEqualTo(0); 15 | assertThat(comparator.compare(null, 1)).isLessThan(0); 16 | assertThat(comparator.compare(1, null)).isGreaterThan(0); 17 | } 18 | 19 | } 
-------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/ObjectUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.junit.Test; 6 | 7 | @SuppressWarnings("ConstantConditions") 8 | public class ObjectUtilsTest { 9 | 10 | @Test 11 | public void testBothNull() { 12 | assertThat(ObjectUtils.bothNull(null, null)).isTrue(); 13 | assertThat(ObjectUtils.bothNull("", null)).isFalse(); 14 | assertThat(ObjectUtils.bothNull(null, "")).isFalse(); 15 | assertThat(ObjectUtils.bothNull("", "")).isFalse(); 16 | } 17 | 18 | @Test 19 | public void testEitherNull() { 20 | assertThat(ObjectUtils.eitherNull(null, null)).isTrue(); 21 | assertThat(ObjectUtils.eitherNull("", null)).isTrue(); 22 | assertThat(ObjectUtils.eitherNull(null, "")).isTrue(); 23 | assertThat(ObjectUtils.eitherNull("", "")).isFalse(); 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /HyMD/util/src/test/java/de/hpi/is/md/util/StringUtilsTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.is.md.util; 2 | 3 | import static org.assertj.core.api.Assertions.assertThat; 4 | 5 | import org.junit.Test; 6 | 7 | public class StringUtilsTest { 8 | 9 | @Test 10 | public void testToLowerCase() { 11 | assertThat(StringUtils.toLowerCase("foO")).isEqualTo("foo"); 12 | assertThat(StringUtils.toLowerCase(null)).isNull(); 13 | } 14 | 15 | } -------------------------------------------------------------------------------- /HyUCC/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- 
/HyUCC/src/main/java/de/metanome/algorithms/hyucc/structures/ClusterIdentifierWithRecord.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyucc.structures; 2 | 3 | public class ClusterIdentifierWithRecord extends ClusterIdentifier { 4 | 5 | private final int record; 6 | 7 | public ClusterIdentifierWithRecord(final int[] cluster, final int record) { 8 | super(cluster); 9 | this.record = record; 10 | } 11 | 12 | public int getRecord() { 13 | return this.record; 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /HyUCC/src/main/java/de/metanome/algorithms/hyucc/structures/IntegerPair.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyucc.structures; 2 | 3 | public class IntegerPair { 4 | 5 | private final int a; 6 | private final int b; 7 | 8 | public IntegerPair(final int a, final int b) { 9 | this.a = a; 10 | this.b = b; 11 | } 12 | 13 | public int a() { 14 | return this.a; 15 | } 16 | 17 | public int b() { 18 | return this.b; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /HyUCC/src/main/java/de/metanome/algorithms/hyucc/structures/UCCTreeElementUCCPair.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyucc.structures; 2 | 3 | import java.util.BitSet; 4 | 5 | public class UCCTreeElementUCCPair { 6 | 7 | private final UCCTreeElement element; 8 | private final BitSet ucc; 9 | 10 | public UCCTreeElement getElement() { 11 | return this.element; 12 | } 13 | 14 | public BitSet getUCC() { 15 | return this.ucc; 16 | } 17 | 18 | public UCCTreeElementUCCPair(UCCTreeElement element, BitSet ucc) { 19 | this.element = element; 20 | this.ucc = ucc; 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- 
/**
 * Minimal process-wide logger: every message is echoed to {@code System.out}
 * and also accumulated in memory so the complete log can be retrieved later
 * via {@link #read()}.
 *
 * <p>The in-memory buffer is a plain {@link StringBuilder}; the write methods
 * perform no synchronization of their own.
 */
public class Logger {

	// Created eagerly during class initialization, so concurrent first calls to
	// getInstance() can no longer race and end up with two different loggers.
	private static final Logger INSTANCE = new Logger();

	private final StringBuilder log = new StringBuilder();

	private Logger() {
	}

	/**
	 * @return the single shared logger instance
	 */
	public static Logger getInstance() {
		return INSTANCE;
	}

	/**
	 * Appends {@code message} to the log and prints it without a line break.
	 *
	 * @param message text to log; {@code null} is logged as {@code "null"}
	 */
	public void write(String message) {
		this.log.append(message);
		System.out.print(message);
	}

	/**
	 * Appends {@code message} followed by {@code "\r\n"} to the log and prints
	 * it as a line on standard output.
	 *
	 * @param message text to log; {@code null} is logged as {@code "null"}
	 */
	public void writeln(String message) {
		// Chained appends avoid building a temporary concatenated string.
		this.log.append(message).append("\r\n");
		System.out.println(message);
	}

	/**
	 * Logs the string form of {@code message} without a line break.
	 *
	 * @param message object to log; {@code null} is logged as {@code "null"},
	 *                matching the {@code String} overload
	 */
	public void write(Object message) {
		this.write(String.valueOf(message));
	}

	/**
	 * Logs the string form of {@code message} followed by a line break.
	 *
	 * @param message object to log; {@code null} is logged as {@code "null"},
	 *                matching the {@code String} overload
	 */
	public void writeln(Object message) {
		this.writeln(String.valueOf(message));
	}

	/**
	 * @return everything written to this logger so far
	 */
	public String read() {
		return this.log.toString();
	}
}
} 29 | } 30 | -------------------------------------------------------------------------------- /HyUCC/src/test/java/de/metanome/algorithms/hyucc/HyUCCTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.hyucc; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class HyUCCTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public HyUCCTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( HyUCCTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /MANY/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /MANY/src/main/java/de/metanome/algorithms/many/bitvectors/BitVector.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.bitvectors; 2 | 3 | public interface BitVector { 4 | 5 | long[] getBits(); 6 | 7 | void set(int i); 8 | 9 | void clear(int i); 10 | 11 | boolean get(int i); 12 | 13 | BitVector and(BitVector other); 14 | 15 | BitVector flip(); 16 | 17 | int size(); 18 | 19 | BitVector or(BitVector other); 20 | 21 | BitVector copy(); 22 | 23 | int next(int i); 24 | 25 | int count(); 26 | } 27 | -------------------------------------------------------------------------------- 
/MANY/src/main/java/de/metanome/algorithms/many/bitvectors/BitVectorFactory.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.bitvectors; 2 | 3 | public class BitVectorFactory { 4 | boolean isFastVector = true; 5 | 6 | public BitVectorFactory(boolean isFastVector) { 7 | this.isFastVector = isFastVector; 8 | } 9 | 10 | public BitVector createBitVector(int size) { 11 | if (isFastVector) 12 | return new LongArrayHierarchicalBitVector(size); 13 | return new LongArrayBitVector(size); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /MANY/src/main/java/de/metanome/algorithms/many/bloom_filtering/HashFactory.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.bloom_filtering; 2 | 3 | import java.security.MessageDigest; 4 | import java.security.NoSuchAlgorithmException; 5 | 6 | /** Computes salted MD5 digests through one shared MessageDigest instance. */ 7 | public class HashFactory { 8 | 9 | public static final HashFactory Instance = new HashFactory(); 10 | 11 | /** Shared MD5 digest; null if the JVM does not provide the algorithm. */ 12 | static final MessageDigest digestFunction; 13 | static { 14 | MessageDigest tmp; 15 | try { 16 | tmp = java.security.MessageDigest.getInstance("MD5"); 17 | } catch (NoSuchAlgorithmException e) { 18 | tmp = null; 19 | } 20 | digestFunction = tmp; 21 | } 22 | 23 | /** 24 | * Returns the MD5 digest of the salt byte followed by data. 25 | * 26 | * @param data bytes to hash 27 | * @param salt single byte fed to the digest before data 28 | * @return the digest bytes 29 | * @throws IllegalStateException if MD5 is unavailable 30 | */ 31 | public byte[] createHash(byte[] data, byte salt) { 32 | if (digestFunction == null) 33 | throw new IllegalStateException("MD5 MessageDigest is not available"); 34 | /* Lock the shared static digest itself: a per-instance monitor would not serialize callers that use different HashFactory objects. */ 35 | synchronized (digestFunction) { 36 | digestFunction.update(salt); 37 | return digestFunction.digest(data); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /MANY/src/main/java/de/metanome/algorithms/many/helper/PrintHelper.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.helper; 2 | 3 | import java.util.List; 4 | 5 | import de.metanome.algorithms.many.bitvectors.BitVector; 6 | 7 | public class PrintHelper { 8 | 9 | public static String printMatrix(List> matrix) { 
10 | StringBuffer matrixString = new StringBuffer(); 11 | 12 | for (BitVector row : matrix) { 13 | matrixString.append(row.toString() + "\n"); 14 | } 15 | return matrixString.toString(); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /MANY/src/main/java/de/metanome/algorithms/many/io/InputIterator.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.io; 2 | 3 | import java.util.List; 4 | 5 | import de.metanome.algorithm_integration.input.InputIterationException; 6 | 7 | public interface InputIterator extends AutoCloseable { 8 | 9 | public boolean next() throws InputIterationException; 10 | public String getValue(int columnIndex) throws InputIterationException; 11 | public List getValues(int numColumns) throws InputIterationException; 12 | } 13 | -------------------------------------------------------------------------------- /MANY/src/test/java/de/metanome/algorithms/many/test/RelationalInputGeneratorMock.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.many.test; 2 | 3 | import java.io.IOException; 4 | 5 | import de.metanome.algorithm_integration.input.InputGenerationException; 6 | import de.metanome.algorithm_integration.input.RelationalInput; 7 | import de.metanome.algorithm_integration.input.RelationalInputGenerator; 8 | 9 | public class RelationalInputGeneratorMock implements RelationalInputGenerator { 10 | 11 | RelationalInput input; 12 | 13 | public RelationalInputGeneratorMock(RelationalInput input) { 14 | this.input = input; 15 | } 16 | 17 | @Override 18 | public RelationalInput generateNewCopy() throws InputGenerationException { 19 | RelationalInputMock mock = (RelationalInputMock) input; 20 | mock.reset(); 21 | return mock; 22 | } 23 | 24 | @Override 25 | public void close() throws IOException { 26 | } 27 | } 28 | 
-------------------------------------------------------------------------------- /MvdDet/src/test/java/de/metanome/algorithms/mvddet/MvDDetectorTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.mvddet; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | public class MvDDetectorTest { 8 | 9 | @Before 10 | public void setUp() throws Exception { 11 | } 12 | 13 | @After 14 | public void tearDown() throws Exception { 15 | } 16 | 17 | @Test 18 | public void testGetConfigurationRequirements() { 19 | } 20 | 21 | @Test 22 | public void testExecute() { 23 | } 24 | 25 | @Test 26 | public void testSetConfigurationValue() { 27 | } 28 | 29 | @Test 30 | public void testSetTempFileGenerator() { 31 | } 32 | 33 | @Test 34 | public void testSetResultReceiverFunctionalDependencyResultReceiver() { 35 | } 36 | 37 | @Test 38 | public void testSetResultReceiverUniqueColumnCombinationResultReceiver() { 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /Normalize/src/main/java/de/metanome/algorithms/normalize/utils/Utils.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.normalize.utils; 2 | 3 | import java.util.BitSet; 4 | 5 | public class Utils { 6 | 7 | public static int andNotCount(BitSet base, BitSet not) { 8 | BitSet andNotSet = (BitSet) base.clone(); 9 | andNotSet.andNot(not); 10 | return andNotSet.cardinality(); 11 | } 12 | 13 | public static int intersectionCount(BitSet set1, BitSet set2) { 14 | BitSet intersection = (BitSet) set1.clone(); 15 | intersection.and(set2); 16 | return intersection.cardinality(); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/sorting/partitions/RowIndexedDateValue.java: 
-------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.sorting.partitions; 2 | 3 | import java.util.Date; 4 | 5 | public class RowIndexedDateValue extends RowIndexedValue { 6 | public final Date value; 7 | 8 | public RowIndexedDateValue(final long index, final Date value) { 9 | this.index = index; 10 | this.value = value; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/sorting/partitions/RowIndexedDoubleValue.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.sorting.partitions; 2 | 3 | public class RowIndexedDoubleValue extends RowIndexedValue { 4 | public final Double value; 5 | 6 | public RowIndexedDoubleValue(final long index, final Double value) { 7 | this.index = index; 8 | this.value = value; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/sorting/partitions/RowIndexedLongValue.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.sorting.partitions; 2 | 3 | public class RowIndexedLongValue extends RowIndexedValue { 4 | public final Long value; 5 | 6 | public RowIndexedLongValue(final long index, final Long value) { 7 | this.index = index; 8 | this.value = value; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/sorting/partitions/RowIndexedStringValue.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.sorting.partitions; 2 | 3 | public class RowIndexedStringValue extends RowIndexedValue { 4 | public final String value; 5 | 6 | public RowIndexedStringValue(final long 
index, final String value) { 7 | this.index = index; 8 | this.value = value; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/sorting/partitions/RowIndexedValue.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.sorting.partitions; 2 | 3 | public abstract class RowIndexedValue { 4 | public long index = -1; 5 | } 6 | -------------------------------------------------------------------------------- /ORDER/src/main/java/de/metanome/algorithms/order/types/ByteArray.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.order.types; 2 | 3 | import java.util.Arrays; 4 | 5 | public class ByteArray { 6 | public final byte[] data; 7 | 8 | public ByteArray(final byte[] data) { 9 | if (data == null) { 10 | throw new NullPointerException(); 11 | } 12 | this.data = data; 13 | } 14 | 15 | @Override 16 | public boolean equals(final Object other) { 17 | if (!(other instanceof ByteArray)) { 18 | return false; 19 | } 20 | return Arrays.equals(this.data, ((ByteArray) other).data); 21 | } 22 | 23 | @Override 24 | public int hashCode() { 25 | return Arrays.hashCode(this.data); 26 | } 27 | 28 | @Override 29 | public String toString() { 30 | return ByteArrayPermutations.permutationToIntegerString(this.data); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /SCDP/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /SCDP/src/test/resources/small.csv: -------------------------------------------------------------------------------- 1 | 0,0,0,0,a 2 | 1,1,0,,b 3 | 2,1,0,0,c 4 | 3,3,0,,a 5 | 4,3,0,0,abc 6 | 5,5,0,,ab 7 | 6,5,0,0,abc 8 | 7,7,0,,d 9 | 9,8,0,0,d 10 | 
10,7,0,,d -------------------------------------------------------------------------------- /SPIDER/SPIDERAlgorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /SPIDER/SPIDERAlgorithm/src/main/java/de/metanome/algorithms/spider/sorting/TPMMSTuple.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.spider.sorting; 2 | 3 | public class TPMMSTuple implements Comparable { 4 | 5 | public String value; 6 | public final int readerNumber; 7 | 8 | public TPMMSTuple(String value, int readerNumber) { 9 | this.value = value; 10 | this.readerNumber = readerNumber; 11 | } 12 | 13 | @Override 14 | public int compareTo(TPMMSTuple other) { 15 | return this.value.compareTo(other.value); 16 | } 17 | 18 | @Override 19 | public int hashCode() { 20 | return this.value.hashCode(); 21 | } 22 | 23 | @Override 24 | public boolean equals(Object obj) { 25 | if (!(obj instanceof TPMMSTuple)) 26 | return false; 27 | TPMMSTuple other = (TPMMSTuple) obj; 28 | return this.value.equals(other.value); 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | return "Tuple(" + this.value + "," + this.readerNumber + ")"; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /SPIDER/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | de.metanome.algorithms.spider 5 | SPIDERModules 6 | pom 7 | 8 | SPIDERModules 9 | 10 | 11 | UTF-8 12 | 13 | 14 | 15 | de.metanome.algorithms 16 | algorithms 17 | 1.2-SNAPSHOT 18 | ../pom.xml 19 | 20 | 21 | 22 | SPIDERAlgorithm 23 | SPIDERDatabase 24 | SPIDERFile 25 | 26 | 27 | -------------------------------------------------------------------------------- /cody/cody-core/src/main/java/de/metanome/algorithms/cody/codycore/runner/BaseRunner.java: 
-------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.cody.codycore.runner; 2 | 3 | import de.metanome.algorithms.cody.codycore.Configuration; 4 | import de.metanome.algorithms.cody.codycore.candidate.CheckedColumnCombination; 5 | import lombok.Getter; 6 | import lombok.NonNull; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | public abstract class BaseRunner { 12 | 13 | protected final Configuration configuration; 14 | 15 | /** 16 | * Contains all maximal valid ColumnCombinations 17 | */ 18 | @Getter protected List resultSet; 19 | 20 | public BaseRunner(@NonNull Configuration configuration) { 21 | this.configuration = configuration; 22 | this.resultSet = new ArrayList<>(); 23 | } 24 | 25 | public abstract void run(); 26 | } 27 | -------------------------------------------------------------------------------- /dcfinder/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | -------------------------------------------------------------------------------- /dcfinder/src/main/java/ch/javasoft/bitset/search/ISubsetBackend.java: -------------------------------------------------------------------------------- 1 | package ch.javasoft.bitset.search; 2 | 3 | import java.util.Set; 4 | import java.util.function.Consumer; 5 | 6 | import ch.javasoft.bitset.IBitSet; 7 | 8 | public interface ISubsetBackend { 9 | 10 | boolean add(IBitSet bs); 11 | 12 | Set getAndRemoveGeneralizations(IBitSet invalidFD); 13 | 14 | boolean containsSubset(IBitSet add); 15 | 16 | void forEach(Consumer consumer); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /dcfinder/src/main/java/ch/javasoft/bitset/search/ITreeSearch.java: -------------------------------------------------------------------------------- 1 | package ch.javasoft.bitset.search; 2 | 3 | import java.util.Collection; 4 | import 
java.util.function.Consumer; 5 | 6 | import ch.javasoft.bitset.IBitSet; 7 | 8 | public interface ITreeSearch { 9 | 10 | boolean add(IBitSet bs); 11 | 12 | void forEachSuperSet(IBitSet bitset, Consumer consumer); 13 | 14 | void forEach(Consumer consumer); 15 | 16 | void remove(IBitSet remove); 17 | 18 | boolean containsSubset(IBitSet bitset); 19 | 20 | Collection getAndRemoveGeneralizations(IBitSet invalidDC); 21 | 22 | } -------------------------------------------------------------------------------- /dcfinder/src/main/java/de/metanome/algorithms/dcfinder/evidenceset/IEvidenceSet.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.evidenceset; 2 | 3 | import java.util.Iterator; 4 | import java.util.Set; 5 | 6 | import de.metanome.algorithms.dcfinder.predicates.sets.PredicateSet; 7 | 8 | public interface IEvidenceSet extends Iterable { 9 | 10 | boolean add(PredicateSet predicateSet); 11 | 12 | boolean add(PredicateSet create, long count); 13 | 14 | long getCount(PredicateSet predicateSet); 15 | 16 | boolean adjustCount(PredicateSet predicateSet, long amount ) ; 17 | 18 | Iterator iterator(); 19 | 20 | Set getSetOfPredicateSets(); 21 | 22 | int size(); 23 | 24 | boolean isEmpty(); 25 | 26 | } -------------------------------------------------------------------------------- /dcfinder/src/main/java/de/metanome/algorithms/dcfinder/input/partitions/clusters/TupleIDProvider.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.input.partitions.clusters; 2 | 3 | import java.util.List; 4 | import java.util.stream.IntStream; 5 | 6 | import com.google.common.collect.ImmutableList; 7 | 8 | public class TupleIDProvider { 9 | 10 | private final List tIDs; 11 | 12 | public TupleIDProvider(int size) { 13 | tIDs = IntStream.range(0, size).boxed().collect(ImmutableList.toImmutableList()); 14 | } 15 | 16 | public List gettIDs() { 
17 | return tIDs; 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /dcfinder/src/main/java/de/metanome/algorithms/dcfinder/input/partitions/clusters/indexers/ITPIDsIndexer.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.input.partitions.clusters.indexers; 2 | 3 | import java.util.Collection; 4 | import java.util.List; 5 | 6 | public interface ITPIDsIndexer { 7 | 8 | public Collection getValues() ; 9 | 10 | 11 | public List getTpIDsForValue(Integer value); 12 | 13 | public int getIndexForValueThatIsLessThan(int value); 14 | 15 | } 16 | -------------------------------------------------------------------------------- /dcfinder/src/main/java/de/metanome/algorithms/dcfinder/predicates/PartitionRefiner.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.predicates; 2 | 3 | public interface PartitionRefiner { 4 | public boolean satisfies(int line1, int lin2); 5 | 6 | } 7 | -------------------------------------------------------------------------------- /dcfinder/src/main/java/de/metanome/algorithms/dcfinder/setcover/IMinimalCoverSearch.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.setcover; 2 | 3 | import de.metanome.algorithms.dcfinder.denialconstraints.DenialConstraintSet; 4 | import de.metanome.algorithms.dcfinder.evidenceset.IEvidenceSet; 5 | 6 | public interface IMinimalCoverSearch { 7 | public DenialConstraintSet getDenialConstraints(IEvidenceSet evidenceSet); 8 | } 9 | -------------------------------------------------------------------------------- /dcfinder/src/test/java/de/metanome/algorithms/dcfinder/dcfinder/AppTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcfinder.dcfinder; 2 | 3 | 
import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /dcucc/README.md: -------------------------------------------------------------------------------- 1 | Discovery-conditional-unique-column-combination 2 | =============================================== 3 | 4 | The source repository of the DoCU algorithm 5 | 6 | The DoCu algorithm is an algorithm to discover conditional unique column combination. 7 | The algorithm is compatible to the metanome framework which can be found under https://github.com/HPI-Information-Systems/Metanome. 8 | 9 | To execute the algorithm, download the metanome deployment-0.0.2 snapshot from https://www.hpi.uni-potsdam.de/naumann/sites/metanome/files/. 10 | Copy the algorithm jar into the algorithm folder and start metanome. The algorithm should be shown in the GUI. 11 | 12 | The algorithm is a maven project and can, therefore, be created with: 13 | 14 | ```mvn package``` 15 | 16 | I developed the DoCU algorithm as part of my master's thesis. 
17 | -------------------------------------------------------------------------------- /dcucc/dcucc/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | /target 3 | /.settings -------------------------------------------------------------------------------- /dcucc/dcucc/src/main/java/de/metanome/algorithms/dcucc/ConditionEntry.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcucc; 2 | 3 | import de.metanome.algorithm_helper.data_structures.ColumnCombinationBitset; 4 | 5 | import it.unimi.dsi.fastutil.longs.LongArrayList; 6 | 7 | /** 8 | * @author Jens Ehrlich 9 | */ 10 | public class ConditionEntry { 11 | 12 | public ColumnCombinationBitset condition; 13 | public LongArrayList cluster; 14 | public float coverage; 15 | public int clusterNumber; 16 | 17 | public ConditionEntry(ColumnCombinationBitset condition, LongArrayList cluster) { 18 | this.condition = new ColumnCombinationBitset(condition); 19 | this.cluster = cluster.clone(); 20 | this.coverage = (float) ((cluster.size() * 100.0) / Dcucc.numberOfTuples); 21 | } 22 | 23 | public ConditionEntry setClusterNumber(int clusterNumber) { 24 | this.clusterNumber = clusterNumber; 25 | return this; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /dcucc/dcucc/src/main/java/de/metanome/algorithms/dcucc/ConditionLatticeTraverser.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcucc; 2 | 3 | import de.metanome.algorithm_helper.data_structures.ColumnCombinationBitset; 4 | import de.metanome.algorithm_integration.AlgorithmExecutionException; 5 | 6 | /** 7 | * @author Jens Ehrlich 8 | */ 9 | public interface ConditionLatticeTraverser { 10 | 11 | public void iterateConditionLattice(ColumnCombinationBitset partialUnique) 12 | throws AlgorithmExecutionException; 13 | 14 | 15 | } 
16 | -------------------------------------------------------------------------------- /dcucc/dcucc/src/main/java/de/metanome/algorithms/dcucc/NotAndConditionTraverser.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.dcucc; 2 | 3 | /** 4 | * @author Jens Ehrlich 5 | */ 6 | public class NotAndConditionTraverser extends AndConditionTraverser { 7 | 8 | public NotAndConditionTraverser(Dcucc algorithm) { 9 | super(algorithm); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /dcucc/dcucc_test_helper/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | /target 3 | /.settings -------------------------------------------------------------------------------- /depminer/depminer_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /depminer/depminer_helper/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /depminer/depminer_helper/src/main/java/de/metanome/algorithms/depminer/depminer_helper/modules/container/StorageSet.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.depminer.depminer_helper.modules.container; 2 | 3 | public abstract class StorageSet { 4 | 5 | @Override 6 | public String toString() { 7 | 8 | return this.toString_(); 9 | } 10 | 11 | protected abstract String toString_(); 12 | } 13 | -------------------------------------------------------------------------------- /depminer/pom.xml: 
-------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | de.metanome.algorithms.depminer 6 | DepMinerModules 7 | pom 8 | 9 | DepMinerModules 10 | 11 | 12 | de.metanome.algorithms 13 | algorithms 14 | 1.2-SNAPSHOT 15 | ../pom.xml 16 | 17 | 18 | 19 | UTF-8 20 | 21 | 22 | 23 | depminer_helper 24 | depminer_algorithm 25 | 26 | 27 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/approach/equivalence/EquivalenceManagedPartition.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.approach.equivalence; 2 | 3 | import fdiscovery.partitions.Partition; 4 | 5 | public class EquivalenceManagedPartition extends Partition { 6 | 7 | private static final long serialVersionUID = 3864482946944185511L; 8 | 9 | protected long hashNumber; 10 | 11 | public EquivalenceManagedPartition(int columnIndex, int numberOfColumns, int numberOfRows) { 12 | super(columnIndex, numberOfColumns, numberOfRows); 13 | } 14 | 15 | public EquivalenceManagedPartition(EquivalenceManagedPartition base, EquivalenceManagedPartition additional) { 16 | super(base, additional); 17 | } 18 | 19 | public long getHashNumber() { 20 | return hashNumber; 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/columns/AgreeSet.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.columns; 2 | 3 | import java.awt.Point; 4 | import java.util.Set; 5 | 6 | 7 | import com.google.common.collect.Sets; 8 | 9 | public class AgreeSet extends ColumnCollection { 10 | 11 | private static final long serialVersionUID = -5335032949377336772L; 12 | 13 | public AgreeSet(Set set1, Set set2, int numberOfColumns) { 14 | super(numberOfColumns); 15 | Set intersected = Sets.intersection(set1, set2); 16 | for (Point columnToIdentifier : 
intersected) { 17 | this.set(columnToIdentifier.x); 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/columns/DifferenceSet.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.columns; 2 | 3 | 4 | public class DifferenceSet extends ColumnCollection { 5 | 6 | private static final long serialVersionUID = -5174627424398542681L; 7 | 8 | public DifferenceSet(AgreeSet agreeSet) { 9 | super(agreeSet.getNumberOfColumns()); 10 | 11 | this.or(agreeSet); 12 | this.flip(0, this.numberOfColumns); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/columns/Path.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.columns; 2 | 3 | import java.util.ArrayList; 4 | 5 | 6 | public class Path extends ColumnCollection { 7 | 8 | private static final long serialVersionUID = -6451347203736964695L; 9 | 10 | public Path(int numberOfColumns) { 11 | super(numberOfColumns); 12 | } 13 | 14 | public ArrayList getMaximalSubsets() { 15 | ArrayList maximalSubsetPaths = new ArrayList<>(); 16 | 17 | if (this.isEmpty()) { 18 | return new ArrayList<>(); 19 | } 20 | for (int columnIndex : this.getSetBits()) { 21 | maximalSubsetPaths.add((Path)this.removeColumnCopy(columnIndex)); 22 | } 23 | 24 | return maximalSubsetPaths; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/columns/Seeds.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.columns; 2 | 3 | import java.util.PriorityQueue; 4 | 5 | public class Seeds extends PriorityQueue { 6 | 7 | private static final long serialVersionUID = 3497425762452970552L; 8 | 9 | public boolean containsSubset(Seed seed) { 10 | for (Seed seedInQueue 
: this) { 11 | if (seedInQueue.getIndices().isProperSubsetOf(seed.getIndices())) { 12 | return true; 13 | } 14 | } 15 | return false; 16 | } 17 | 18 | public boolean containsSuperset(Seed seed) { 19 | for (Seed seedInQueue : this) { 20 | if (seedInQueue.getIndices().isProperSupersetOf(seed.getIndices())) { 21 | return true; 22 | } 23 | } 24 | return false; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/equivalence/Equivalence.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.equivalence; 2 | 3 | import java.util.Set; 4 | 5 | public interface Equivalence extends Set { 6 | 7 | public int unassignedIdentifier = -1; 8 | 9 | public int getIdentifier(); 10 | public > boolean isProperSubset(T other); 11 | public void add(int value); 12 | } 13 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/equivalence/TEquivalence.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.equivalence; 2 | 3 | import gnu.trove.set.TIntSet; 4 | 5 | public interface TEquivalence extends TIntSet { 6 | 7 | public int unassignedIdentifier = -1; 8 | 9 | public int getIdentifier(); 10 | public boolean isProperSubset(T other); 11 | public boolean add(int value); 12 | } 13 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/fastfds/EquivalenceClass.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.fastfds; 2 | 3 | import java.awt.Point; 4 | import java.util.HashSet; 5 | import java.util.Iterator; 6 | 7 | public class EquivalenceClass extends HashSet { 8 | 9 | private static final long serialVersionUID = -1326656356702786656L; 10 | 11 | @Override 12 | public String toString() { 13 | StringBuilder outputBuilder = new 
StringBuilder(); 14 | for (Iterator it = this.iterator(); it.hasNext(); ) { 15 | Point identifier = it.next(); 16 | outputBuilder.append(String.format("(%s,%d),", Character.valueOf((char)(identifier.x + 65)), Integer.valueOf(identifier.y))); 17 | } 18 | 19 | return outputBuilder.toString(); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/general/CollectionSet.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.general; 2 | 3 | import java.util.Iterator; 4 | import java.util.TreeSet; 5 | 6 | public class CollectionSet> extends TreeSet implements Comparable> { 7 | 8 | private static final long serialVersionUID = 1839773136406309404L; 9 | 10 | @Override 11 | public int compareTo(TreeSet other) { 12 | TreeSet set = other; 13 | Iterator iterThis = iterator(); 14 | Iterator iterOther = set.iterator(); 15 | 16 | while (iterThis.hasNext() && iterOther.hasNext()) { 17 | T first = iterThis.next(); 18 | T second = iterOther.next(); 19 | int cmp = first.compareTo(second); 20 | if (cmp == 0) { 21 | continue; 22 | } 23 | return cmp; 24 | } 25 | if (iterThis.hasNext()) { 26 | return 1; 27 | } 28 | if (iterOther.hasNext()) { 29 | return -1; 30 | } 31 | return 0; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/pruning/Observation.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.pruning; 2 | 3 | public enum Observation { 4 | DEPENDENCY, 5 | NON_DEPENDENCY, 6 | MINIMAL_DEPENDENCY, 7 | CANDIDATE_MINIMAL_DEPENDENCY, 8 | MAXIMAL_NON_DEPENDENCY, 9 | CANDIDATE_MAXIMAL_NON_DEPENDENCY, 10 | EQUIVALENT; 11 | 12 | public boolean isCandidate() { 13 | return this == CANDIDATE_MAXIMAL_NON_DEPENDENCY || this == CANDIDATE_MINIMAL_DEPENDENCY; 14 | } 15 | 16 | public boolean isDependency() { 17 | return this == 
DEPENDENCY || this == MINIMAL_DEPENDENCY || this == CANDIDATE_MINIMAL_DEPENDENCY; 18 | } 19 | 20 | public boolean isNonDependency() { 21 | return this == NON_DEPENDENCY || this == MAXIMAL_NON_DEPENDENCY || this == CANDIDATE_MAXIMAL_NON_DEPENDENCY; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /dfd/dfdAlgorithm/src/fdiscovery/pruning/PruneInterface.java: -------------------------------------------------------------------------------- 1 | package fdiscovery.pruning; 2 | 3 | import fdiscovery.columns.ColumnCollection; 4 | 5 | public interface PruneInterface { 6 | 7 | public static final int SPLIT_THRESHOLD = 1000; 8 | 9 | public void rebalance(); 10 | public void rebalanceGroup(ColumnCollection groupKey); 11 | 12 | } 13 | -------------------------------------------------------------------------------- /ducc/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /ducc/ducc_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /ducc/ducc_algorithm_helper/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .classpath 3 | /.settings 4 | -------------------------------------------------------------------------------- /ducc/ducc_for_metanome/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /ducc/ducc_test_helper/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | /target 3 | /.settings 
-------------------------------------------------------------------------------- /fastfds/fastfds_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /fastfds/fastfds_algorithm/src/main/java/de/metanome/algorithms/fastfds/modules/container/DifferenceSet.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.fastfds.modules.container; 2 | 3 | import java.util.BitSet; 4 | 5 | import de.metanome.algorithms.fastfds.fastfds_helper.modules.container.AgreeSet; 6 | import de.metanome.algorithms.fastfds.fastfds_helper.util.BitSetUtil; 7 | 8 | public class DifferenceSet extends AgreeSet { 9 | 10 | public DifferenceSet(BitSet obs) { 11 | 12 | this.attributes = obs; 13 | } 14 | 15 | public DifferenceSet() { 16 | 17 | this(new BitSet()); 18 | } 19 | 20 | @Override 21 | public String toString_() { 22 | 23 | return "diff(" + BitSetUtil.convertToIntList(this.attributes).toString() 24 | + ")"; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /fastfds/fastfds_helper/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /fastfds/fastfds_helper/src/main/java/de/metanome/algorithms/fastfds/fastfds_helper/modules/container/StorageSet.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.fastfds.fastfds_helper.modules.container; 2 | 3 | public abstract class StorageSet { 4 | 5 | @Override 6 | public String toString() { 7 | 8 | return this.toString_(); 9 | } 10 | 11 | protected abstract String toString_(); 12 | } 13 | 
-------------------------------------------------------------------------------- /fastfds/fastfds_helper/src/main/java/de/metanome/algorithms/fastfds/fastfds_helper/util/BitSetUtil.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.fastfds.fastfds_helper.util; 2 | 3 | import java.util.BitSet; 4 | 5 | import it.unimi.dsi.fastutil.ints.IntArrayList; 6 | import it.unimi.dsi.fastutil.ints.IntList; 7 | 8 | public class BitSetUtil { 9 | 10 | public static IntList convertToIntList(BitSet set) { 11 | IntList bits = new IntArrayList(); 12 | int lastIndex = set.nextSetBit(0); 13 | while (lastIndex != -1) { 14 | bits.add(lastIndex); 15 | lastIndex = set.nextSetBit(lastIndex + 1); 16 | } 17 | return bits; 18 | } 19 | 20 | public static BitSet convertToBitSet(IntList list) { 21 | BitSet set = new BitSet(list.size()); 22 | for (int l : list) { 23 | set.set(l); 24 | } 25 | return set; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /fastfds/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | de.metanome.algorithms.fastfds 6 | FastFDsModules 7 | pom 8 | 9 | FastFDsModules 10 | 11 | 12 | de.metanome.algorithms 13 | algorithms 14 | 1.2-SNAPSHOT 15 | ../pom.xml 16 | 17 | 18 | 19 | UTF-8 20 | 21 | 22 | 23 | fastfds_helper 24 | fastfds_algorithm 25 | 26 | 27 | -------------------------------------------------------------------------------- /fdep/fdep_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /fdep/fdep_algorithm_improved/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | 
-------------------------------------------------------------------------------- /fdep/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | de.metanome.algorithms.fdep 6 | FDepModules 7 | pom 8 | 9 | FDepModules 10 | 11 | 12 | de.metanome.algorithms 13 | algorithms 14 | 1.2-SNAPSHOT 15 | ../pom.xml 16 | 17 | 18 | 19 | UTF-8 20 | 21 | 22 | 23 | fdep_algorithm 24 | fdep_algorithm_improved 25 | 26 | 27 | -------------------------------------------------------------------------------- /fdmine/fdmine_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /fdmine/fdmine_test_helper/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | /target 3 | /.settings -------------------------------------------------------------------------------- /fdmine/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | de.metanome.algorithms.fdmine 6 | FDMineModules 7 | pom 8 | 9 | FDMineModules 10 | 11 | 12 | de.metanome.algorithms 13 | algorithms 14 | 1.2-SNAPSHOT 15 | ../pom.xml 16 | 17 | 18 | 19 | UTF-8 20 | 21 | 22 | 23 | fdmine_test_helper 24 | fdmine_algorithm 25 | 26 | 27 | -------------------------------------------------------------------------------- /fun/.gitignore: -------------------------------------------------------------------------------- 1 | /.settings 2 | .project 3 | /target 4 | -------------------------------------------------------------------------------- /fun/fun_algorithm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- 
/fun/fun_algorithm_helper/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | .classpath 3 | /.settings 4 | -------------------------------------------------------------------------------- /fun/fun_for_metanome/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.settings 3 | .classpath 4 | .project 5 | -------------------------------------------------------------------------------- /fun/fun_test_helper/.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | /target 3 | /.settings -------------------------------------------------------------------------------- /hydra/src/main/java/ch/javasoft/bitset/search/ISubsetBackend.java: -------------------------------------------------------------------------------- 1 | package ch.javasoft.bitset.search; 2 | 3 | import java.util.Set; 4 | import java.util.function.Consumer; 5 | 6 | import ch.javasoft.bitset.IBitSet; 7 | 8 | public interface ISubsetBackend { 9 | 10 | boolean add(IBitSet bs); 11 | 12 | Set getAndRemoveGeneralizations(IBitSet invalidFD); 13 | 14 | boolean containsSubset(IBitSet add); 15 | 16 | void forEach(Consumer consumer); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /hydra/src/main/java/ch/javasoft/bitset/search/ITreeSearch.java: -------------------------------------------------------------------------------- 1 | package ch.javasoft.bitset.search; 2 | 3 | import java.util.Collection; 4 | import java.util.function.Consumer; 5 | 6 | import ch.javasoft.bitset.IBitSet; 7 | 8 | public interface ITreeSearch { 9 | 10 | boolean add(IBitSet bs); 11 | 12 | void forEachSuperSet(IBitSet bitset, Consumer consumer); 13 | 14 | void forEach(Consumer consumer); 15 | 16 | void remove(IBitSet remove); 17 | 18 | boolean containsSubset(IBitSet bitset); 19 | 20 | Collection getAndRemoveGeneralizations(IBitSet 
invalidDC); 21 | 22 | } -------------------------------------------------------------------------------- /hydra/src/main/java/de/hpi/naumann/dc/evidenceset/IEvidenceSet.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.evidenceset; 2 | 3 | import java.util.Iterator; 4 | import java.util.Set; 5 | 6 | import de.hpi.naumann.dc.predicates.sets.PredicateBitSet; 7 | 8 | public interface IEvidenceSet extends Iterable { 9 | 10 | boolean add(PredicateBitSet predicateSet); 11 | 12 | boolean add(PredicateBitSet create, long count); 13 | 14 | long getCount(PredicateBitSet predicateSet); 15 | 16 | Iterator iterator(); 17 | 18 | Set getSetOfPredicateSets(); 19 | 20 | int size(); 21 | 22 | boolean isEmpty(); 23 | 24 | } -------------------------------------------------------------------------------- /hydra/src/main/java/de/hpi/naumann/dc/predicates/PartitionRefiner.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.predicates; 2 | 3 | public interface PartitionRefiner { 4 | public boolean satisfies(int line1, int lin2); 5 | } 6 | -------------------------------------------------------------------------------- /hydra/src/main/java/de/hpi/naumann/dc/predicates/sets/PredicateSetFactory.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.predicates.sets; 2 | 3 | import ch.javasoft.bitset.IBitSet; 4 | import de.hpi.naumann.dc.predicates.Predicate; 5 | 6 | public class PredicateSetFactory { 7 | 8 | public static PredicateBitSet create(Predicate... 
predicates) { 9 | PredicateBitSet set = new PredicateBitSet(); 10 | for (Predicate p : predicates) 11 | set.add(p); 12 | return set; 13 | } 14 | 15 | public static PredicateBitSet create(IBitSet bitset) { 16 | return new PredicateBitSet(bitset); 17 | } 18 | 19 | public static PredicateBitSet create(PredicateBitSet pS) { 20 | return new PredicateBitSet(pS); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /hydra/src/test/java/de/hpi/naumann/dc/helpers/ParserHelperTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.helpers; 2 | 3 | import static org.junit.Assert.assertFalse; 4 | import static org.junit.Assert.assertTrue; 5 | 6 | import org.junit.Test; 7 | 8 | public class ParserHelperTest { 9 | @Test 10 | public void testIsDouble() { 11 | assertTrue(ParserHelper.isDouble("1.0")); 12 | assertFalse(ParserHelper.isDouble("Z2")); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /hydra/src/test/java/de/hpi/naumann/dc/input/ColumnTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.input; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class ColumnTest { 7 | 8 | @Test 9 | public void testToString() { 10 | Column c = new Column("relation", "test"); 11 | Assert.assertEquals("relation.test", c.toString()); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /hydra/src/test/java/de/hpi/naumann/dc/predicates/PredicateTest.java: -------------------------------------------------------------------------------- 1 | package de.hpi.naumann.dc.predicates; 2 | 3 | public class PredicateTest { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /set-version.sh: 
#!/bin/bash
#
# Sets the Metanome version used by the algorithms in this repository.
#
# Usage: set-version.sh <version>
#
# 1. Rewrites the <metanome.version> property in the pom.xml that sits next to
#    this script.
# 2. Runs "mvn versions:set" so the algorithm modules get the same version.

if [ $# -eq 0 ]; then
    echo "This script changes the Metanome version and sets the very same version for the here contained algorithms."
    echo "Usage: $0 <version>"
    exit 1
fi

# Directory containing this script (and the root pom.xml); abort if it cannot be resolved.
basedir="$(cd "$(dirname "$0")" && pwd)" || exit 1

# Escape the characters that are special in a sed replacement (backslash, '/', '&')
# so that an unusual version string cannot break or alter the substitution.
escaped_version="$(printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g')"

# Change the metanome.version variable for Metanome dependencies.
# The opening and closing tags are captured and re-emitted, so only the text
# between them is replaced and the line's indentation is preserved.
sed -e "s/\(<metanome\.version>\).*\(<\/metanome\.version>\)/\1${escaped_version}\2/" \
    "$basedir/pom.xml" > "$basedir/pom.xml.tmp" || exit 1
# Only replace the pom once the rewritten copy was produced successfully.
mv "$basedir/pom.xml.tmp" "$basedir/pom.xml" || exit 1

# Update the version of the algorithms.
mvn versions:set "-DnewVersion=$1" -DgenerateBackupPoms=false
| 3 | import de.metanome.algorithm_integration.AlgorithmExecutionException; 4 | 5 | import java.io.IOException; 6 | 7 | public class Main { 8 | 9 | public static void main(String[] args) throws IOException, AlgorithmExecutionException { 10 | TirelessAlgorithmLocal local = new TirelessAlgorithmLocal(); 11 | local.execute(); 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /tireless/src/main/java/de/metanome/algorithms/tireless/algorithm/Alignment.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.tireless.algorithm; 2 | 3 | import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpressionConjunction; 4 | 5 | public abstract class Alignment { 6 | protected RegularExpressionConjunction left; 7 | protected RegularExpressionConjunction right; 8 | 9 | public Alignment(RegularExpressionConjunction left, RegularExpressionConjunction right) { 10 | this.left = left; 11 | this.right = right; 12 | } 13 | 14 | public abstract RegularExpressionConjunction mergeExpressions(); 15 | } 16 | -------------------------------------------------------------------------------- /tireless/src/main/java/de/metanome/algorithms/tireless/postprocessing/CombinedPostprocessor.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.tireless.postprocessing; 2 | 3 | import de.metanome.algorithms.tireless.preprocessing.AlgorithmConfiguration; 4 | import de.metanome.algorithms.tireless.preprocessing.alphabet.Alphabet; 5 | import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpressionConjunction; 6 | 7 | public class CombinedPostprocessor { 8 | 9 | public CombinedPostprocessor (RegularExpressionConjunction expression, Alphabet alphabet, 10 | AlgorithmConfiguration configuration) { 11 | new GeneralizeCharClasses(expression, alphabet, 
configuration).generalizeCharacterClasses(); 12 | new CombineSimilarCharClasses(expression).combineClasses(); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tireless/src/main/java/de/metanome/algorithms/tireless/regularexpression/RegularExpressionComparator.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.tireless.regularexpression; 2 | 3 | import de.metanome.algorithms.tireless.regularexpression.containerclasses.RegularExpression; 4 | 5 | import java.util.Comparator; 6 | 7 | public class RegularExpressionComparator implements Comparator { 8 | @Override 9 | public int compare(RegularExpression o1, RegularExpression o2) { 10 | return Integer.compare(o1.getLength(), o2.getLength()); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /tireless/src/main/java/de/metanome/algorithms/tireless/regularexpression/containerclasses/ExpressionType.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.tireless.regularexpression.containerclasses; 2 | 3 | public enum ExpressionType { 4 | CHARACTER_CLASS, TOKEN, CONJUNCTION, DISJUNCTION_OF_TOKENS 5 | } 6 | -------------------------------------------------------------------------------- /tireless/src/test/java/de/metanome/algorithms/tireless/algorithm/CombinedAlgorithmTest.java: -------------------------------------------------------------------------------- 1 | package de.metanome.algorithms.tireless.algorithm; 2 | 3 | public class CombinedAlgorithmTest { 4 | } 5 | -------------------------------------------------------------------------------- /tireless/src/test/resources/test_input_with_duplicates.csv: -------------------------------------------------------------------------------- 1 | a 2 | a 3 | b 4 | a 5 | b 6 | a 7 | b 8 | b 9 | 
/**
 * Size estimates for strings, rounded up to a multiple of 8 bytes.
 *
 * <p>NOTE(review): the method names suggest the estimates target 32-bit and
 * 64-bit JVMs respectively; the fixed overheads (40 and 64 bytes) are taken
 * as given and should be confirmed against the intended JVM layout.
 */
public class MeasurementUtils {

    /**
     * Estimates a string's size as 40 bytes of fixed overhead plus 2 bytes per
     * character, rounded up to the next multiple of 8.
     *
     * @param s the string to measure; must not be {@code null}
     * @return the estimated size in bytes, always a multiple of 8
     */
    public static long sizeOf32(String s) {
        // 2L forces long arithmetic so very long strings cannot overflow an int.
        return roundUpToMultipleOf8(40 + 2L * s.length());
    }

    /**
     * Estimates a string's size as 64 bytes of fixed overhead plus 2 bytes per
     * character, rounded up to the next multiple of 8.
     *
     * @param s the string to measure; must not be {@code null}
     * @return the estimated size in bytes, always a multiple of 8
     */
    public static long sizeOf64(String s) {
        return roundUpToMultipleOf8(64 + 2L * s.length());
    }

    /**
     * Rounds a non-negative byte count up to the next multiple of 8 using
     * integer arithmetic. Dividing two longs and then calling Math.ceil would
     * truncate before the ceiling is applied and therefore round down.
     */
    private static long roundUpToMultipleOf8(long bytes) {
        return (bytes + 7) / 8 * 8;
    }
}