├── README ├── .gitignore ├── code ├── .gitignore ├── EpisodesParser │ ├── Tests │ │ ├── Tests.cpp │ │ ├── Tests.pro │ │ ├── TestParser.h │ │ └── TestParser.cpp │ ├── EpisodeDurationDiscretizer.h │ ├── EpisodesParser.pri │ ├── EpisodesSpeeds.csv │ ├── EpisodeDurationDiscretizer.cpp │ ├── Parser.h │ ├── typedefs.cpp │ └── typedefs.h ├── Analytics │ ├── Tests │ │ ├── TestFPTree.h │ │ ├── TestTiltedTimeWindow.h │ │ ├── TestPatternTree.h │ │ ├── TestFPGrowth.h │ │ ├── TestRuleMiner.h │ │ ├── Tests.pro │ │ ├── TestFPStream.h │ │ ├── Tests.cpp │ │ ├── TestRuleMiner.cpp │ │ ├── TestFPTree.cpp │ │ ├── TestFPGrowth.cpp │ │ ├── TestPatternTree.cpp │ │ ├── TestTiltedTimeWindow.cpp │ │ └── TestFPStream.cpp │ ├── Analytics.pri │ ├── RuleMiner.h │ ├── FPTree.h │ ├── TiltedTimeWindow.h │ ├── PatternTree.h │ ├── Constraints.h │ ├── FPGrowth.h │ ├── FPStream.h │ ├── Analyst.h │ ├── Item.cpp │ ├── FPNode.h │ ├── Item.h │ ├── PatternTree.cpp │ ├── FPTree.cpp │ ├── TiltedTimeWindow.cpp │ ├── RuleMiner.cpp │ └── Constraints.cpp ├── WPOAnalytics.pro ├── UI │ ├── UI.pri │ ├── ConceptHierarchyCompleter.h │ ├── CausesTableFilterProxyModel.h │ ├── SettingsDialog.h │ ├── ConceptHierarchyCompleter.cpp │ ├── CausesTableFilterProxyModel.cpp │ ├── MainWindow.h │ └── SettingsDialog.cpp ├── main.cpp └── UNLICENSE ├── docs ├── presentation 2010-03-02 │ ├── concept.png │ ├── concept.graffle │ ├── presentation.key │ └── presentation.pdf ├── presentation 2011-02-08 │ ├── presentation.key │ └── presentation.pdf ├── defense presentation │ ├── defense presentation.pdf │ └── defense presentation without movies.key ├── report │ └── figures │ │ ├── OLAP │ │ ├── data cube │ │ │ ├── ddc growth.png │ │ │ ├── ddc levels.png │ │ │ ├── prefix sum.png │ │ │ ├── ddc partitioning.png │ │ │ ├── iris-histogram-1D.png │ │ │ ├── iris-histogram-2D.png │ │ │ ├── annotated data cube.png │ │ │ ├── relative prefix sum.png │ │ │ ├── data cube generalization.png │ │ │ ├── relative prefix sum update.png │ │ │ ├── iris dataset 
data cube representation.png │ │ │ └── relative prefix sum overlay calculation.png │ │ └── stream cube │ │ │ └── critical layers.png │ │ ├── anomaly detection │ │ ├── example.png │ │ ├── vilalta-ma.png │ │ ├── collective anomaly example.png │ │ └── contextual anomaly example.png │ │ ├── implementation │ │ ├── 4. compared.png │ │ ├── 1. analysis done.png │ │ ├── 2. sorted and filtered.png │ │ └── 3. concept hierarchy autocomplete.png │ │ ├── mining │ │ └── concept hierarchy.png │ │ ├── stream mining │ │ ├── fp-stream pattern-tree.png │ │ ├── fp-stream frequent patterns.png │ │ ├── plc vs lc error bound ccdf.png │ │ ├── fp-stream natural tilted-time window model.png │ │ └── fp-stream tilted-time windows embedded in pattern-tree.png │ │ └── motivation │ │ ├── episodes analysis - episodes.png │ │ └── episodes analysis - page loading performance.png ├── proposal │ └── images │ │ ├── episodes analysis - episodes o.png │ │ ├── episodes analysis - episodes.png │ │ └── episodes analysis - page loading performance.png └── literature study presentation │ └── Literature study presentation.key └── .gitmodules /README: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *build-desktop 2 | *.pro.user.* 3 | config 4 | -------------------------------------------------------------------------------- /code/.gitignore: -------------------------------------------------------------------------------- 1 | EpisodesParser-build-desktop 2 | *.pro.user 3 | -------------------------------------------------------------------------------- /docs/presentation 2010-03-02/concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2010-03-02/concept.png 
-------------------------------------------------------------------------------- /docs/presentation 2010-03-02/concept.graffle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2010-03-02/concept.graffle -------------------------------------------------------------------------------- /docs/presentation 2010-03-02/presentation.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2010-03-02/presentation.key -------------------------------------------------------------------------------- /docs/presentation 2010-03-02/presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2010-03-02/presentation.pdf -------------------------------------------------------------------------------- /docs/presentation 2011-02-08/presentation.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2011-02-08/presentation.key -------------------------------------------------------------------------------- /docs/presentation 2011-02-08/presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/presentation 2011-02-08/presentation.pdf -------------------------------------------------------------------------------- /docs/defense presentation/defense presentation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/defense presentation/defense presentation.pdf -------------------------------------------------------------------------------- 
/docs/report/figures/OLAP/data cube/ddc growth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/ddc growth.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/ddc levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/ddc levels.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/prefix sum.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/prefix sum.png -------------------------------------------------------------------------------- /docs/report/figures/anomaly detection/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/anomaly detection/example.png -------------------------------------------------------------------------------- /docs/report/figures/implementation/4. compared.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/implementation/4. 
compared.png -------------------------------------------------------------------------------- /docs/report/figures/mining/concept hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/mining/concept hierarchy.png -------------------------------------------------------------------------------- /docs/report/figures/anomaly detection/vilalta-ma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/anomaly detection/vilalta-ma.png -------------------------------------------------------------------------------- /docs/proposal/images/episodes analysis - episodes o.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/proposal/images/episodes analysis - episodes o.png -------------------------------------------------------------------------------- /docs/proposal/images/episodes analysis - episodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/proposal/images/episodes analysis - episodes.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/ddc partitioning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/ddc partitioning.png -------------------------------------------------------------------------------- /docs/report/figures/implementation/1. analysis done.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/implementation/1. 
analysis done.png -------------------------------------------------------------------------------- /code/EpisodesParser/Tests/Tests.cpp: -------------------------------------------------------------------------------- 1 | #include "TestParser.h" 2 | 3 | int main() { 4 | TestParser parser; 5 | // QTest::qExec() returns non-zero when a test fails; keep that result. 6 | const int status = QTest::qExec(&parser); 7 | return status; 8 | } 9 | -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/iris-histogram-1D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/iris-histogram-1D.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/iris-histogram-2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/iris-histogram-2D.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/stream cube/critical layers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/stream cube/critical layers.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/annotated data cube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/annotated data cube.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/relative prefix sum.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/relative prefix sum.png -------------------------------------------------------------------------------- /docs/report/figures/stream mining/fp-stream pattern-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/stream mining/fp-stream pattern-tree.png -------------------------------------------------------------------------------- /docs/report/figures/implementation/2. sorted and filtered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/implementation/2. sorted and filtered.png -------------------------------------------------------------------------------- /docs/defense presentation/defense presentation without movies.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/defense presentation/defense presentation without movies.key -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/data cube generalization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/data cube generalization.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/relative prefix sum update.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/relative prefix sum update.png -------------------------------------------------------------------------------- 
/docs/report/figures/motivation/episodes analysis - episodes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/motivation/episodes analysis - episodes.png -------------------------------------------------------------------------------- /docs/report/figures/stream mining/fp-stream frequent patterns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/stream mining/fp-stream frequent patterns.png -------------------------------------------------------------------------------- /docs/report/figures/stream mining/plc vs lc error bound ccdf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/stream mining/plc vs lc error bound ccdf.png -------------------------------------------------------------------------------- /docs/literature study presentation/Literature study presentation.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/literature study presentation/Literature study presentation.key -------------------------------------------------------------------------------- /docs/proposal/images/episodes analysis - page loading performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/proposal/images/episodes analysis - page loading performance.png -------------------------------------------------------------------------------- /docs/report/figures/anomaly detection/collective anomaly example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/anomaly detection/collective anomaly example.png -------------------------------------------------------------------------------- /docs/report/figures/anomaly detection/contextual anomaly example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/anomaly detection/contextual anomaly example.png -------------------------------------------------------------------------------- /docs/report/figures/implementation/3. concept hierarchy autocomplete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/implementation/3. concept hierarchy autocomplete.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/iris dataset data cube representation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/iris dataset data cube representation.png -------------------------------------------------------------------------------- /docs/report/figures/OLAP/data cube/relative prefix sum overlay calculation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/OLAP/data cube/relative prefix sum overlay calculation.png -------------------------------------------------------------------------------- /docs/report/figures/motivation/episodes analysis - page loading performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/motivation/episodes analysis - page 
loading performance.png -------------------------------------------------------------------------------- /docs/report/figures/stream mining/fp-stream natural tilted-time window model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/stream mining/fp-stream natural tilted-time window model.png -------------------------------------------------------------------------------- /docs/report/figures/stream mining/fp-stream tilted-time windows embedded in pattern-tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wimleers/master-thesis/HEAD/docs/report/figures/stream mining/fp-stream tilted-time windows embedded in pattern-tree.png -------------------------------------------------------------------------------- /code/EpisodesParser/Tests/Tests.pro: -------------------------------------------------------------------------------- 1 | DEPENDPATH += .. 
2 | include (../EpisodesParser.pri) 3 | 4 | CONFIG += qtestlib 5 | macx { 6 | CONFIG -= app_bundle 7 | } 8 | TARGET = Tests 9 | 10 | 11 | HEADERS += TestParser.h 12 | SOURCES += Tests.cpp \ 13 | TestParser.cpp 14 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestFPTree.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTFPTREE_H 2 | #define TESTFPTREE_H 3 | 4 | #include 5 | #include 6 | #include "../FPTree.h" 7 | 8 | using namespace Analytics; 9 | 10 | class TestFPTree: public QObject { 11 | Q_OBJECT 12 | 13 | private slots: 14 | void basic(); 15 | }; 16 | 17 | #endif // TESTFPTREE_H 18 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestTiltedTimeWindow.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTTILTEDTIMEWINDOW_H 2 | #define TESTTILTEDTIMEWINDOW_H 3 | 4 | #include 5 | #include "../TiltedTimeWindow.h" 6 | 7 | using namespace Analytics; 8 | 9 | class TestTiltedTimeWindow : public QObject { 10 | Q_OBJECT 11 | 12 | private slots: 13 | void basic(); 14 | }; 15 | 16 | #endif // TESTTILTEDTIMEWINDOW_H 17 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestPatternTree.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTPATTERNTREE_H 2 | #define TESTPATTERNTREE_H 3 | 4 | #include 5 | #include "../PatternTree.h" 6 | 7 | using namespace Analytics; 8 | 9 | class TestPatternTree : public QObject { 10 | Q_OBJECT 11 | 12 | private slots: 13 | void basic(); 14 | void additionsRemainInSync(); 15 | }; 16 | 17 | #endif // TESTPATTERNTREE_H 18 | -------------------------------------------------------------------------------- /code/WPOAnalytics.pro: -------------------------------------------------------------------------------- 1 | DEPENDPATH += EpisodesParser \ 
2 | Analytics \ 3 | UI 4 | include("EpisodesParser/EpisodesParser.pri") 5 | include("Analytics/Analytics.pri") 6 | include("UI/UI.pri") 7 | 8 | 9 | # Enable compiler optimizations when building in release mode. 10 | QMAKE_CXXFLAGS_RELEASE = -O3 \ 11 | -funroll-loops \ 12 | -fstrict-aliasing 13 | 14 | SOURCES += main.cpp 15 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "code/EpisodesParser/QCachingLocale"] 2 | path = code/EpisodesParser/QCachingLocale 3 | url = git://github.com/wimleers/QCachingLocale.git 4 | [submodule "code/EpisodesParser/QBrowsCap"] 5 | path = code/EpisodesParser/QBrowsCap 6 | url = git@github.com:wimleers/QBrowsCap.git 7 | [submodule "code/EpisodesParser/QGeoIP"] 8 | path = code/EpisodesParser/QGeoIP 9 | url = git@github.com:wimleers/QGeoIP.git 10 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestFPGrowth.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTFPGROWTH_H 2 | #define TESTFPGROWTH_H 3 | 4 | #include 5 | #include 6 | #include "../FPGrowth.h" 7 | 8 | using namespace Analytics; 9 | 10 | class TestFPGrowth : public QObject { 11 | Q_OBJECT 12 | 13 | private slots: 14 | // void initTestCase() {} 15 | // void cleanupTestCase() {} 16 | // void init(); 17 | // void cleanup(); 18 | void basic(); 19 | void withConstraints(); 20 | }; 21 | 22 | #endif // TESTFPGROWTH_H 23 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestRuleMiner.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTRULEMINER_H 2 | #define TESTRULEMINER_H 3 | 4 | #include 5 | #include 6 | #include "../FPGrowth.h" 7 | #include "../RuleMiner.h" 8 | 9 | using namespace Analytics; 10 | 11 | class TestRuleMiner : public QObject { 12 
| Q_OBJECT 13 | 14 | private slots: 15 | // void initTestCase() {} 16 | // void cleanupTestCase() {} 17 | // void init(); 18 | // void cleanup(); 19 | void basic(); 20 | }; 21 | 22 | #endif // TESTRULEMINER_H 23 | -------------------------------------------------------------------------------- /code/EpisodesParser/Tests/TestParser.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTPARSER_H 2 | #define TESTPARSER_H 3 | 4 | #include 5 | #include 6 | #include "../Parser.h" 7 | 8 | using namespace EpisodesParser; 9 | 10 | class TestParser: public QObject { 11 | Q_OBJECT 12 | 13 | private slots: 14 | // void initTestCase() {} 15 | // void cleanupTestCase() {} 16 | void init(); 17 | void cleanup(); 18 | void parse(); 19 | void mapLineToEpisodesLogLine_data(); 20 | void mapLineToEpisodesLogLine(); 21 | }; 22 | 23 | #endif // TESTPARSER_H 24 | -------------------------------------------------------------------------------- /code/Analytics/Tests/Tests.pro: -------------------------------------------------------------------------------- 1 | DEPENDPATH += .. 
2 | include (../Analytics.pri) 3 | 4 | CONFIG += qtestlib 5 | macx { 6 | CONFIG -= app_bundle 7 | } 8 | TARGET = Tests 9 | 10 | 11 | HEADERS += TestFPTree.h \ 12 | TestFPGrowth.h \ 13 | TestRuleMiner.h \ 14 | TestTiltedTimeWindow.h \ 15 | TestPatternTree.h \ 16 | TestFPStream.h 17 | SOURCES += Tests.cpp \ 18 | TestFPTree.cpp \ 19 | TestFPGrowth.cpp \ 20 | TestRuleMiner.cpp \ 21 | TestTiltedTimeWindow.cpp \ 22 | TestPatternTree.cpp \ 23 | TestFPStream.cpp 24 | -------------------------------------------------------------------------------- /code/UI/UI.pri: -------------------------------------------------------------------------------- 1 | QT += core gui 2 | 3 | INCLUDEPATH += $${PWD} 4 | 5 | SOURCES += \ 6 | $${PWD}/MainWindow.cpp \ 7 | $${PWD}/ConceptHierarchyCompleter.cpp \ 8 | $${PWD}/CausesTableFilterProxyModel.cpp \ 9 | $${PWD}/SettingsDialog.cpp 10 | HEADERS += \ 11 | $${PWD}/MainWindow.h \ 12 | $${PWD}/ConceptHierarchyCompleter.h \ 13 | $${PWD}/CausesTableFilterProxyModel.h \ 14 | $${PWD}/SettingsDialog.h 15 | 16 | # Disable qDebug() output when in release mode. 17 | CONFIG(release, debug|release):DEFINES += QT_NO_DEBUG_OUTPUT 18 | 19 | # Add a DEBUG define when in debug mode. 
20 | CONFIG(debug, debug|release):DEFINES += DEBUG 21 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestFPStream.h: -------------------------------------------------------------------------------- 1 | #ifndef TESTFPSTREAM_H 2 | #define TESTFPSTREAM_H 3 | 4 | #include 5 | #include "../FPStream.h" 6 | 7 | using namespace Analytics; 8 | 9 | class TestFPStream : public QObject { 10 | Q_OBJECT 11 | 12 | private slots: 13 | void calculateDroppableTail(); 14 | void basic(); 15 | 16 | private: 17 | void verifyNode(const PatternTree & patternTree, 18 | const FPNode * const node, 19 | ItemID itemID, 20 | unsigned int nodeID, 21 | const ItemIDList & referencePattern, 22 | const QVector & referenceBuckets); 23 | }; 24 | 25 | #endif // TESTFPSTREAM_H 26 | -------------------------------------------------------------------------------- /code/Analytics/Tests/Tests.cpp: -------------------------------------------------------------------------------- 1 | #include "TestFPTree.h" 2 | #include "TestFPGrowth.h" 3 | #include "TestRuleMiner.h" 4 | #include "TestTiltedTimeWindow.h" 5 | #include "TestPatternTree.h" 6 | #include "TestFPStream.h" 7 | 8 | int main() { 9 | TestFPTree FPTree; 10 | int status = QTest::qExec(&FPTree); 11 | 12 | TestFPGrowth FPGrowth; 13 | status |= QTest::qExec(&FPGrowth); 14 | 15 | TestRuleMiner ruleMiner; 16 | status |= QTest::qExec(&ruleMiner); 17 | 18 | // FP-Stream related classes & tests. 
19 | TestTiltedTimeWindow tiltedTimeWindow; 20 | status |= QTest::qExec(&tiltedTimeWindow); 21 | 22 | TestPatternTree patternTree; 23 | status |= QTest::qExec(&patternTree); 24 | 25 | TestFPStream FPStream; 26 | status |= QTest::qExec(&FPStream); 27 | // Non-zero when any suite above reported a failure. 28 | return status; 29 | } 30 | -------------------------------------------------------------------------------- /code/EpisodesParser/EpisodeDurationDiscretizer.h: -------------------------------------------------------------------------------- 1 | #ifndef EPISODEDURATIONDISCRETIZER_H 2 | #define EPISODEDURATIONDISCRETIZER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "typedefs.h" 10 | 11 | namespace EpisodesParser { 12 | class EpisodeDurationDiscretizer { 13 | public: 14 | EpisodeDurationDiscretizer(); 15 | bool parseCsvFile(const QString & csvFile); 16 | EpisodeSpeed mapToSpeed(const EpisodeName & name, const EpisodeDuration & duration) const; 17 | 18 | private: 19 | QString csvFile; 20 | QMap > thresholds; 21 | }; 22 | } 23 | 24 | #endif // EPISODEDURATIONDISCRETIZER_H 25 | -------------------------------------------------------------------------------- /code/UI/ConceptHierarchyCompleter.h: -------------------------------------------------------------------------------- 1 | #ifndef CONCEPTHIERARCHYCOMPLETER 2 | #define CONCEPTHIERARCHYCOMPLETER 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class ConceptHierarchyCompleter : public QCompleter { 12 | 13 | Q_OBJECT 14 | 15 | public: 16 | ConceptHierarchyCompleter(QObject * parent = NULL); 17 | ConceptHierarchyCompleter(QAbstractItemModel * model, QObject * parent = NULL); 18 | 19 | protected: 20 | void init(); 21 | 22 | QStringList splitPath(const QString & path) const; 23 | QString pathFromIndex(const QModelIndex &index) const; 24 | 25 | QString lineageSeparator; 26 | QString entrySeparator; 27 | }; 28 | 29 | #endif // CONCEPTHIERARCHYCOMPLETER 30 | 
-------------------------------------------------------------------------------- /code/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "UI/MainWindow.h" 5 | #include "EpisodesParser/QCachingLocale/QCachingLocale.h" 6 | 7 | int main(int argc, char *argv[]) { 8 | QCachingLocale cl; 9 | 10 | // Merely instantiating QCachingLocale activates it. Use Q_UNUSED to prevent 11 | // compiler warnings. 12 | Q_UNUSED(cl); 13 | 14 | const int RESTART_CODE = 1000; 15 | int r; 16 | 17 | do { 18 | QApplication app(argc, argv); 19 | 20 | QCoreApplication::setOrganizationName("WimLeers"); 21 | QCoreApplication::setOrganizationDomain("wimleers.com"); 22 | QCoreApplication::setApplicationName("WPO Analytics"); 23 | 24 | MainWindow * mainWindow = new MainWindow(); 25 | mainWindow->show(); 26 | 27 | r = app.exec(); 28 | } while (r == RESTART_CODE); 29 | 30 | return r; 31 | } 32 | -------------------------------------------------------------------------------- /code/Analytics/Analytics.pri: -------------------------------------------------------------------------------- 1 | QT += core 2 | QT -= gui 3 | 4 | INCLUDEPATH += $${PWD} 5 | 6 | SOURCES += \ 7 | $${PWD}/Item.cpp \ 8 | $${PWD}/FPTree.cpp \ 9 | $${PWD}/FPGrowth.cpp\ 10 | $${PWD}/RuleMiner.cpp \ 11 | $${PWD}/Analyst.cpp \ 12 | $${PWD}/Constraints.cpp \ 13 | $${PWD}/FPStream.cpp \ 14 | $${PWD}/PatternTree.cpp \ 15 | $${PWD}/TiltedTimeWindow.cpp 16 | HEADERS += \ 17 | $${PWD}/Item.h \ 18 | $${PWD}/FPNode.h \ 19 | $${PWD}/FPTree.h \ 20 | $${PWD}/FPGrowth.h \ 21 | $${PWD}/RuleMiner.h \ 22 | $${PWD}/Analyst.h \ 23 | $${PWD}/Constraints.h \ 24 | $${PWD}/FPStream.h \ 25 | $${PWD}/PatternTree.h \ 26 | $${PWD}/TiltedTimeWindow.h 27 | 28 | # Disable qDebug() output when in release mode. 29 | CONFIG(release, debug|release):DEFINES += QT_NO_DEBUG_OUTPUT 30 | 31 | # Add a DEBUG define when in debug mode. 
32 | CONFIG(debug, debug|release):DEFINES += DEBUG 33 | -------------------------------------------------------------------------------- /code/EpisodesParser/EpisodesParser.pri: -------------------------------------------------------------------------------- 1 | # The network module is necessary to be able to use the QHostAddress class. 2 | QT += core network 3 | QT -= gui 4 | 5 | INCLUDEPATH += \ 6 | $${PWD} \ 7 | $${PWD}/QCachingLocale \ 8 | $${PWD}/QBrowsCap \ 9 | $${PWD}/QGeoIP 10 | DEPENDPATH += \ 11 | $${PWD}/QCachingLocale \ 12 | $${PWD}/QBrowsCap \ 13 | $${PWD}/QGeoIP 14 | 15 | include("QBrowsCap/QBrowsCap.pri") 16 | include("QGeoIP/QGeoIP.pri") 17 | 18 | SOURCES += \ 19 | $${PWD}/Parser.cpp \ 20 | $${PWD}/typedefs.cpp \ 21 | $${PWD}/EpisodeDurationDiscretizer.cpp 22 | 23 | HEADERS += \ 24 | $${PWD}/Parser.h \ 25 | $${PWD}/typedefs.h \ 26 | $${PWD}/QCachingLocale/QCachingLocale.h \ 27 | $${PWD}/EpisodeDurationDiscretizer.h 28 | 29 | # Disable qDebug() output when in release mode. 30 | CONFIG(release, debug|release):DEFINES += QT_NO_DEBUG_OUTPUT 31 | 32 | # Add a DEBUG define when in debug mode. 
33 | CONFIG(debug, debug|release):DEFINES += DEBUG 34 | -------------------------------------------------------------------------------- /code/UI/CausesTableFilterProxyModel.h: -------------------------------------------------------------------------------- 1 | #ifndef CAUSESTABLEFILTERPROXYMODEL_H 2 | #define CAUSESTABLEFILTERPROXYMODEL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | class CausesTableFilterProxyModel : public QSortFilterProxyModel { 12 | 13 | Q_OBJECT 14 | 15 | public: 16 | explicit CausesTableFilterProxyModel(QObject * parent = NULL); 17 | 18 | void setEpisodesColumn(int col); 19 | void setCircumstancesColumn(int col); 20 | 21 | void setEpisodeFilter(const QString & filter); 22 | void setCircumstancesFilter(const QStringList & filter); 23 | 24 | protected: 25 | bool filterAcceptsRow(int sourceRow, const QModelIndex & sourceParent) const; 26 | 27 | private: 28 | int episodesColumn; 29 | int circumstancesColumn; 30 | QRegExp episodesFilter; 31 | QList circumstancesFilter; 32 | }; 33 | 34 | #endif // CAUSESTABLEFILTERPROXYMODEL_H 35 | -------------------------------------------------------------------------------- /code/EpisodesParser/EpisodesSpeeds.csv: -------------------------------------------------------------------------------- 1 | totaltime,fast,300,acceptable,2000,slow 2 | pageready,fast,300,acceptable,2000,slow 3 | domready,fast,150,acceptable,1000,slow 4 | backend,fast,100,acceptable,500,slow 5 | frontend,fast,100,acceptable,1500,slow 6 | headerjs,fast,100,acceptable,1000,slow 7 | footerjs,fast,100,acceptable,1000,slow 8 | css,fast,100,acceptable,500,slow 9 | DrupalBehaviors,fast,100,acceptable,200,slow 10 | gaTrackerAttach,fast,10,acceptable,20,slow 11 | tabs,fast,10,acceptable,20,slow 12 | filefieldValidateAutoAttach,fast,10,acceptable,20,slow 13 | HierarchicalSelect,fast,10,acceptable,20,slow 14 | tableDrag,fast,10,acceptable,20,slow 15 | collapse,fast,10,acceptable,20,slow 16 | 
tableHeader,fast,10,acceptable,20,slow 17 | insert,fast,10,acceptable,20,slow 18 | ahah,fast,10,acceptable,20,slow 19 | textarea,fast,10,acceptable,20,slow 20 | ToThePointShowHideChangelog,fast,10,acceptable,20,slow 21 | attachWysiwyg,fast,10,acceptable,20,slow 22 | filefieldPreviewLinks,fast,10,acceptable,20,slow 23 | filefieldAdmin,fast,10,acceptable,20,slow 24 | filefieldButtons,fast,10,acceptable,20,slow 25 | autocomplete,fast,50,acceptable,200,slow 26 | -------------------------------------------------------------------------------- /code/UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <http://unlicense.org/> 25 | -------------------------------------------------------------------------------- /code/Analytics/RuleMiner.h: -------------------------------------------------------------------------------- 1 | #ifndef RULEMINER_H 2 | #define RULEMINER_H 3 | 4 | #include "Item.h" 5 | #include "Constraints.h" 6 | #include "FPGrowth.h" 7 | #include "PatternTree.h" 8 | #include 9 | 10 | 11 | namespace Analytics { 12 | 13 | #ifdef DEBUG 14 | //#define RULEMINER_DEBUG 0 15 | #endif 16 | 17 | class RuleMiner { 18 | public: 19 | static QList mineAssociationRules(QList frequentItemsets, Confidence minimumConfidence, const Constraints & ruleConsequentConstraints, const FPGrowth * fpgrowth); 20 | static QList mineAssociationRules(QList frequentItemsets, Confidence minimumConfidence, const Constraints & ruleConsequentConstraints, const PatternTree & patternTree, uint from, uint to); 21 | 22 | protected: 23 | static QList generateAssociationRulesForFrequentItemset(FrequentItemset frequentItemset, QList consequents, Confidence minimumConfidence, const FPGrowth * fpgrowth); 24 | static QList generateAssociationRulesForFrequentItemset(FrequentItemset frequentItemset, QList consequents, Confidence minimumConfidence, const PatternTree & patternTree, uint from, uint to); 25 | static ItemIDList getAntecedent(const ItemIDList & frequentItemset, const ItemIDList & consequent); 26 | static QList generateCandidateItemsets(const QList & frequentItemsubsets); 27 | }; 28 | 29 | } 30 | #endif // RULEMINER_H 31 | -------------------------------------------------------------------------------- /code/UI/SettingsDialog.h: -------------------------------------------------------------------------------- 1 | #ifndef SETTINGSDIALOG_H 2 | #define SETTINGSDIALOG_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | class
SettingsDialog : public QDialog { 19 | 20 | Q_OBJECT 21 | 22 | public: 23 | explicit SettingsDialog(QWidget * parent = NULL); 24 | 25 | static double errorMarginToAbsolute(double minSupport, double errorMargin); 26 | static double absoluteToErrorMargin(double minSupport, double minPatternTreeSupport); 27 | 28 | signals: 29 | void settingsChanged(); 30 | 31 | protected slots: 32 | void minSupportChanged(double value); 33 | void minConfidenceChanged(double value); 34 | void patternTreeSupportErrorMarginChanged(double value); 35 | 36 | void browseForFile(); 37 | 38 | void buttonRestoreDefaults(); 39 | void buttonCancel(); 40 | void buttonSave(); 41 | 42 | void restart(); 43 | 44 | protected: 45 | QWidget * createAnalystTab(); 46 | QWidget * createParserTab(); 47 | 48 | // Analyst settings tab. 49 | QDoubleSpinBox * minSupport; 50 | QDoubleSpinBox * minConfidence; 51 | QDoubleSpinBox * patternTreeSupportErrorMargin; 52 | QLabel * resultingParametersMinSupport; 53 | QLabel * resultingParametersMinConfidence; 54 | QLabel * resultingParametersMinPatternTreeSupport; 55 | 56 | // Parser settings tab. 
57 | QLineEdit * parserEpisodesDiscretizerFileLineEdit; 58 | }; 59 | 60 | #endif // SETTINGSDIALOG_H 61 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestRuleMiner.cpp: -------------------------------------------------------------------------------- 1 | #include "TestRuleMiner.h" 2 | 3 | void TestRuleMiner::basic() { 4 | QList transactions; 5 | transactions.append(QStringList() << "A" << "B" << "C" << "D"); 6 | transactions.append(QStringList() << "A" << "B"); 7 | transactions.append(QStringList() << "A" << "C"); 8 | transactions.append(QStringList() << "A" << "B" << "C"); 9 | transactions.append(QStringList() << "A" << "D"); 10 | transactions.append(QStringList() << "A" << "C" << "D"); 11 | transactions.append(QStringList() << "C" << "B"); 12 | transactions.append(QStringList() << "B" << "C"); 13 | transactions.append(QStringList() << "C" << "D"); 14 | transactions.append(QStringList() << "C" << "E"); 15 | 16 | Constraints constraints; 17 | 18 | FPNode::resetLastNodeID(); 19 | ItemIDNameHash itemIDNameHash; 20 | ItemNameIDHash itemNameIDHash; 21 | ItemIDList sortedFrequentItemIDs; 22 | FPGrowth * fpgrowth = new FPGrowth(transactions, 0.4 * transactions.size(), &itemIDNameHash, &itemNameIDHash, &sortedFrequentItemIDs); 23 | QList frequentItemsets = fpgrowth->mineFrequentItemsets(FPGROWTH_SYNC); 24 | 25 | QList associationRules = RuleMiner::mineAssociationRules(frequentItemsets, 0.8, constraints, fpgrowth); 26 | 27 | // Helpful for debugging/expanding this test. 28 | // Currently, this should match: 29 | // ({B(1)} => {C(2)} (conf=0.8)) 30 | //qDebug() << associationRules; 31 | 32 | // Verify the results. 
33 | QCOMPARE(associationRules.size(), 1); 34 | QCOMPARE(associationRules[0].antecedent, (ItemIDList() << 1)); 35 | QCOMPARE(associationRules[0].consequent, (ItemIDList() << 2)); 36 | QCOMPARE(associationRules[0].support, (SupportCount) 4); 37 | QCOMPARE(associationRules[0].confidence, (float) 0.8); 38 | 39 | delete fpgrowth; 40 | } 41 | -------------------------------------------------------------------------------- /code/UI/ConceptHierarchyCompleter.cpp: -------------------------------------------------------------------------------- 1 | #include "ConceptHierarchyCompleter.h" 2 | 3 | ConceptHierarchyCompleter::ConceptHierarchyCompleter(QObject * parent) : 4 | QCompleter(parent) 5 | { 6 | this->init(); 7 | } 8 | 9 | ConceptHierarchyCompleter::ConceptHierarchyCompleter(QAbstractItemModel * model, QObject * parent) 10 | :QCompleter(model, parent) 11 | { 12 | this->init(); 13 | } 14 | 15 | 16 | //------------------------------------------------------------------------------ 17 | // Protected methods. 18 | 19 | void ConceptHierarchyCompleter::init() { 20 | this->lineageSeparator = ":"; 21 | this->entrySeparator = ", "; 22 | } 23 | 24 | /** 25 | * Override of splitPath(). 26 | */ 27 | QStringList ConceptHierarchyCompleter::splitPath(const QString & path) const { 28 | return path.split(this->entrySeparator).last() // Get the last path. 29 | .split(this->lineageSeparator); // And split it. 30 | } 31 | 32 | /** 33 | * Override of pathFromIndex(). 34 | */ 35 | QString ConceptHierarchyCompleter::pathFromIndex(const QModelIndex &index) const { 36 | // Calculate the last path. 37 | QStringList lineage; 38 | for (QModelIndex i = index; i.isValid(); i = i.parent()) 39 | lineage.prepend(model()->data(i, completionRole()).toString()); 40 | QString path = lineage.join(this->lineageSeparator); 41 | 42 | // Make sure that previously entered paths are also kept. 
43 | QLineEdit * widget = (QLineEdit *) this->widget(); 44 | QStringList list = widget->text().split(this->entrySeparator); 45 | if (list.size() > 1) { 46 | // Delete the last path in the list, which was incomplete. 47 | list.removeLast(); 48 | // Add the *completed* last path again at the end of the list. 49 | list.append(path); 50 | // Merge all paths together again. 51 | path = list.join(this->entrySeparator); 52 | } 53 | 54 | return path; 55 | } 56 | -------------------------------------------------------------------------------- /code/Analytics/FPTree.h: -------------------------------------------------------------------------------- 1 | #ifndef FPTREE_H 2 | #define FPTREE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "Item.h" 11 | #include "FPNode.h" 12 | 13 | 14 | namespace Analytics { 15 | class FPTree { 16 | public: 17 | FPTree(); 18 | ~FPTree(); 19 | 20 | // Accessors. 21 | FPNode * getRoot() const { return this->root; } 22 | bool hasItemPath(ItemID itemID) const; 23 | ItemIDList getItemIDs() const { return this->itemPaths.keys(); } 24 | QList *> getItemPath(ItemID itemID) const; 25 | bool itemPathContains(ItemID itemID, FPNode * node) const; 26 | SupportCount getItemSupport(ItemID item) const; 27 | QList calculatePrefixPaths(ItemID itemID) const; 28 | 29 | // Modifiers. 30 | void addTransaction(const Transaction & transaction); 31 | void buildTreeFromPrefixPaths(const QList & prefixPaths); 32 | 33 | // Static (class) methods. 
34 | static QHash calculateSupportCountsForPrefixPaths(const QList & prefixPaths); 35 | 36 | #ifdef DEBUG 37 | ItemIDNameHash * itemIDNameHash; 38 | #endif 39 | 40 | protected: 41 | FPNode * root; 42 | QHash *> > itemPaths; 43 | 44 | void init(); 45 | void addNodeToItemPath(FPNode * node); 46 | }; 47 | 48 | #ifdef DEBUG 49 | QDebug operator<<(QDebug dbg, const FPTree & tree); 50 | QString dumpHelper(const FPNode & node, QString prefix = ""); 51 | 52 | // QDebug output operators for SupportCount. 53 | QDebug operator<<(QDebug dbg, const FPNode & node); 54 | QDebug operator<<(QDebug dbg, const QList *> & itemPath); 55 | #endif 56 | } 57 | 58 | Q_DECLARE_METATYPE(Analytics::FPTree); 59 | 60 | #endif // FPTREE_H 61 | -------------------------------------------------------------------------------- /code/Analytics/TiltedTimeWindow.h: -------------------------------------------------------------------------------- 1 | #ifndef TILTEDTIMEWINDOW_H 2 | #define TILTEDTIMEWINDOW_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Item.h" 8 | 9 | 10 | namespace Analytics { 11 | 12 | enum Granularity { 13 | GRANULARITY_QUARTER, 14 | GRANULARITY_HOUR, 15 | GRANULARITY_DAY, 16 | GRANULARITY_MONTH, 17 | GRANULARITY_YEAR 18 | }; 19 | 20 | #define TTW_NUM_GRANULARITIES 5 21 | #define TTW_NUM_BUCKETS 72 22 | #define TTW_BUCKET_UNUSED -1 23 | 24 | 25 | class TiltedTimeWindow { 26 | public: 27 | TiltedTimeWindow(); 28 | void appendQuarter(SupportCount s, quint32 updateID); 29 | bool isEmpty() const { return this->oldestBucketFilled == -1; } 30 | quint32 getLastUpdate() const { return this->lastUpdate; } 31 | void dropTail(Granularity start); 32 | int getOldestBucketFilled() const { return this->oldestBucketFilled; } 33 | uint getCapacityUsed(Granularity g) const { return this->capacityUsed[g]; } 34 | SupportCount getSupportForRange(uint from, uint to) const; 35 | 36 | // Unit testing helper method. 37 | QVector getBuckets(int numBuckets = TTW_NUM_BUCKETS) const; 38 | 39 | // Properties. 
40 | SupportCount buckets[TTW_NUM_BUCKETS]; 41 | int oldestBucketFilled; 42 | 43 | // Static methods. 44 | static uint quarterDistanceToBucket(uint bucket, bool includeBucketItself); 45 | 46 | // Static properties 47 | static uint GranularityBucketCount[TTW_NUM_GRANULARITIES]; 48 | static uint GranularityBucketOffset[TTW_NUM_GRANULARITIES]; 49 | static char GranularityChar[TTW_NUM_GRANULARITIES]; 50 | 51 | protected: 52 | // Methods. 53 | void reset(Granularity granularity); 54 | void shift(Granularity granularity); 55 | void store(Granularity granularity, SupportCount supportCount); 56 | 57 | // Properties. 58 | uint capacityUsed[TTW_NUM_GRANULARITIES]; 59 | quint32 lastUpdate; 60 | }; 61 | 62 | #ifdef DEBUG 63 | QDebug operator<<(QDebug dbg, const TiltedTimeWindow & ttw); 64 | #endif 65 | } 66 | 67 | #endif // TILTEDTIMEWINDOW_H 68 | -------------------------------------------------------------------------------- /code/Analytics/PatternTree.h: -------------------------------------------------------------------------------- 1 | #ifndef PATTERNTREE_H 2 | #define PATTERNTREE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Item.h" 8 | #include "TiltedTimeWindow.h" 9 | #include "FPNode.h" 10 | #include "Constraints.h" 11 | 12 | 13 | namespace Analytics { 14 | class PatternTree { 15 | public: 16 | PatternTree(); 17 | ~PatternTree(); 18 | 19 | // Accessors. 20 | FPNode * getRoot() const { return this->root; } 21 | TiltedTimeWindow * getPatternSupport(const ItemIDList & pattern) const; 22 | unsigned int getNodeCount() const { return this->nodeCount; } 23 | uint getCurrentQuarter() const { return this->currentQuarter; } 24 | QList getFrequentItemsetsForRange(SupportCount minSupport, 25 | const Constraints & frequentItemsetConstraints, 26 | uint from, 27 | uint to, 28 | const ItemIDList & prefix = ItemIDList(), 29 | FPNode * node = NULL) const; 30 | 31 | // Modifiers. 
32 | void addPattern(const FrequentItemset & pattern, quint32 updateID); 33 | void removePattern(FPNode * const node); 34 | void nextQuarter() { this->currentQuarter = (currentQuarter + 1) % 4; } 35 | 36 | // Static (class) methods. 37 | static ItemIDList getPatternForNode(FPNode const * const node); 38 | 39 | protected: 40 | FPNode * root; 41 | uint currentQuarter; 42 | unsigned int nodeCount; 43 | }; 44 | 45 | #ifdef DEBUG 46 | QDebug operator<<(QDebug dbg, const PatternTree & tree); 47 | QString dumpHelper(const FPNode & node, QString prefix = ""); 48 | 49 | // QDebug output operators for FPNode. 50 | QDebug operator<<(QDebug dbg, const FPNode & node); 51 | #endif 52 | 53 | } 54 | 55 | Q_DECLARE_METATYPE(Analytics::PatternTree); 56 | 57 | #endif // PATTERNTREE_H 58 | -------------------------------------------------------------------------------- /code/EpisodesParser/EpisodeDurationDiscretizer.cpp: -------------------------------------------------------------------------------- 1 | #include "EpisodeDurationDiscretizer.h" 2 | 3 | namespace EpisodesParser { 4 | EpisodeDurationDiscretizer::EpisodeDurationDiscretizer() { 5 | } 6 | 7 | bool EpisodeDurationDiscretizer::parseCsvFile(const QString & csvFile) { 8 | this->csvFile = csvFile; 9 | 10 | QFile csv(this->csvFile); 11 | if (!csv.open(QIODevice::ReadOnly | QIODevice::Text)) { 12 | qCritical("Could not open '%s' file for reading: %s.", qPrintable(this->csvFile), qPrintable(csv.errorString())); 13 | exit(1); 14 | } 15 | else { 16 | QTextStream in(&csv); 17 | QStringList parts; 18 | EpisodeName episodeName; 19 | EpisodeSpeed episodeSpeed; 20 | EpisodeDuration maxDuration; 21 | 22 | while (!in.atEnd()) { 23 | parts = in.readLine().split(','); 24 | episodeName = parts[0]; 25 | 26 | // Build the hierarchical map: 27 | // EpisodeName -> EpisodeSpeed -> max duration for this speed. 
28 | QMap map; 29 | this->thresholds.insert(episodeName, map); 30 | for (int i = 1; i < parts.length(); i += 2) { 31 | episodeSpeed = parts[i]; 32 | if (i < parts.length() - 1) 33 | maxDuration = parts[i+1].toInt(); 34 | else 35 | maxDuration = -1; // This will automatically map to the highest value supported, right now that is 65535. 36 | this->thresholds[episodeName].insert(maxDuration, episodeSpeed); 37 | } 38 | } 39 | 40 | return true; 41 | } 42 | } 43 | 44 | EpisodeSpeed EpisodeDurationDiscretizer::mapToSpeed(const EpisodeName & name, const EpisodeDuration & duration) const { 45 | EpisodeDuration maxDuration; 46 | foreach (maxDuration, this->thresholds[name].keys()) { 47 | if (duration <= maxDuration) 48 | return this->thresholds[name][maxDuration]; 49 | } 50 | 51 | qCritical("The duration %d for the Episode '%s' could not be mapped to a discretized speed.", duration, qPrintable(name)); 52 | return "satisfy the compiler"; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /code/UI/CausesTableFilterProxyModel.cpp: -------------------------------------------------------------------------------- 1 | #include "CausesTableFilterProxyModel.h" 2 | 3 | 4 | //------------------------------------------------------------------------------ 5 | // Public methods. 
6 | 7 | CausesTableFilterProxyModel::CausesTableFilterProxyModel(QObject *parent) : 8 | QSortFilterProxyModel(parent) 9 | { 10 | } 11 | 12 | void CausesTableFilterProxyModel::setEpisodesColumn(int col) { 13 | this->episodesColumn = col; 14 | this->invalidateFilter(); 15 | } 16 | 17 | void CausesTableFilterProxyModel::setCircumstancesColumn(int col) { 18 | this->circumstancesColumn = col; 19 | this->invalidateFilter(); 20 | } 21 | 22 | void CausesTableFilterProxyModel::setEpisodeFilter(const QString & filter) { 23 | this->episodesFilter = QRegExp(filter, Qt::CaseInsensitive, QRegExp::FixedString); 24 | this->invalidateFilter(); 25 | } 26 | 27 | void CausesTableFilterProxyModel::setCircumstancesFilter(const QStringList & filter) { 28 | this->circumstancesFilter.clear(); 29 | foreach (const QString & f, filter) 30 | this->circumstancesFilter.append(QRegExp(f, Qt::CaseInsensitive, QRegExp::Wildcard)); 31 | this->invalidateFilter(); 32 | } 33 | 34 | 35 | //------------------------------------------------------------------------------ 36 | // Protected methods. 37 | 38 | bool CausesTableFilterProxyModel::filterAcceptsRow(int sourceRow, const QModelIndex & sourceParent) const { 39 | bool episodesColumnMatches = false; 40 | bool circumstancesColumnMatches = false; 41 | 42 | QModelIndex e = this->sourceModel()->index(sourceRow, this->episodesColumn, sourceParent); 43 | episodesColumnMatches = this->sourceModel()->data(e).toString().contains(this->episodesFilter); 44 | 45 | QModelIndex c = this->sourceModel()->index(sourceRow, this->circumstancesColumn, sourceParent); 46 | if (!this->circumstancesFilter.isEmpty()) { 47 | foreach (const QRegExp & regexp, this->circumstancesFilter) { 48 | circumstancesColumnMatches = this->sourceModel()->data(c).toString().contains(regexp); 49 | if (!circumstancesColumnMatches) 50 | break; 51 | } 52 | } 53 | else { 54 | // No circumstances filter, hence this column *always* matches. 
55 | circumstancesColumnMatches = true; 56 | } 57 | 58 | return episodesColumnMatches && circumstancesColumnMatches; 59 | } 60 | -------------------------------------------------------------------------------- /code/Analytics/Constraints.h: -------------------------------------------------------------------------------- 1 | #ifndef CONSTRAINTS_H 2 | #define CONSTRAINTS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "Item.h" 10 | 11 | 12 | namespace Analytics { 13 | 14 | enum ItemConstraintType { 15 | CONSTRAINT_POSITIVE_MATCH_ALL, 16 | CONSTRAINT_POSITIVE_MATCH_ANY, 17 | CONSTRAINT_NEGATIVE_MATCH_ALL, 18 | CONSTRAINT_NEGATIVE_MATCH_ANY 19 | }; 20 | 21 | class Constraints { 22 | 23 | #ifdef DEBUG 24 | friend QDebug operator<<(QDebug dbg, const Constraints & constraints); 25 | #endif 26 | 27 | public: 28 | Constraints(); 29 | 30 | bool empty() const { return this->itemConstraints.empty(); } 31 | 32 | void addItemConstraint(ItemName item, ItemConstraintType type); 33 | void setItemConstraints(const QSet & constraints, ItemConstraintType type); 34 | 35 | QSet getItemIDsForConstraintType(ItemConstraintType type) const; 36 | 37 | void preprocessItemIDNameHash(const ItemIDNameHash & hash); 38 | 39 | void preprocessItem(const ItemName & name, ItemID id); 40 | void removeItem(ItemID id); 41 | ItemID getHighestPreprocessedItemID() const { return this->highestPreprocessedItemID; } 42 | void clearPreprocessedItems() { this->preprocessedItemConstraints.clear(); this->highestPreprocessedItemID = ROOT_ITEMID; } 43 | 44 | bool matchItemset(const ItemIDList & itemset) const; 45 | bool matchSearchSpace(const ItemIDList & frequentItemset, const QHash & prefixPathsSupportCounts) const; 46 | 47 | #ifdef DEBUG 48 | ItemIDNameHash * itemIDNameHash; 49 | #endif 50 | 51 | static const char * ItemConstraintTypeName[4]; 52 | 53 | protected: 54 | static bool matchItemsetHelper(const ItemIDList & itemset, ItemConstraintType type, const QSet & constraintItems); 55 | 
static bool matchSearchSpaceHelper(const ItemIDList & frequentItemset, const QHash & prefixPathsSupportCounts, ItemConstraintType type, const QSet & constraintItems); 56 | 57 | void addPreprocessedItemConstraint(ItemConstraintType type, const ItemName & category, ItemID id); 58 | 59 | QHash > itemConstraints; 60 | QHash > > preprocessedItemConstraints; 61 | ItemID highestPreprocessedItemID; 62 | }; 63 | } 64 | 65 | #endif // CONSTRAINTS_H 66 | -------------------------------------------------------------------------------- /code/Analytics/FPGrowth.h: -------------------------------------------------------------------------------- 1 | #ifndef FPGROWTH_H 2 | #define FPGROWTH_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "Item.h" 13 | #include "Constraints.h" 14 | #include "FPNode.h" 15 | #include "FPTree.h" 16 | 17 | 18 | namespace Analytics { 19 | 20 | #ifdef DEBUG 21 | // #define FPGROWTH_DEBUG 1 22 | #endif 23 | 24 | #define FPGROWTH_ASYNC true 25 | #define FPGROWTH_SYNC false 26 | 27 | class FPGrowth : public QObject { 28 | Q_OBJECT 29 | 30 | public: 31 | FPGrowth(const QList & transactions, SupportCount minSupportAbsolute, ItemIDNameHash * itemIDNameHash, ItemNameIDHash * itemNameIDHash, ItemIDList * sortedFrequentItemIDs); 32 | ~FPGrowth(); 33 | 34 | void setConstraints(const Constraints & constraints) { this->constraints = constraints; } 35 | void setConstraintsForRuleConsequents(const Constraints & constraints) { this->constraintsForRuleConsequents = constraints; } 36 | const Constraints & getConstraintsForRuleConsequents() const { return this->constraintsForRuleConsequents; } 37 | 38 | QList mineFrequentItemsets(bool asynchronous = true); 39 | 40 | // Ability to calculate support for any itemset; necessary to 41 | // calculate confidence for candidate association rules. 
42 | SupportCount calculateSupportCount(const ItemIDList & itemset) const; 43 | 44 | ItemID getItemID(ItemName name) const { return this->itemNameIDHash->value(name); } 45 | #ifdef DEBUG 46 | ItemIDNameHash * getItemIDNameHash() { return this->itemIDNameHash; } 47 | #endif 48 | 49 | signals: 50 | void minedFrequentItemset(const FrequentItemset & frequentItemset, bool frequentItemsetMatchesConstraints, const FPTree * ctree); 51 | void branchCompleted(const ItemIDList & itemset); 52 | 53 | public slots: 54 | QList generateFrequentItemsets(const FPTree * tree, const FrequentItemset & suffix, bool asynchronous = FPGROWTH_ASYNC); 55 | 56 | protected slots: 57 | void processTransaction(const Transaction & transaction); 58 | 59 | protected: 60 | // Static methods. 61 | static ItemIDList sortItemIDsByDecreasingSupportCount(const QHash & itemSupportCounts, const ItemIDList * const ignoreList); 62 | static QList filterPrefixPaths(const QList & prefixPaths, SupportCount minSupportAbsolute); 63 | 64 | // Methods. 65 | void scanTransactions(); 66 | void buildFPTree(); 67 | FPTree * considerFrequentItemsupersets(const FPTree * ctree, const ItemIDList & frequentItemset); 68 | Transaction optimizeTransaction(const Transaction & transaction) const; 69 | ItemIDList optimizeItemset(const ItemIDList & itemset) const; 70 | ItemIDList orderItemsetBySupport(const ItemIDList & itemset) const; 71 | 72 | // Properties. 
73 | FPTree * tree; 74 | Constraints constraints; 75 | Constraints constraintsForRuleConsequents; 76 | ItemIDNameHash * itemIDNameHash; 77 | ItemNameIDHash * itemNameIDHash; 78 | ItemIDList * sortedFrequentItemIDs; 79 | 80 | QList transactions; 81 | 82 | SupportCount minSupportAbsolute; 83 | 84 | QHash totalFrequentSupportCounts; 85 | }; 86 | 87 | } 88 | #endif // FPGROWTH_H 89 | -------------------------------------------------------------------------------- /code/EpisodesParser/Parser.h: -------------------------------------------------------------------------------- 1 | #ifndef PARSER_H 2 | #define PARSER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "QBrowsCap.h" 18 | #include "QGeoIP.h" 19 | #include "EpisodeDurationDiscretizer.h" 20 | #include "typedefs.h" 21 | 22 | 23 | namespace EpisodesParser { 24 | 25 | #define CHUNK_SIZE 4000 26 | 27 | class Parser : public QObject { 28 | Q_OBJECT 29 | 30 | public: 31 | Parser(); 32 | static void initParserHelpers(const QString & browsCapCSV, 33 | const QString & browsCapIndex, 34 | const QString & geoIPCityDB, 35 | const QString & geoIPISPDB, 36 | const QString & episodeDiscretizerCSV); 37 | static void clearParserHelperCaches(); 38 | 39 | // Processing logic. 
40 | static EpisodesLogLine mapLineToEpisodesLogLine(const QString & line); 41 | static ExpandedEpisodesLogLine expandEpisodesLogLine(const EpisodesLogLine & line); 42 | static ExpandedEpisodesLogLine mapAndExpandToEpisodesLogLine(const QString & line); 43 | static QList mapExpandedEpisodesLogLineToTransactions(const ExpandedEpisodesLogLine & line); 44 | 45 | signals: 46 | void parsing(bool); 47 | void parsedDuration(int duration); 48 | void parsedBatch(QList transactions, double transactionsPerEvent, Time start, Time end); 49 | 50 | public slots: 51 | void parse(const QString & fileName); 52 | void continueParsing(); 53 | 54 | protected slots: 55 | void processBatch(const QList batch); 56 | 57 | protected: 58 | void processParsedChunk(const QStringList & chunk); 59 | 60 | QMutex mutex; 61 | QWaitCondition condition; 62 | QTime timer; 63 | 64 | 65 | // QHashes that are used to minimize memory usage. 66 | static EpisodeNameIDHash episodeNameIDHash; 67 | static EpisodeIDNameHash episodeIDNameHash; 68 | static DomainNameIDHash domainNameIDHash; 69 | static DomainIDNameHash domainIDNameHash; 70 | static UAHierarchyDetailsIDHash uaHierarchyDetailsIDHash; 71 | static UAHierarchyIDDetailsHash uaHierarchyIDDetailsHash; 72 | static LocationToIDHash locationToIDHash; 73 | static LocationFromIDHash locationFromIDHash; 74 | 75 | static bool parserHelpersInitialized; 76 | static QBrowsCap browsCap; 77 | static QGeoIP geoIP; 78 | static EpisodeDurationDiscretizer episodeDiscretizer; 79 | 80 | // Mutexes used to ensure thread-safety. 81 | static QMutex parserHelpersInitMutex; 82 | static QMutex episodeHashMutex; 83 | static QMutex domainHashMutex; 84 | static QMutex uaHierarchyHashMutex; 85 | static QMutex mutex_hashAccess_location; 86 | static QMutex regExpMutex; 87 | static QMutex dateTimeMutex; 88 | 89 | // Methods to actually use the above QHashes. 
90 | static EpisodeID mapEpisodeNameToID(EpisodeName name); 91 | static DomainID mapDomainNameToID(DomainName name); 92 | static UAHierarchyID mapUAHierarchyToID(UAHierarchyDetails ua); 93 | static LocationID mapLocationToID(const Location & location); 94 | }; 95 | 96 | } 97 | #endif // PARSER_H 98 | -------------------------------------------------------------------------------- /code/EpisodesParser/typedefs.cpp: -------------------------------------------------------------------------------- 1 | #include "typedefs.h" 2 | 3 | 4 | namespace EpisodesParser { 5 | 6 | uint qHash(const Location & location) { 7 | return qHash(location.region + location.city + location.isp); 8 | } 9 | 10 | uint qHash(const UAHierarchyDetails & ua) { 11 | return qHash(ua.platform + ua.browser_name + ua.browser_version); 12 | } 13 | 14 | #ifdef DEBUG 15 | QDebug operator<<(QDebug dbg, const Episode & e) { 16 | dbg.nospace() << e.IDNameHash->value(e.id).toStdString().c_str() 17 | << "(" 18 | << e.id 19 | << ") = " 20 | << e.duration; 21 | 22 | return dbg.nospace(); 23 | } 24 | 25 | QDebug operator<<(QDebug dbg, const Domain & d) { 26 | dbg.nospace() << d.IDNameHash->value(d.id).toStdString().c_str() 27 | << "(" << d.id << ")"; 28 | 29 | return dbg.nospace(); 30 | } 31 | 32 | QDebug operator<<(QDebug dbg, const EpisodeList & el) { 33 | QString episodeOutput; 34 | 35 | //dbg.nospace() << "[size=" << namedEpisodeList.episodes.size() << "] "; 36 | dbg.nospace() << "{"; 37 | 38 | for (int i = 0; i < el.size(); i++) { 39 | if (i > 0) 40 | dbg.nospace() << ", "; 41 | 42 | // Generate output for episode. 
43 | episodeOutput.clear(); 44 | QDebug(&episodeOutput) << el[i]; 45 | 46 | dbg.nospace() << episodeOutput.toStdString().c_str(); 47 | } 48 | dbg.nospace() << "}"; 49 | 50 | return dbg.nospace(); 51 | } 52 | 53 | QDebug operator<<(QDebug dbg, const EpisodesLogLine & line) { 54 | const static char * eol = ", \n"; 55 | 56 | dbg.nospace() << "{\n" 57 | << "IP = " << line.ip << eol 58 | << "time = " << line.time << eol 59 | << "episodes = " << line.episodes << eol 60 | << "status = " << line.status << eol 61 | << "URL = " << line.url << eol 62 | << "user-agent = " << line.ua << eol 63 | << "domain = " << line.domain << eol 64 | << "}"; 65 | 66 | return dbg.nospace(); 67 | } 68 | 69 | QDebug operator<<(QDebug dbg, const Location & location) { 70 | dbg.nospace() << location.continent 71 | << " > " << location.region 72 | << " > " << location.country 73 | << " > " << location.city 74 | << " (" << location.isp << ")"; 75 | return dbg.nospace(); 76 | } 77 | 78 | QDebug operator<<(QDebug dbg, const UAHierarchyDetails & ua) { 79 | dbg.nospace() << ua.browser_name.toStdString().c_str() << " " << ua.browser_version.toStdString().c_str() 80 | << " (" << ua.browser_version_major << ", " << ua.browser_version_minor << ")" 81 | << " on " << ua.platform.toStdString().c_str(); 82 | return dbg.nospace(); 83 | } 84 | 85 | QDebug operator<<(QDebug dbg, const ExpandedEpisodesLogLine & line) { 86 | const static char * eol = ", \n"; 87 | 88 | dbg.nospace() << "{\n" 89 | << "location = " << line.location << eol 90 | << "time = " << line.time << eol 91 | << "episodes = " << line.episodes << eol 92 | << "status = " << line.status << eol 93 | << "URL = " << line.url << eol 94 | << "user-agent = " << line.ua << " -> " << line.uaHierarchyIDDetailsHash->value(line.ua) << eol 95 | << "}"; 96 | 97 | return dbg.nospace(); 98 | } 99 | #endif 100 | } 101 | -------------------------------------------------------------------------------- /code/Analytics/FPStream.h: 
-------------------------------------------------------------------------------- 1 | #ifndef FPSTREAM_H 2 | #define FPSTREAM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "Item.h" 11 | #include "Constraints.h" 12 | #include "FPNode.h" 13 | #include "FPTree.h" 14 | #include "FPGrowth.h" 15 | #include "PatternTree.h" 16 | 17 | namespace Analytics { 18 | 19 | #ifdef DEBUG 20 | // #define FPSTREAM_DEBUG 1 21 | #endif 22 | 23 | class FPStream : public QObject { 24 | Q_OBJECT 25 | 26 | public: 27 | FPStream(double minSupport, 28 | double maxSupportError, 29 | ItemIDNameHash * itemIDNameHash, 30 | ItemNameIDHash * itemNameIDHash, 31 | ItemIDList * sortedFrequentItemIDs); 32 | SupportCount calculateMinSupportForRange(uint from, uint to) const; 33 | 34 | const TiltedTimeWindow * const getTransactionsPerBatch() const { return &this->transactionsPerBatch; } 35 | const TiltedTimeWindow * const getEventsPerBatch() const { return &this->eventsPerBatch; } 36 | void setConstraints(const Constraints & constraints) { this->constraints = constraints; } 37 | void setConstraintsToPreprocess(const Constraints & constraints) { this->constraintsToPreprocess = constraints; } 38 | 39 | // Stats for UI. 40 | int getNumFrequentItems() const { return this->f_list->size(); } 41 | int getPatternTreeSize() const { return this->patternTree.getNodeCount(); } 42 | SupportCount getNumEventsInRange(uint from, uint to) const { return this->eventsPerBatch.getSupportForRange(from, to); } 43 | 44 | // Unit testing helper method. 45 | const PatternTree & getPatternTree() const { return this->patternTree; } 46 | 47 | // Static methods (public to allow for unit testing). 
48 | static Granularity calculateDroppableTail(const TiltedTimeWindow & window, 49 | double minSupport, 50 | double maxSupportError, 51 | const TiltedTimeWindow & eventsPerBatch); 52 | 53 | signals: 54 | void mineForFrequentItemsupersets(const FPTree * tree, const FrequentItemset & suffix); 55 | void batchProcessed(); 56 | 57 | public slots: 58 | void processBatchTransactions(const QList & transactions, double transactionsPerEvent = 1.0); 59 | void processFrequentItemset(const FrequentItemset & frequentItemset, 60 | bool frequentItemsetMatchesConstraints, 61 | const FPTree * ctree); 62 | void branchCompleted(const ItemIDList & itemset); 63 | 64 | protected: 65 | // Methods. 66 | void updateUnaffectedNodes(FPNode * node); 67 | 68 | // Properties related to the entire state over time. 69 | PatternTree patternTree; 70 | TiltedTimeWindow transactionsPerBatch; 71 | TiltedTimeWindow eventsPerBatch; 72 | 73 | // Properties related to configuration. 74 | bool initialBatchProcessed; 75 | double minSupport; 76 | double maxSupportError; 77 | Constraints constraints; 78 | Constraints constraintsToPreprocess; 79 | 80 | // Properties that are updated in each batch. 81 | ItemIDNameHash * itemIDNameHash; 82 | ItemNameIDHash * itemNameIDHash; 83 | ItemIDList * f_list; // sortedFrequentItemIDs would be a better 84 | // name, but it's called f_list in the 85 | // FP-Stream paper. 86 | 87 | // Properties relating to the current batch being processed. 
/**
 * QObject that holds an FPStream instance together with the item ID/name
 * hashes, the item constraints and the support/confidence thresholds, and
 * exposes slots to analyze batches of transactions and to mine (and compare)
 * association rules over a time range. Progress, statistics and results are
 * reported through signals.
 */
class Analyst : public QObject {
    Q_OBJECT

public:
    Analyst(double minSupport, double maxSupportError, double minConfidence);
    ~Analyst();
    void addFrequentItemsetItemConstraint(ItemName item, ItemConstraintType type);
    void addRuleConsequentItemConstraint(ItemName item, ItemConstraintType type);

    // Override moveToThread to also move the FPStream instance.
    // NOTE(review): QObject::moveToThread() is presumably non-virtual, in
    // which case this hides rather than overrides it and is bypassed when
    // called through a QObject pointer -- confirm against the Qt docs.
    void moveToThread(QThread * thread);

    // UI integration.
    QStandardItemModel * getConceptHierarchyModel() const { return this->conceptHierarchyModel; }
    QPair extractEpisodeFromItemset(ItemIDList itemset) const;

signals:
    // Signals for UI.
    void analyzing(bool, Time start, Time end, int pageViews, int transactions);
    void analyzedDuration(int duration);
    void stats(Time start, Time end, int pageViews, int transactions, int uniqueItems, int frequentItems, int patternTreeSize);
    void mining(bool);
    void minedDuration(int duration);

    // Signals for calculations.
    void processedBatch();
    void minedRules(uint from, uint to, QList associationRules, Analytics::SupportCount eventsInTimeRange);
    void comparedMinedRules(uint fromOlder, uint toOlder,
                            uint fromNewer, uint toNewer,
                            QList intersectedRules,
                            QList olderRules,
                            QList newerRules,
                            QList comparedRules,
                            QList confidenceVariance,
                            QList supportVariance,
                            Analytics::SupportCount eventsInIntersectedTimeRange,
                            Analytics::SupportCount eventsInOlderTimeRange,
                            Analytics::SupportCount eventsInNewerTimeRange);

public slots:
    void analyzeTransactions(const QList & transactions, double transactionsPerEvent, Time start, Time end);
    void mineRules(uint from, uint to);
    void mineAndCompareRules(uint fromOlder, uint toOlder, uint fromNewer, uint toNewer);

protected slots:
    void fpstreamProcessedBatch();

protected:
    void performMining(const QList & transactions, double transactionsPerEvent);
    void updateConceptHierarchyModel(int itemsAlreadyProcessed);

    // Mining back-end and the thresholds passed to the constructor.
    FPStream * fpstream;
    double minSupport;
    double maxSupportError;
    double minConfidence;

    // Item constraints, filled through the add*ItemConstraint() methods
    // (presumably -- their definitions are not part of this header).
    Constraints frequentItemsetItemConstraints;
    Constraints ruleConsequentItemConstraints;

    // Item ID <-> name mappings and the frequency-sorted item ID list.
    ItemIDNameHash itemIDNameHash;
    ItemNameIDHash itemNameIDHash;
    ItemIDList sortedFrequentItemIDs;

    // Stats for the UI.
    // NOTE(review): the start/end times are stored as int, whereas the
    // signals and slots above use Time (a uint typedef) -- confirm that the
    // signed/unsigned mix is intentional.
    int currentBatchStartTime;
    int currentBatchEndTime;
    int currentBatchNumPageViews;
    int currentBatchNumTransactions;
    int allBatchesStartTime;
    int allBatchesNumPageViews;
    int allBatchesNumTransactions;
    QTime timer;

    // Browsable concept hierarchy for the UI.
    int uniqueItemsBeforeMining;
    QStandardItemModel * conceptHierarchyModel;
    QHash conceptHierarchyHash;
};
dbg, const Transaction & transaction) { 58 | QString itemOutput; 59 | 60 | dbg.nospace() << "[size=" << transaction.size() << "] {"; 61 | 62 | for (int i = 0; i < transaction.size(); i++) { 63 | if (i > 0) 64 | dbg.nospace() << ", "; 65 | 66 | // Generate output for item. 67 | itemOutput.clear(); 68 | QDebug(&itemOutput) << transaction[i]; 69 | 70 | dbg.nospace() << itemOutput.toStdString().c_str(); 71 | } 72 | dbg.nospace() << "}"; 73 | 74 | return dbg.nospace(); 75 | } 76 | 77 | QDebug operator<<(QDebug dbg, const FrequentItemset & frequentItemset) { 78 | QString itemOutput; 79 | 80 | dbg.nospace() << "({"; 81 | itemIDHelper(dbg, frequentItemset.itemset, frequentItemset.IDNameHash); 82 | dbg.nospace() << "}, sup: " 83 | << frequentItemset.support 84 | << ")"; 85 | 86 | return dbg.nospace(); 87 | } 88 | 89 | QDebug operator<<(QDebug dbg, const AssociationRule & associationRule) { 90 | dbg.nospace() << "{"; 91 | itemIDHelper(dbg, associationRule.antecedent, associationRule.IDNameHash); 92 | dbg.nospace() << "} => {"; 93 | itemIDHelper(dbg, associationRule.consequent, associationRule.IDNameHash); 94 | dbg.nospace() << "}"; 95 | 96 | dbg.nospace() << " (" 97 | << "sup=" << associationRule.support 98 | << ", conf=" << associationRule.confidence 99 | << ")"; 100 | 101 | return dbg.nospace(); 102 | } 103 | 104 | QDebug itemIDHelper(QDebug dbg, const ItemIDList & itemset, ItemIDNameHash const * const IDNameHash) { 105 | for (int i = 0; i < itemset.size(); i++) { 106 | if (i > 0) 107 | dbg.nospace() << ", "; 108 | 109 | if (IDNameHash != NULL) { 110 | dbg.nospace() << IDNameHash->value(itemset[i]).toStdString().c_str() 111 | << "(" 112 | << itemset[i] 113 | << ")"; 114 | } 115 | else 116 | dbg.nospace() << itemset[i]; 117 | } 118 | 119 | return dbg.nospace(); 120 | } 121 | 122 | #endif 123 | 124 | } 125 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestFPTree.cpp: 
-------------------------------------------------------------------------------- 1 | #include "TestFPTree.h" 2 | 3 | 4 | void TestFPTree::basic() { 5 | FPTree * tree = new FPTree(); 6 | 7 | // Build ItemIDNameHash; 8 | ItemIDNameHash itemIDNameHash; 9 | ItemIDNameHash * hash = &itemIDNameHash; 10 | tree->itemIDNameHash = hash; 11 | itemIDNameHash.insert(1, "A"); 12 | itemIDNameHash.insert(2, "B"); 13 | itemIDNameHash.insert(3, "C"); 14 | itemIDNameHash.insert(4, "D"); 15 | 16 | // Create a few transactions. 17 | Transaction t1, t2, t3, t4; 18 | 19 | t1 << Item(1) << Item(2); 20 | t2 << Item(2) << Item(3); 21 | t3 << Item(1) << Item(2) << Item(3); 22 | t4 << Item(1) << Item(4); 23 | 24 | tree->addTransaction(t1); 25 | tree->addTransaction(t2); 26 | tree->addTransaction(t3); 27 | tree->addTransaction(t4); 28 | 29 | // Helpful for debugging/expanding this test. 30 | //qDebug() << *tree; 31 | 32 | // Verify the available item paths. 33 | QCOMPARE(tree->getItemIDs(), ItemIDList() << 1 << 2 << 3 << 4); 34 | QList *> itemPath; 35 | // Item path for A(1): A(1)=3 (0x0001) 36 | itemPath = tree->getItemPath(1); 37 | QCOMPARE(itemPath.size(), 1); 38 | QCOMPARE(itemPath[0]->getNodeID(), (unsigned int) 1); 39 | QCOMPARE(itemPath[0]->getValue(), (SupportCount) 3); 40 | // Item path for B(2): B(2)=2 (0x0002) -> B(2)=1 (0x0003) 41 | itemPath = tree->getItemPath(2); 42 | QCOMPARE(itemPath.size(), 2); 43 | QCOMPARE(itemPath[0]->getNodeID(), (unsigned int) 2); 44 | QCOMPARE(itemPath[0]->getValue(), (SupportCount) 2); 45 | QCOMPARE(itemPath[1]->getNodeID(), (unsigned int) 3); 46 | QCOMPARE(itemPath[1]->getValue(), (SupportCount) 1); 47 | // Item path for C(3): C(3)=1 (0x0004) -> C(3)=1 (0x0005) 48 | itemPath = tree->getItemPath(3); 49 | QCOMPARE(itemPath.size(), 2); 50 | QCOMPARE(itemPath[0]->getNodeID(), (unsigned int) 4); 51 | QCOMPARE(itemPath[0]->getValue(), (SupportCount) 1); 52 | QCOMPARE(itemPath[1]->getNodeID(), (unsigned int) 5); 53 | QCOMPARE(itemPath[1]->getValue(), 
(SupportCount) 1); 54 | // Item path for D(4): D(4)=1 (0x0006) 55 | itemPath = tree->getItemPath(4); 56 | QCOMPARE(itemPath.size(), 1); 57 | QCOMPARE(itemPath[0]->getNodeID(), (unsigned int) 6); 58 | QCOMPARE(itemPath[0]->getValue(), (SupportCount) 1); 59 | 60 | 61 | // Verify the total item support counts. 62 | QCOMPARE(tree->getItemSupport(1), (SupportCount) 3); 63 | QCOMPARE(tree->getItemSupport(2), (SupportCount) 3); 64 | QCOMPARE(tree->getItemSupport(3), (SupportCount) 2); 65 | QCOMPARE(tree->getItemSupport(4), (SupportCount) 1); 66 | 67 | 68 | // Verify the tree shape. 69 | FPNode * node; 70 | FPNode * root = tree->getRoot(); 71 | QCOMPARE(root->getNodeID(), (unsigned int) 0); 72 | QCOMPARE(root->getItemID(), (ItemID) ROOT_ITEMID); 73 | 74 | // First branch. 75 | // root -> A(1)=3 (0x0001) 76 | node = root->getChild(1); 77 | QVERIFY(node != NULL); 78 | QCOMPARE(node->getItemID(), (ItemID) 1); 79 | QCOMPARE(node->getValue(), (SupportCount) 3); 80 | QCOMPARE(node->getNodeID(), (unsigned int) 1); 81 | FPNode * firstBranch = node; 82 | // root -> A(1)=3 (0x0001) -> B(2)=2 (0x0002) 83 | node = firstBranch->getChild(2); 84 | QVERIFY(node != NULL); 85 | QCOMPARE(node->getItemID(), (ItemID) 2); 86 | QCOMPARE(node->getValue(), (SupportCount) 2); 87 | QCOMPARE(node->getNodeID(), (unsigned int) 2); 88 | // root -> A(1)=3 (0x0001) -> C(3)=1 (0x0005) 89 | node = node->getChild(3); 90 | QVERIFY(node != NULL); 91 | QCOMPARE(node->getItemID(), (ItemID) 3); 92 | QCOMPARE(node->getValue(), (SupportCount) 1); 93 | QCOMPARE(node->getNodeID(), (unsigned int) 5); 94 | // root -> A(1)=3 (0x0001) -> D(4)=1 (0x0006) 95 | node = firstBranch->getChild(4); 96 | QVERIFY(node != NULL); 97 | QCOMPARE(node->getItemID(), (ItemID) 4); 98 | QCOMPARE(node->getValue(), (SupportCount) 1); 99 | QCOMPARE(node->getNodeID(), (unsigned int) 6); 100 | 101 | // Second branch. 
102 | // root -> B(2)=1 (0x0003) 103 | node = root->getChild(2); 104 | QVERIFY(node != NULL); 105 | QCOMPARE(node->getItemID(), (ItemID) 2); 106 | QCOMPARE(node->getValue(), (SupportCount) 1); 107 | QCOMPARE(node->getNodeID(), (unsigned int) 3); 108 | // root -> B(2)=1 (0x0003) -> C(3)=1 (0x0004) 109 | node = node->getChild(3); 110 | QVERIFY(node != NULL); 111 | QCOMPARE(node->getItemID(), (ItemID) 3); 112 | QCOMPARE(node->getValue(), (SupportCount) 1); 113 | QCOMPARE(node->getNodeID(), (unsigned int) 4); 114 | 115 | delete tree; 116 | } 117 | -------------------------------------------------------------------------------- /code/Analytics/FPNode.h: -------------------------------------------------------------------------------- 1 | #ifndef FPNODE_H 2 | #define FPNODE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "Item.h" 9 | 10 | 11 | namespace Analytics { 12 | 13 | template 14 | class FPNode { 15 | public: 16 | FPNode(ItemID itemID, SupportCount count) { 17 | this->itemID = itemID; 18 | this->value = count; 19 | this->parent = NULL; 20 | 21 | #ifdef DEBUG 22 | this->nodeID = FPNode::nextNodeID(); 23 | #endif 24 | } 25 | FPNode(ItemID itemID) { 26 | this->itemID = itemID; 27 | this->parent = NULL; 28 | 29 | #ifdef DEBUG 30 | this->nodeID = FPNode::nextNodeID(); 31 | #endif 32 | } 33 | ~FPNode() { 34 | // Delete all child nodes. 35 | foreach (FPNode * child, this->children) { 36 | delete child; 37 | } 38 | this->children.clear(); 39 | 40 | // Remove this node from its parent's children. 41 | if (this->parent != NULL) 42 | this->parent->children.remove(this->itemID); 43 | } 44 | 45 | // Accessors. 
46 | bool isRoot() const { return this->itemID == ROOT_ITEMID; } 47 | bool isLeaf() const { return this->children.size() == 0; } 48 | ItemID getItemID() const { return this->itemID; } 49 | const T & getValue() const { return this->value; } 50 | T * getPointerToValue() { return &this->value; } 51 | FPNode * getParent() const { return this->parent; } 52 | FPNode * getChild(ItemID itemID) const { 53 | if (this->children.contains(itemID)) 54 | return this->children.value(itemID); 55 | else 56 | return NULL; 57 | } 58 | const QHash *> & getChildren() const { return this->children; } 59 | bool hasChild(ItemID itemID) const { return this->children.contains(itemID); } 60 | unsigned int numChildren() const { return this->children.size(); } 61 | unsigned int getNumDescendants() const { 62 | unsigned int n = this->children.size(); 63 | if (n > 0) { 64 | foreach (FPNode * child, this->children.values()) 65 | n += child->getNumDescendants(); 66 | } 67 | return n; 68 | } 69 | T * findNodeByPattern(const ItemIDList & pattern) const { 70 | // This method only works from the root node. 71 | if (this->itemID != ROOT_ITEMID) { 72 | qWarning("FPNode::getPath() was called from a node other than the root node."); 73 | return NULL; 74 | } 75 | 76 | FPNode * node = const_cast *>(this); 77 | foreach (ItemID itemID, pattern) { 78 | if (node->hasChild(itemID)) 79 | node = node->getChild(itemID); 80 | else 81 | return NULL; 82 | } 83 | 84 | return &(node->value); 85 | } 86 | 87 | // Modifiers. 88 | void addChild(FPNode * child) { this->children.insert(child->getItemID(), child); } 89 | void setParent(FPNode * parent) { 90 | this->parent = parent; 91 | 92 | // Also let the parent know it has a new child, when it is a valid 93 | // parent. 94 | if (this->parent != NULL) 95 | this->parent->addChild(this); 96 | 97 | } 98 | /** 99 | * This adds a SupportCount to the existing value in this FPNode, but 100 | * how this happens depends on the template parameter type's addition 101 | * operator. 
102 | */ 103 | void addSupportCount(SupportCount count) { this->value += count; } 104 | 105 | #ifdef DEBUG 106 | unsigned int getNodeID() const { return this->nodeID; } 107 | static void resetLastNodeID() { FPNode::lastNodeID = 0; } 108 | 109 | ItemIDNameHash * itemIDNameHash; 110 | #endif 111 | 112 | protected: 113 | ItemID itemID; 114 | T value; 115 | FPNode * parent; 116 | QHash *> children; 117 | 118 | #ifdef DEBUG 119 | unsigned int nodeID; 120 | static unsigned int lastNodeID; 121 | static unsigned int nextNodeID() { return FPNode::lastNodeID++; } 122 | #endif 123 | }; 124 | 125 | #ifdef DEBUG 126 | // Initialize static members. 127 | template 128 | unsigned int FPNode::lastNodeID = 0; 129 | 130 | #endif 131 | } 132 | 133 | //template 134 | //Q_DECLARE_METATYPE(Analytics::FPNode); 135 | 136 | #endif // FPNODE_H 137 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestFPGrowth.cpp: -------------------------------------------------------------------------------- 1 | #include "TestFPGrowth.h" 2 | 3 | void TestFPGrowth::basic() { 4 | QList transactions; 5 | transactions.append(QStringList() << "A" << "B" << "C" << "D"); 6 | transactions.append(QStringList() << "A" << "B"); 7 | transactions.append(QStringList() << "A" << "C"); 8 | transactions.append(QStringList() << "A" << "B" << "C"); 9 | transactions.append(QStringList() << "A" << "D"); 10 | transactions.append(QStringList() << "A" << "C" << "D"); 11 | transactions.append(QStringList() << "C" << "B"); 12 | transactions.append(QStringList() << "B" << "C"); 13 | transactions.append(QStringList() << "C" << "D"); 14 | transactions.append(QStringList() << "C" << "E"); 15 | 16 | FPNode::resetLastNodeID(); 17 | ItemIDNameHash itemIDNameHash; 18 | ItemNameIDHash itemNameIDHash; 19 | ItemIDList sortedFrequentItemIDs; 20 | FPGrowth * fpgrowth = new FPGrowth(transactions, 0.4 * transactions.size(), &itemIDNameHash, &itemNameIDHash, &sortedFrequentItemIDs); 21 | QList 
frequentItemsets = fpgrowth->mineFrequentItemsets(FPGROWTH_SYNC); 22 | 23 | // Characteristics about the transactions above, and the found results: 24 | // * support: 25 | // - A: 6 26 | // - B: 5 27 | // - C: 8 28 | // - D: 4 29 | // - E: 1 30 | // * minimum support = 0.4 31 | // * number of transactions: 10 32 | // * absolute min support: 4 33 | // * items qualifying: A, B, C, D 34 | // * frequent itemsets: {{A}, {B}, {C}, {D}, {C, B}, {C, A}} 35 | 36 | // Helpful for debugging/expanding this test. 37 | // Currently, this should match: 38 | // (({C(2)}, sup: 8), ({A(0)}, sup: 6), ({C(2), A(0)}, sup: 4), ({B(1)}, sup: 5), ({C(2), B(1)}, sup: 4), ({D(3)}, sup: 4)) 39 | //qDebug() << frequentItemsets; 40 | 41 | // Verify the results. 42 | QCOMPARE(frequentItemsets, QList() << FrequentItemset(ItemIDList() << 0 , 6) 43 | << FrequentItemset(ItemIDList() << 2 << 0, 4) 44 | << FrequentItemset(ItemIDList() << 1 , 5) 45 | << FrequentItemset(ItemIDList() << 2 << 1, 4) 46 | << FrequentItemset(ItemIDList() << 2 , 8) 47 | << FrequentItemset(ItemIDList() << 3 , 4) 48 | ); 49 | 50 | delete fpgrowth; 51 | } 52 | 53 | void TestFPGrowth::withConstraints() { 54 | QList transactions; 55 | transactions.append(QStringList() << "A" << "B" << "C" << "D"); 56 | transactions.append(QStringList() << "A" << "B"); 57 | transactions.append(QStringList() << "A" << "C"); 58 | transactions.append(QStringList() << "A" << "B" << "C"); 59 | transactions.append(QStringList() << "A" << "D"); 60 | transactions.append(QStringList() << "A" << "C" << "D"); 61 | transactions.append(QStringList() << "C" << "B"); 62 | transactions.append(QStringList() << "B" << "C"); 63 | transactions.append(QStringList() << "C" << "D"); 64 | transactions.append(QStringList() << "C" << "E"); 65 | 66 | Constraints constraints; 67 | constraints.addItemConstraint("A", Analytics::CONSTRAINT_POSITIVE_MATCH_ANY); 68 | 69 | FPNode::resetLastNodeID(); 70 | ItemIDNameHash itemIDNameHash; 71 | ItemNameIDHash itemNameIDHash; 72 | 
ItemIDList sortedFrequentItemIDs; 73 | FPGrowth * fpgrowth = new FPGrowth(transactions, 0.4 * transactions.size(), &itemIDNameHash, &itemNameIDHash, &sortedFrequentItemIDs); 74 | fpgrowth->setConstraints(constraints); 75 | QList frequentItemsets = fpgrowth->mineFrequentItemsets(FPGROWTH_SYNC); 76 | 77 | // Characteristics about the transactions above, and the found results 78 | // (*after* applying filtering): 79 | // * support: 80 | // - A: 6 81 | // - B: 3 82 | // - C: 4 83 | // - D: 3 84 | // - E: 0 85 | // * minimum support = 0.4 86 | // * number of transactions: 10 87 | // * absolute min support: 4 88 | // * items qualifying: A, C 89 | // * frequent itemsets: {{A}, {A, C}} 90 | 91 | // Helpful for debugging/expanding this test. 92 | // Currently, this should match: 93 | // (({A(0)}, sup: 6), ({C(2), A(0)}, sup: 4)) 94 | //qDebug() << frequentItemsets; 95 | 96 | // Verify the results. 97 | QCOMPARE(frequentItemsets, QList() << FrequentItemset(ItemIDList() << 0 , 6) 98 | << FrequentItemset(ItemIDList() << 2 << 0, 4) 99 | ); 100 | 101 | delete fpgrowth; 102 | } 103 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestPatternTree.cpp: -------------------------------------------------------------------------------- 1 | #include "TestPatternTree.h" 2 | 3 | void TestPatternTree::basic() { 4 | FPNode::resetLastNodeID(); 5 | PatternTree * patternTree = new PatternTree(); 6 | 7 | // Pattern 1: {1, 2, 3}, support: 1. 8 | ItemIDList p1; 9 | p1 << 1 << 2 << 3; 10 | SupportCount s1 = 1; 11 | patternTree->addPattern(FrequentItemset(p1, s1, NULL), 0); 12 | 13 | // Pattern 2: {1, 2}, support: 2, add this twice. 14 | ItemIDList p2; 15 | p2 << 1 << 2; 16 | SupportCount s2 = 2; 17 | patternTree->addPattern(FrequentItemset(p2, s2, NULL), 0); 18 | patternTree->addPattern(FrequentItemset(p2, s2, NULL), 1); 19 | 20 | // Pattern 3: {1, 4}, support: 5. 
21 | ItemIDList p3; 22 | p3 << 1 << 4; 23 | SupportCount s3 = 5; 24 | patternTree->addPattern(FrequentItemset(p3, s3, NULL), 0); 25 | 26 | // Helpful for debugging/expanding this test. 27 | // Currently, this should match: 28 | // (NULL) 29 | // -> ({1}, {}) (0x0001) 30 | // -> ({1, 2}, {Q={2, 2}}) (0x0002) 31 | // -> ({1, 2, 3}, {Q={1}}) (0x0003) 32 | // -> ({1, 4}, {Q={5}}) (0x0004) 33 | //qDebug() << *patternTree; 34 | 35 | // Verify the tree shape. 36 | FPNode * node; 37 | FPNode * root = patternTree->getRoot(); 38 | QCOMPARE(root->getNodeID(), (unsigned int) 0); 39 | QCOMPARE(root->getItemID(), (ItemID) ROOT_ITEMID); 40 | 41 | // First branch. 42 | // root -> ({1}, {}) (0x0001) 43 | node = root->getChild(1); 44 | ItemIDList referencePattern = ItemIDList() << 1; 45 | QVector referenceBuckets = QVector(); 46 | QVERIFY(node != NULL); 47 | QCOMPARE(node->getItemID(), (ItemID) 1); 48 | QCOMPARE(node->getValue().getBuckets(0), referenceBuckets); 49 | QCOMPARE(node->getNodeID(), (unsigned int) 1); 50 | QCOMPARE(PatternTree::getPatternForNode(node), referencePattern); 51 | QCOMPARE(patternTree->getPatternSupport(referencePattern)->getBuckets(0), referenceBuckets); 52 | FPNode * splitNode = node; 53 | // root -> ({1}, {}) (0x0001) -> ({1, 2}, {Q={2, 2}}) (0x0002) 54 | node = node->getChild(2); 55 | referencePattern = ItemIDList() << 1 << 2; 56 | referenceBuckets = QVector() << 2 << 2; 57 | QVERIFY(node != NULL); 58 | QCOMPARE(node->getItemID(), (ItemID) 2); 59 | QCOMPARE(node->getValue().getBuckets(2), referenceBuckets); 60 | QCOMPARE(node->getNodeID(), (unsigned int) 2); 61 | QCOMPARE(PatternTree::getPatternForNode(node), referencePattern); 62 | QCOMPARE(patternTree->getPatternSupport(referencePattern)->getBuckets(2), referenceBuckets); 63 | // root -> ({1}, {}) (0x0001) -> ({1, 2}, {Q={2, 2}}) (0x0002) -> ({1, 2, 3}, {Q={1}}) (0x0003) 64 | node = node->getChild(3); 65 | referencePattern = ItemIDList() << 1 << 2 << 3; 66 | referenceBuckets = QVector() << 1; 67 | 
QVERIFY(node != NULL); 68 | QCOMPARE(node->getItemID(), (ItemID) 3); 69 | QCOMPARE(node->getValue().getBuckets(1), referenceBuckets); 70 | QCOMPARE(node->getNodeID(), (unsigned int) 3); 71 | QCOMPARE(PatternTree::getPatternForNode(node), referencePattern); 72 | QCOMPARE(patternTree->getPatternSupport(referencePattern)->getBuckets(1), referenceBuckets); 73 | 74 | // Second branch. 75 | // root -> ({1}, {}) (0x0001) -> ({1, 4}, {Q={5}}) (0x0004) 76 | node = splitNode->getChild(4); 77 | referencePattern = ItemIDList() << 1 << 4; 78 | referenceBuckets = QVector() << 5; 79 | QVERIFY(node != NULL); 80 | QCOMPARE(node->getItemID(), (ItemID) 4); 81 | QCOMPARE(node->getValue().getBuckets(1), referenceBuckets); 82 | QCOMPARE(node->getNodeID(), (unsigned int) 4); 83 | QCOMPARE(PatternTree::getPatternForNode(node), referencePattern); 84 | QCOMPARE(patternTree->getPatternSupport(referencePattern)->getBuckets(1), referenceBuckets); 85 | 86 | delete patternTree; 87 | } 88 | 89 | 90 | void TestPatternTree::additionsRemainInSync() { 91 | FPNode::resetLastNodeID(); 92 | PatternTree * patternTree = new PatternTree(); 93 | uint updateID; 94 | 95 | 96 | 97 | // 98 | // Batch 1 (quarter 1). 99 | // 100 | 101 | updateID = 1; 102 | 103 | // Pattern 1: {1, 2, 3}, support: 1. 104 | ItemIDList p1; 105 | p1 << 1 << 2 << 3; 106 | SupportCount s1 = 1; 107 | patternTree->addPattern(FrequentItemset(p1, s1, NULL), updateID); 108 | 109 | 110 | 111 | // 112 | // Batch 2 (quarter 2). 113 | // 114 | 115 | updateID = 2; 116 | patternTree->nextQuarter(); 117 | 118 | // Repeat pattern 1. 119 | patternTree->addPattern(FrequentItemset(p1, s1, NULL), updateID); 120 | 121 | // Pattern 2: {4, 5}, support: 2. 122 | ItemIDList p2; 123 | p2 << 4 << 5; 124 | SupportCount s2 = 2; 125 | patternTree->addPattern(FrequentItemset(p2, s2, NULL), updateID); 126 | 127 | 128 | 129 | // Helpful for debugging/expanding this test. 
130 | // Currently, this should match: 131 | // (NULL) 132 | // -> ({1}, {} (lastUpdate=0)) (0x0001) 133 | // -> ({1, 2}, {} (lastUpdate=0)) (0x0002) 134 | // -> ({1, 2, 3}, {Q={1, 1}} (lastUpdate=2)) (0x0003) 135 | // -> ({4}, {} (lastUpdate=0)) (0x0004) 136 | // -> ({4, 5}, {Q={2, 0}} (lastUpdate=2)) (0x0005) 137 | //qDebug() << *patternTree; 138 | 139 | // Verify that the TiltedTimeWindow for the node for the pattern {4, 5} 140 | // has a 0 for the second quarter, which would make it in sync with the 141 | // first pattern, which also has two quarters stored. 142 | FPNode * node = patternTree->getRoot()->getChild(4)->getChild(5); 143 | QVector referenceBuckets = QVector() << 2 << 0; 144 | QCOMPARE(node->getValue().getBuckets(2), referenceBuckets); 145 | } 146 | -------------------------------------------------------------------------------- /code/Analytics/Item.h: -------------------------------------------------------------------------------- 1 | #ifndef ITEM_H 2 | #define ITEM_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef DEBUG 10 | #include 11 | #endif 12 | 13 | 14 | namespace Analytics { 15 | 16 | /** 17 | * Generic data mining types. 18 | */ 19 | // Supports 2^32 *different* items. Upgradable to quint64. 20 | typedef quint32 ItemID; 21 | // Largest supported value for quint32. 22 | #define ROOT_ITEMID 4294967295 23 | typedef QString ItemName; 24 | // Supports 2^32 count. Upgradable to quint64. 25 | typedef quint32 SupportCount; 26 | #define MAX_SUPPORT 4294967295 27 | typedef float Confidence; 28 | typedef QHash ItemIDNameHash; 29 | typedef QHash ItemNameIDHash; 30 | struct Item { 31 | Item() {} 32 | Item(ItemID id) 33 | : id(id), supportCount(1) {} 34 | Item(ItemID id, SupportCount supportCount) 35 | : id(id), supportCount(supportCount) {} 36 | 37 | ItemID id; 38 | /** 39 | * One would not expect SupportCount to be associated with an item. 40 | * However, this allows for cleaner code when building conditional 41 | * FP-trees. 
More specifically: the prefix paths that 42 | * FPTree::calculatePrefixPaths() returns already include the correct 43 | * SupportCount values (i.e. the number of times that itemset was 44 | * included in all transactions) and can be passed to 45 | * FPTree::addTransaction() *directly*. Otherwise, we'd have to 46 | * repeatedly insert the prefix path, to match the number of times 47 | * that itemset was included in all transactions. 48 | * Each item occurs once in each transaction. Therefor, this defaults 49 | * to 1. 50 | */ 51 | SupportCount supportCount; 52 | 53 | #ifdef DEBUG 54 | Item(ItemID id, ItemIDNameHash * IDNameHash) 55 | : id(id), supportCount(1), IDNameHash(IDNameHash) {} 56 | Item(ItemID id, SupportCount supportCount, ItemIDNameHash * IDNameHash) 57 | : id(id), supportCount(supportCount), IDNameHash(IDNameHash) {} 58 | ItemIDNameHash * IDNameHash; 59 | #endif 60 | }; 61 | inline bool operator==(const Item & i1, const Item & i2) { 62 | // Important! We don't require a match on the supportCount attribute! 63 | return i1.id == i2.id; 64 | } 65 | inline bool operator!=(const Item & i1, const Item & i2) { 66 | return !(i1 == i2); 67 | } 68 | 69 | 70 | /** 71 | * Generic data mining container types. 72 | */ 73 | typedef QList ItemIDList; 74 | typedef QList ItemList; 75 | typedef QList ItemNameList; 76 | typedef QList Transaction; 77 | struct FrequentItemset { 78 | FrequentItemset() : support(0) {} 79 | FrequentItemset(ItemIDList itemset, SupportCount support) 80 | : itemset(itemset), support(support) {} 81 | FrequentItemset(ItemList itemset) { 82 | SupportCount minSupport = MAX_SUPPORT; 83 | foreach (Item item, itemset) { 84 | this->itemset.append(item.id); 85 | minSupport = (item.supportCount < minSupport) ? item.supportCount : minSupport; 86 | } 87 | this->support = minSupport; 88 | } 89 | // This constructor can be used while generating new candidate 90 | // frequent itemsets. 
91 | FrequentItemset(ItemID itemID, SupportCount itemIDSupport, const FrequentItemset & suffix) { 92 | this->itemset.append(itemID); 93 | this->itemset.append(suffix.itemset); 94 | this->support = (itemIDSupport < suffix.support || suffix.itemset.isEmpty()) ? itemIDSupport : suffix.support; 95 | } 96 | 97 | ItemIDList itemset; 98 | SupportCount support; 99 | 100 | #ifdef DEBUG 101 | FrequentItemset(ItemIDList itemset, SupportCount support, ItemIDNameHash * IDNameHash) 102 | : itemset(itemset), support(support), IDNameHash(IDNameHash) {} 103 | 104 | ItemIDNameHash * IDNameHash; 105 | #endif 106 | }; 107 | inline bool operator==(const FrequentItemset & fis1, const FrequentItemset & fis2) { 108 | // Important! We don't require a match on the supportCount attribute! 109 | return fis1.support == fis2.support && fis1.itemset == fis2.itemset; 110 | } 111 | inline bool operator!=(const FrequentItemset & fis1, const FrequentItemset & fis2) { 112 | return !(fis1 == fis2); 113 | } 114 | struct AssociationRule { 115 | AssociationRule() {} 116 | AssociationRule(ItemIDList antecedent, ItemIDList consequent, SupportCount support, Confidence confidence) 117 | : antecedent(antecedent), consequent(consequent), support(support), confidence(confidence) {} 118 | 119 | ItemIDList antecedent; 120 | ItemIDList consequent; 121 | SupportCount support; 122 | Confidence confidence; 123 | 124 | #ifdef DEBUG 125 | ItemIDNameHash * IDNameHash; 126 | #endif 127 | }; 128 | uint qHash(const AssociationRule & r); 129 | inline bool operator==(const AssociationRule & r1, const AssociationRule & r2) { 130 | // Important! We don't require a match on the support and consequent attributes! 131 | return r1.antecedent == r2.antecedent && r1.consequent == r2.consequent; 132 | } 133 | inline bool operator!=(const AssociationRule & r1, const AssociationRule & r2) { 134 | return !(r1 == r2); 135 | } 136 | 137 | void registerBasicMetaTypes(); 138 | 139 | #ifdef DEBUG 140 | // QDebug() streaming output operators. 
141 | QDebug operator<<(QDebug dbg, const Item & item); 142 | QDebug operator<<(QDebug dbg, const ItemIDList & pattern); 143 | QDebug operator<<(QDebug dbg, const Transaction & transaction); 144 | QDebug operator<<(QDebug dbg, const FrequentItemset & frequentItemset); 145 | QDebug operator<<(QDebug dbg, const AssociationRule & associationRule); 146 | QDebug itemIDHelper(QDebug dbg, const ItemIDList & itemset, ItemIDNameHash const * const IDNameHash); 147 | #endif 148 | 149 | } 150 | 151 | Q_DECLARE_METATYPE(Analytics::ItemIDList); 152 | Q_DECLARE_METATYPE(Analytics::FrequentItemset); 153 | 154 | 155 | #endif // ITEM_H 156 | -------------------------------------------------------------------------------- /code/UI/MainWindow.h: -------------------------------------------------------------------------------- 1 | #ifndef MAINWINDOW_H 2 | #define MAINWINDOW_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include 40 | #include 41 | #include 42 | 43 | #include "ConceptHierarchyCompleter.h" 44 | #include "CausesTableFilterProxyModel.h" 45 | #include "SettingsDialog.h" 46 | 47 | #include "../EpisodesParser/Parser.h" 48 | #include "../Analytics/Analyst.h" 49 | #include "../Analytics/TiltedTimeWindow.h" 50 | 51 | 52 | #define STATS_ITEM_ESTIMATED_AVG_BYTES 20 * 4 53 | #define STATS_TILTED_TIME_WINDOW_BYTES 292 54 | #define STATS_FPNODE_FIXED_OVERHEAD_BYTES 12 55 | #define STATS_FPNODE_ESTIMATED_CHILDREN_AVG_BYTES 3 * 4 56 | 57 | 58 | class MainWindow : public QMainWindow { 59 | 60 | Q_OBJECT 61 | 62 | public: 63 | explicit MainWindow(QWidget * parent = 0); 64 | 
~MainWindow(); 65 | 66 | signals: 67 | void parse(QString file); 68 | void mine(uint from, uint to); 69 | void mineAndCompare(uint fromOlder, uint toOlder, uint fromNewer, uint toNewer); 70 | 71 | public slots: 72 | // Parser. 73 | void wakeParser(); 74 | void updateParsingStatus(bool parsing); 75 | void updateParsingDuration(int duration); 76 | 77 | // Analyst: analyzing. 78 | void updateAnalyzingStatus(bool analyzing, Time start, Time end, int numPageViews, int numTransactions); 79 | void updateAnalyzingDuration(int duration); 80 | void updateAnalyzingStats(Time start, Time end, int pageViews, int transactions, int uniqueItems, int frequentItems, int patternTreeSize); 81 | 82 | // Analyst: mining. 83 | void updateMiningStatus(bool mining); 84 | void updateMiningDuration(int duration); 85 | void minedRules(uint from, uint to, QList associationRules, Analytics::SupportCount eventsInTimeRange); 86 | void comparedMinedRules(uint fromOlder, uint toOlder, 87 | uint fromNewer, uint toNewer, 88 | QList intersectedRules, 89 | QList olderRules, 90 | QList newerRules, 91 | QList comparedRules, 92 | QList confidenceVariance, 93 | QList supportVariance, 94 | Analytics::SupportCount eventsInIntersectedTimeRange, 95 | Analytics::SupportCount eventsInOlderTimeRange, 96 | Analytics::SupportCount eventsInNewerTimeRange); 97 | 98 | protected slots: 99 | void causesActionChanged(int action); 100 | void causesTimerangeChanged(); 101 | void causesFilterChanged(QString filterString); 102 | 103 | void importFile(); 104 | void settingsDialog(); 105 | 106 | private: 107 | // Logic. 108 | void initLogic(); 109 | void connectLogic(); 110 | void assignLogicToThreads(); 111 | 112 | // UI set-up. 113 | void initUI(); 114 | void createSparklineGroupbox(); 115 | void createStatsGroupbox(); 116 | void createCausesGroupbox(); 117 | void createStatusGroupbox(); 118 | void createMenuBar(); 119 | void connectUI(); 120 | 121 | // UI updating. 
122 | void updateStatus(const QString & status = QString::null); 123 | void updateCausesComparisonAbility(bool able); 124 | void mineOrCompare(); 125 | static QPair mapTimerangeChoiceToBucket(int choice); 126 | 127 | // Logic. 128 | EpisodesParser::Parser * parser; 129 | Analytics::Analyst * analyst; 130 | QThread parserThread; 131 | QThread analystThread; 132 | 133 | // Stats. 134 | QMutex statusMutex; 135 | bool parsing; 136 | int patternTreeSize; 137 | Time startTime; 138 | Time endTime; 139 | int totalPageViews; 140 | int totalTransactions; 141 | int totalPatternsExaminedWhileMining; 142 | int totalParsingDuration; 143 | int totalAnalyzingDuration; 144 | int totalMiningDuration; 145 | 146 | // Major widgets. 147 | QVBoxLayout * mainLayout; 148 | 149 | // Sparkline groupbox. 150 | QGroupBox * sparklineGroupbox; 151 | QLabel * label; 152 | 153 | // Stats groupbox. 154 | QGroupBox * statsGroupbox; 155 | QComboBox * statsEpisodeComboBox; 156 | QComboBox * statsLocationComboBox; 157 | 158 | // Causes groupbox. 159 | QGroupBox * causesGroupbox; 160 | QComboBox * causesActionChoice; 161 | QComboBox * causesMineTimerangeChoice; 162 | QLabel * causesCompareLabel; 163 | QComboBox * causesCompareTimerangeChoice; 164 | QLineEdit * causesFilter; 165 | ConceptHierarchyCompleter * causesFilterCompleter; 166 | QLabel * causesDescription; 167 | QTableView * causesTable; 168 | QStandardItemModel * causesTableModel; 169 | CausesTableFilterProxyModel * causesTableProxyModel; 170 | 171 | // Status groupbox. 
172 | QGroupBox * statusGroupbox; 173 | QLabel * statusCurrentlyProcessing; 174 | QLabel * status_measurements_startDate; 175 | QLabel * status_measurements_endDate; 176 | QLabel * status_measurements_pageViews; 177 | QLabel * status_measurements_episodes; 178 | QLabel * status_performance_parsing; 179 | QLabel * status_performance_analyzing; 180 | QLabel * status_performance_mining; 181 | QLabel * status_mining_uniqueItems; 182 | QLabel * status_mining_frequentItems; 183 | QLabel * status_mining_patternTree; 184 | 185 | // Menu bar. 186 | QMenu * menuFile; 187 | QAction * menuFileImport; 188 | QAction * menuFileSettings; 189 | }; 190 | 191 | #endif // MAINWINDOW_H 192 | -------------------------------------------------------------------------------- /code/Analytics/Tests/TestTiltedTimeWindow.cpp: -------------------------------------------------------------------------------- 1 | #include "TestTiltedTimeWindow.h" 2 | 3 | 4 | /** 5 | * IMPORTANT NOTE: 6 | * Each time when the SupportCount -1 is being used in this test, it's meant 7 | * to be TTW_BUCKET_UNUSED. However, for legibility purposes, I've opted to 8 | * directly write -1 instead. 9 | */ 10 | void TestTiltedTimeWindow::basic() { 11 | TiltedTimeWindow * ttw = new TiltedTimeWindow(); 12 | 13 | QList supportCounts; 14 | // First hour: first four quarters. 15 | supportCounts << 45 << 67 << 88 << 93; 16 | // Second hour. 17 | supportCounts << 34 << 49 << 36 << 97; 18 | // Third hour. 19 | supportCounts << 50 << 50 << 50 << 50; 20 | // Hours 4-23. 21 | for (int i = 3; i <= 23; i++) 22 | supportCounts << 25 << 25 << 25 << 25; 23 | // First quarter of second day to provide tipping point: now the 24 24 | // hour buckets are all filled. 25 | supportCounts << 10; 26 | // Four more quarters, meaning that the first hour of the second day 27 | // will be completed *and* another quarter is added, which will provide 28 | // the tipping point to fill the first day bucket. 
29 | supportCounts << 10 << 10 << 10 << 20; 30 | // And finally, four more quarters, which will ensure there are 2 hours 31 | // of the second day. 32 | supportCounts << 20 << 20 << 20 << 30; 33 | 34 | // First hour. 35 | for (int i = 0; i < 4; i++) 36 | ttw->appendQuarter(supportCounts[i], i); 37 | QCOMPARE(ttw->getBuckets(4), QVector() << 93 << 88 << 67 << 45); 38 | QCOMPARE(ttw->oldestBucketFilled, 3); 39 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 3); 40 | 41 | // Second hour. 42 | for (int i = 4; i < 8; i++) 43 | ttw->appendQuarter(supportCounts[i], i); 44 | QCOMPARE(ttw->getBuckets(5), QVector() << 97 << 36 << 49 << 34 45 | << 293); 46 | QCOMPARE(ttw->oldestBucketFilled, 4); 47 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 7); 48 | 49 | // Third hour. 50 | for (int i = 8; i < 12; i++) 51 | ttw->appendQuarter(supportCounts[i], i); 52 | QCOMPARE(ttw->getBuckets(6), QVector() << 50 << 50 << 50 << 50 53 | << 216 << 293); 54 | QCOMPARE(ttw->oldestBucketFilled, 5); 55 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 11); 56 | 57 | // Hours 4-23. 58 | for (int i = 12; i < 96 ; i++) 59 | ttw->appendQuarter(supportCounts[i], i); 60 | QCOMPARE(ttw->getBuckets(28), QVector() << 25 << 25 << 25 << 25 61 | << 100 << 100 << 100 62 | << 100 << 100 << 100 63 | << 100 << 100 << 100 64 | << 100 << 100 << 100 65 | << 100 << 100 << 100 66 | << 100 << 100 << 100 67 | << 100 << 100 << 200 68 | << 216 << 293 << -1); 69 | QCOMPARE(ttw->oldestBucketFilled, 26); 70 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 95); 71 | 72 | // First quarter of second day to provide tipping point: now the 24 73 | // hour buckets are all filled. 
74 | ttw->appendQuarter(supportCounts[96], 96); 75 | QCOMPARE(ttw->getBuckets(28), QVector() << 10 << -1 << -1 << -1 76 | << 100 << 100 << 100 77 | << 100 << 100 << 100 78 | << 100 << 100 << 100 79 | << 100 << 100 << 100 80 | << 100 << 100 << 100 81 | << 100 << 100 << 100 82 | << 100 << 100 << 100 83 | << 200 << 216 << 293); 84 | QCOMPARE(ttw->oldestBucketFilled, 27); 85 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 96); 86 | 87 | // Four more quarters, meaning that the first hour of the second day 88 | // will be completed *and* another quarter is added, which will provide 89 | // the tipping point to fill the first day bucket. 90 | for (int i = 97; i < 101 ; i++) 91 | ttw->appendQuarter(supportCounts[i], i); 92 | QCOMPARE(ttw->getBuckets(29), QVector() << 20 << -1 << -1 << -1 93 | << 40 << -1 << -1 94 | << -1 << -1 << -1 95 | << -1 << -1 << -1 96 | << -1 << -1 << -1 97 | << -1 << -1 << -1 98 | << -1 << -1 << -1 99 | << -1 << -1 << -1 100 | << -1 << -1 << -1 101 | << 2809); // 2809 = 21*100 + 200 + 216 + 293 102 | QCOMPARE(ttw->oldestBucketFilled, 28); 103 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 100); 104 | 105 | // Four more quarters, meaning that the second hour of the second day will 106 | // be completed. This is a test to check if the "oldestBucketFilled" 107 | // variable updates correctly: it should remain set to 28, and should not 108 | // be reset to 5. Since the second hour is added (which means the first 109 | // hour shifts from bucket 4 to bucket 5), this is a logic edge case that 110 | // may be expected. 111 | for (int i = 101; i < 105; i++) 112 | ttw->appendQuarter(supportCounts[i], i); 113 | QCOMPARE(ttw->oldestBucketFilled, 28); 114 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 104); 115 | 116 | // Drop tail starting at Granularity 1. This means only the value in the 117 | // first granularity (buckets 0, 1, 2 and 3) are kept, and all subsequent 118 | // granularities (and buckets) are reset. 
119 | ttw->dropTail((Granularity) 1); 120 | QVector buckets = ttw->getBuckets(); 121 | QCOMPARE(buckets[0], (SupportCount) 30); 122 | for (int i = 1; i < TTW_NUM_BUCKETS; i++) 123 | QCOMPARE(buckets[i], (SupportCount) -1); 124 | QCOMPARE(ttw->oldestBucketFilled, 3); 125 | QCOMPARE(ttw->getLastUpdate(), (unsigned int) 104); 126 | 127 | delete ttw; 128 | } 129 | -------------------------------------------------------------------------------- /code/Analytics/PatternTree.cpp: -------------------------------------------------------------------------------- 1 | #include "PatternTree.h" 2 | 3 | namespace Analytics { 4 | 5 | //------------------------------------------------------------------------ 6 | // Public methods. 7 | 8 | PatternTree::PatternTree() { 9 | this->root = new FPNode(ROOT_ITEMID); 10 | this->nodeCount = 0; 11 | this->currentQuarter = 0; 12 | } 13 | 14 | PatternTree::~PatternTree() { 15 | delete root; 16 | } 17 | 18 | TiltedTimeWindow * PatternTree::getPatternSupport(const ItemIDList & pattern) const { 19 | return this->root->findNodeByPattern(pattern); 20 | } 21 | 22 | /** 23 | * Get the frequent itemsets that match given constraints for a range of 24 | * buckets in the TiltedTimeWindows in this PatternTree. 25 | * 26 | * @param minSupport 27 | * The minimum support that the itemset must have over the given range 28 | * to qualify as "frequent". 29 | * @param frequentItemsetConstraints 30 | * The constraints that frequent itemsets must match. 31 | * @param from 32 | * The range starts at this bucket. 33 | * @param to 34 | * The range starts at this bucket. 35 | * @param prefix 36 | * Internal parameter (for recursive calls). 37 | * @param node 38 | * Internal parameter (for recursive calls). 39 | * @return 40 | * The frequent itemsets over the given range that match the given 41 | * constraints. 
42 | */ 43 | QList PatternTree::getFrequentItemsetsForRange(SupportCount minSupport, const Constraints & frequentItemsetConstraints, uint from, uint to, const ItemIDList & prefix, FPNode * node) const { 44 | QList frequentItemsets; 45 | FrequentItemset frequentItemset; 46 | 47 | // Start at the root. 48 | if (node == NULL) 49 | node = this->root; 50 | // If it's not the root node, set the current frequent itemset. 51 | else { 52 | frequentItemset.itemset = prefix; 53 | frequentItemset.itemset.append(node->getItemID()); 54 | frequentItemset.support = node->getValue().getSupportForRange(from, to); 55 | #ifdef DEBUG 56 | frequentItemset.IDNameHash = node->itemIDNameHash; 57 | #endif 58 | } 59 | 60 | // Add this frequent itemset to the list of frequent itemsets if 61 | // it qualifies through its support and if it matches the 62 | // constraints. 63 | if (frequentItemset.support > minSupport && frequentItemsetConstraints.matchItemset(frequentItemset.itemset)) 64 | frequentItemsets.append(frequentItemset); 65 | 66 | // Recursive call for each child node of the current node. 67 | foreach (FPNode * child, node->getChildren()) { 68 | frequentItemsets.append(this->getFrequentItemsetsForRange( 69 | minSupport, 70 | frequentItemsetConstraints, 71 | from, 72 | to, 73 | frequentItemset.itemset, 74 | child 75 | )); 76 | } 77 | 78 | return frequentItemsets; 79 | } 80 | 81 | void PatternTree::addPattern(const FrequentItemset & pattern, quint32 updateID) { 82 | // The initial current node is the root node. 83 | FPNode * currentNode = root; 84 | FPNode * nextNode; 85 | 86 | foreach (ItemID itemID, pattern.itemset) { 87 | if (currentNode->hasChild(itemID)) 88 | nextNode = currentNode->getChild(itemID); 89 | else { 90 | // Create a new node and add it as a child of the current node. 
91 | nextNode = new FPNode(itemID); 92 | this->nodeCount++; 93 | nextNode->setParent(currentNode); 94 | #ifdef DEBUG 95 | nextNode->itemIDNameHash = pattern.IDNameHash; 96 | #endif 97 | } 98 | 99 | // We've processed this item in the transaction, time to move on 100 | // to the next! 101 | currentNode = nextNode; 102 | nextNode = NULL; 103 | } 104 | 105 | TiltedTimeWindow * ttw = currentNode->getPointerToValue(); 106 | 107 | // Make sure the quarters are in sync. 108 | for (uint i = ttw->getCapacityUsed(GRANULARITY_QUARTER); i < this->currentQuarter; i++) 109 | ttw->appendQuarter(0, updateID); 110 | 111 | // Now that the quarters are in sync, finally append the quarter. 112 | ttw->appendQuarter(pattern.support, updateID); 113 | } 114 | 115 | void PatternTree::removePattern(FPNode * const node) { 116 | this->nodeCount -= (1 + node->getNumDescendants()); 117 | delete node; 118 | } 119 | 120 | 121 | //------------------------------------------------------------------------ 122 | // Static public methods. 123 | 124 | ItemIDList PatternTree::getPatternForNode(FPNode const * const node) { 125 | ItemIDList pattern; 126 | FPNode const * nextNode; 127 | 128 | nextNode = node; 129 | while (nextNode->getItemID() != ROOT_ITEMID) { 130 | pattern.prepend(nextNode->getItemID()); 131 | nextNode = nextNode->getParent(); 132 | } 133 | 134 | return pattern; 135 | } 136 | 137 | 138 | //------------------------------------------------------------------------ 139 | // Other. 140 | 141 | #ifdef DEBUG 142 | QDebug operator<<(QDebug dbg, const PatternTree & tree) { 143 | dbg.nospace() << dumpHelper(*(tree.getRoot())).toStdString().c_str(); 144 | 145 | return dbg.nospace(); 146 | } 147 | 148 | QString dumpHelper(const FPNode & node, QString prefix) { 149 | static QString suffix = "\t"; 150 | QString s; 151 | bool firstChild = true; 152 | 153 | // Print current node. 154 | QDebug(&s) << node << "\n"; 155 | 156 | // Print all child nodes. 
157 | if (node.numChildren() > 0) { 158 | foreach (FPNode * child, node.getChildren()) { 159 | if (firstChild) 160 | s += prefix; 161 | else 162 | firstChild = false; 163 | s += "-> " + dumpHelper(*child, prefix + suffix); 164 | } 165 | } 166 | 167 | return s; 168 | } 169 | 170 | QDebug operator<<(QDebug dbg, const FPNode & node) { 171 | if (node.getItemID() == ROOT_ITEMID) 172 | dbg.nospace() << "(NULL)"; 173 | else { 174 | QString nodeID; 175 | 176 | ItemIDList pattern = PatternTree::getPatternForNode(&node); 177 | nodeID.sprintf("0x%04d", node.getNodeID()); 178 | 179 | dbg.nospace() << "({"; 180 | itemIDHelper(dbg, pattern, node.itemIDNameHash); 181 | dbg.nospace() << "}, " << node.getValue() << ") (" << nodeID.toStdString().c_str() << ")"; 182 | } 183 | 184 | return dbg.nospace(); 185 | } 186 | #endif 187 | 188 | } 189 | -------------------------------------------------------------------------------- /code/EpisodesParser/typedefs.h: -------------------------------------------------------------------------------- 1 | #ifndef TYPEDEFS_H 2 | #define TYPEDEFS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #ifdef DEBUG 13 | #include 14 | #endif 15 | 16 | 17 | namespace EpisodesParser { 18 | 19 | 20 | typedef uint Time; 21 | 22 | // Efficient storage of Episode names: don't store the actual names, use 23 | // 8-bit IDs instead. This allows for 256 different Episode names, which 24 | // should be more than sufficient. 25 | typedef QString EpisodeName; 26 | typedef quint8 EpisodeID; 27 | typedef QHash EpisodeNameIDHash; 28 | typedef QHash EpisodeIDNameHash; 29 | 30 | // Store Episode durations as 16-bit uints. 31 | typedef quint16 EpisodeDuration; 32 | // The EpisodeDuration will be discretized to an EpisodeSpeed for association 33 | // rule mining. 
34 | typedef QString EpisodeSpeed; 35 | 36 | struct Episode { 37 | Episode() {} 38 | Episode(EpisodeID id, EpisodeDuration duration) : id(id), duration(duration) {} 39 | 40 | EpisodeID id; 41 | EpisodeDuration duration; 42 | #ifdef DEBUG 43 | EpisodeIDNameHash * IDNameHash; 44 | #endif 45 | }; 46 | inline bool operator==(const Episode &e1, const Episode &e2) { 47 | return e1.id == e2.id && e1.duration == e2.duration; 48 | } 49 | typedef QList EpisodeList; 50 | 51 | // 510 is the highest HTTP status code, so 9 bits would be sufficient, but 52 | // that's not possible, so we use 16 bits instead. 53 | typedef quint16 HTTPStatus; 54 | 55 | typedef QString URL; 56 | typedef QString UA; 57 | 58 | // Efficient storage of domain names. 59 | typedef QString DomainName; 60 | typedef quint8 DomainID; 61 | typedef QHash DomainNameIDHash; 62 | typedef QHash DomainIDNameHash; 63 | struct Domain { 64 | DomainID id; 65 | // TODO: allow multiple domains to be analyzed as one whole by providing 66 | // a common identifier. 67 | #ifdef DEBUG 68 | EpisodeIDNameHash * IDNameHash; 69 | #endif 70 | }; 71 | 72 | // Parsed raw line from Episodes log file: no processing applied whatsoever. 73 | struct EpisodesLogLine { 74 | QHostAddress ip; 75 | Time time; 76 | EpisodeList episodes; 77 | HTTPStatus status; 78 | URL url; 79 | UA ua; 80 | Domain domain; 81 | #ifdef DEBUG 82 | EpisodeIDNameHash * episodeIDNameHash; 83 | DomainIDNameHash * domainIDNameHash; 84 | #endif 85 | }; 86 | 87 | 88 | 89 | struct Location{ 90 | QString continent; 91 | QString country; 92 | QString region; 93 | QString city; 94 | QString isp; 95 | 96 | // @TODO this is a likely performance bottleneck. 97 | // @TRICKY: Note that we don't check continent and country, we assume each 98 | // (region, city, isp) tuple is unique on its own! 
99 | bool operator==(const Location & other) const { 100 | return (this->region == other.region 101 | && this->city == other.city 102 | && this->isp == other.isp); 103 | } 104 | 105 | /** 106 | * Generate the items for this Location, to allow for association rule 107 | * mining. This takes the concept hierarchy into account. 108 | * 109 | * @return 110 | * The association rule items for this Location. 111 | */ 112 | QStringList generateAssociationRuleItems() const { 113 | static const QString prefix = "location:"; 114 | static const QString s = ":"; 115 | 116 | return QStringList() // Granular locations. 117 | << prefix + this->continent 118 | << prefix + this->continent + s + this->country 119 | << prefix + this->continent + s + this->country + s + this->region 120 | // ISP per country (global ISP does not make sense). 121 | << "isp" + s + this->country + s + this->isp; 122 | } 123 | }; 124 | typedef quint32 LocationID; 125 | typedef QHash LocationToIDHash; 126 | typedef QHash LocationFromIDHash; 127 | uint qHash(const Location & location); 128 | 129 | struct UAHierarchyDetails { 130 | // OS details. 131 | QString platform; 132 | // Browser details. 133 | QString browser_name; 134 | QString browser_version; 135 | quint16 browser_version_major; 136 | quint16 browser_version_minor; 137 | bool is_mobile; 138 | 139 | // @TODO this is a likely performance bottleneck. 140 | bool operator==(const UAHierarchyDetails & other) const { 141 | return (this->platform == other.platform && this->browser_name == other.browser_name && this->browser_version == other.browser_version); 142 | } 143 | 144 | /** 145 | * Generate the items for this UA, to allow for association rule 146 | * mining. This takes the concept hierarchy into account. 147 | * 148 | * @return 149 | * The association rule items for this Location. 
150 | */ 151 | QStringList generateAssociationRuleItems() const { 152 | static const QString prefix = "ua:"; 153 | static const QString s = ":"; 154 | 155 | const QString browser_version_major = QString::number(this->browser_version_major); 156 | const QString browser_version_minor = QString::number(this->browser_version_minor); 157 | 158 | QStringList items; 159 | 160 | // Platform-specific browsers. 161 | items << prefix + this->platform 162 | << prefix + this->platform + s + this->browser_name 163 | << prefix + this->platform + s + this->browser_name + s + browser_version_major 164 | << prefix + this->platform + s + this->browser_name + s + browser_version_major + s + browser_version_minor; 165 | 166 | // Mobile or not. 167 | if (this->is_mobile) 168 | items << prefix + "isMobile"; 169 | 170 | return items; 171 | } 172 | }; 173 | typedef quint16 UAHierarchyID; 174 | typedef QHash UAHierarchyDetailsIDHash; 175 | typedef QHash UAHierarchyIDDetailsHash; 176 | uint qHash(const UAHierarchyDetails & ua); 177 | 178 | struct ExpandedEpisodesLogLine { 179 | LocationID location; 180 | Time time; 181 | EpisodeList episodes; 182 | HTTPStatus status; 183 | URL url; 184 | UAHierarchyID ua; 185 | 186 | LocationFromIDHash * locationFromIDHash; 187 | UAHierarchyIDDetailsHash * uaHierarchyIDDetailsHash; 188 | #ifdef DEBUG 189 | EpisodeIDNameHash * episodeIDNameHash; 190 | #endif 191 | }; 192 | 193 | 194 | 195 | 196 | 197 | #ifdef DEBUG 198 | // QDebug() streaming output operators. 
199 | QDebug operator<<(QDebug dbg, const Episode & episode); 200 | QDebug operator<<(QDebug dbg, const Domain & domain); 201 | QDebug operator<<(QDebug dbg, const EpisodesLogLine & episodesLogLine); 202 | QDebug operator<<(QDebug dbg, const Location & location); 203 | QDebug operator<<(QDebug dbg, const UAHierarchyDetails & ua); 204 | QDebug operator<<(QDebug dbg, const ExpandedEpisodesLogLine & episodesLogLine); 205 | #endif 206 | 207 | 208 | 209 | } 210 | 211 | // Register metatypes to allow these types to be streamed in QTests. 212 | Q_DECLARE_METATYPE(EpisodesParser::EpisodeList) 213 | Q_DECLARE_METATYPE(EpisodesParser::Episode) 214 | 215 | #endif // TYPEDEFS_H 216 | -------------------------------------------------------------------------------- /code/EpisodesParser/Tests/TestParser.cpp: -------------------------------------------------------------------------------- 1 | #include "TestParser.h" 2 | 3 | void TestParser::init() { 4 | QFile logFile("episodes.log"); 5 | if (!logFile.open(QIODevice::WriteOnly | QIODevice::Text | QIODevice::Truncate)) 6 | QFAIL("Could not create sample Episodes log file."); 7 | 8 | QTextStream out(&logFile); 9 | out << "218.56.155.59 [Sunday, 14-Nov-2010 06:27:03 +0100] \"?ets=css:203,headerjs:94,footerjs:500,domready:843,tabs:110,ToThePointShowHideChangelog:15,DrupalBehaviors:141,frontend:1547\" 200 \"http://driverpacks.net/driverpacks/windows/xp/x86/chipset/10.09\" \"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\" \"driverpacks.net\"" << "\n" 10 | << "190.166.203.6 [Sunday, 14-Nov-2010 06:27:06 +0100] \"?ets=css:0,headerjs:588,footerjs:61,domready:680,tabs:1,ToThePointShowHideChangelog:0,DrupalBehaviors:1,frontend:998\" 200 \"http://driverpacks.net/\" \"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7\" \"driverpacks.net\"" << "\n" 11 | << "76.170.154.29 [Sunday, 14-Nov-2010 06:27:08 +0100] 
\"?ets=css:0,headerjs:41,footerjs:0,domready:822,tabs:1,ToThePointShowHideChangelog:0,DrupalBehaviors:1,frontend:990\" 200 \"http://driverpacks.net/driverpacks/windows/7/x86/graphics-b/10.07\" \"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12\" \"driverpacks.net\"" << "\n" 12 | << "76.170.154.29 [Sunday, 14-Nov-2010 06:27:11 +0100] \"?ets=backend:439,css:61,headerjs:162,footerjs:0,domready:318,tabs:1,tableHeader:16,ToThePointShowHideChangelog:0,DrupalBehaviors:17,pageready:775,frontend:336,totaltime:775\" 200 \"http://driverpacks.net/driverpacks/windows/7/x86/graphics-b/10.07/drivers\" \"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12\" \"driverpacks.net\"" << "\n" 13 | << "76.170.154.29 [Sunday, 14-Nov-2010 06:27:12 +0100] \"?ets=css:40,headerjs:90,footerjs:0,domready:223,tabs:1,ToThePointShowHideChangelog:0,DrupalBehaviors:1,frontend:382\" 200 \"http://driverpacks.net/driverpacks/windows/7/x86/graphics-b/10.07\" \"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.12) Gecko/20101026 Firefox/3.6.12\" \"driverpacks.net\"" << "\n"; 14 | 15 | logFile.close(); 16 | } 17 | 18 | void TestParser::cleanup() { 19 | QFile logFile("episodes.log"); 20 | if (!logFile.remove()) 21 | QFAIL("Could not delete sample Episodes log file."); 22 | } 23 | 24 | void TestParser::parse() { 25 | Parser parser; 26 | 27 | QVERIFY(parser.parse("episodes.log") == 5); 28 | } 29 | 30 | void TestParser::mapLineToEpisodesLogLine_data() { 31 | QTest::addColumn("line"); 32 | QTest::addColumn("ip"); 33 | QTest::addColumn