├── .gitignore ├── TripleBitQuery ├── module.mk └── TripleBitQuery.cpp ├── Query ├── queryLUBM8 ├── dblpQuery5 ├── queryLUBM1 ├── dblpQuery2 ├── dblpQuery3 ├── queryLUBM7 ├── dblpQuery4 ├── queryLUBM2 ├── queryLUBM4 ├── dblpQuery1 ├── queryLUBM6 ├── queryUniprot2 ├── queryLUBM5 ├── queryUniprot6 ├── queryLUBM3 ├── queryUniprot8 ├── queryBTC5 ├── queryBTC2 ├── queryBTC1 ├── queryUniprot5 ├── queryUniprot7 ├── queryBTC6 ├── queryUniprot4 ├── queryUniprot1 ├── queryBTC3 ├── queryBTC8 ├── queryBTC4 ├── queryBTC7 └── queryUniprot3 ├── BuildTripleBitFromN3 ├── module.mk └── BuildTripleBit.cpp ├── BuildTripleBitFromRDF ├── module.mk └── BuildTripleBit.cpp ├── TripleBit ├── module.mk ├── IRepository.cpp ├── MessageEngine.h ├── Sorter.h ├── OSFile.h ├── RDFParser.h ├── QuerySemanticAnalysis.h ├── BitmapWAH.h ├── MessageEngine.cpp ├── TimeStamp.h ├── RDFQuery.h ├── MMapBuffer.h ├── util │ ├── BufferManager.h │ ├── SortMergeJoin.h │ ├── HashJoin.h │ ├── BufferManager.cpp │ ├── FindEntityID.h │ └── SortMergeJoin.cpp ├── RDFParser.cpp ├── URITable.h ├── PredicateTable.h ├── OSFile.cpp ├── SynchronousBuffer.h ├── PlanGenerator.h ├── IRepository.h ├── SPARQLLexer.h ├── HashIndex.h ├── LineHashIndex.h ├── ThreadPool.h ├── Hash.h ├── SynchronousBuffer.cpp ├── MemoryBuffer.h ├── BitVectorWAH.h ├── TripleBitBuilder.h ├── TripleBitRepository.h ├── StringIDSegment.h ├── ObjectPool.cpp ├── TempFile.h ├── BitmapWAH.cpp ├── EntityIDBuffer.h ├── TripleBitQuery.h ├── RDFQuery.cpp ├── TurtleParser.h ├── SPARQLParser.h ├── MMapBuffer.cpp ├── URITable.cpp ├── StatisticsBuffer.h ├── ThreadPool.cpp ├── MemoryBuffer.cpp ├── PredicateTable.cpp ├── SPARQLLexer.cpp ├── Sorter.cpp ├── TripleBitQueryGraph.cpp ├── TripleBit.h └── TripleBitQueryGraph.h ├── README └── Makefile /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | -------------------------------------------------------------------------------- /TripleBitQuery/module.mk: 
-------------------------------------------------------------------------------- 1 | local_src := $(wildcard $(subdirectory)/*.cpp) 2 | 3 | $(eval $(call make-program,triplebitQuery,libtriplebit.a,$(local_src))) 4 | 5 | $(eval $(call compile-rules)) 6 | -------------------------------------------------------------------------------- /Query/queryLUBM8: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y ?z WHERE { 4 | ?x ?y ?z . 5 | } 6 | -------------------------------------------------------------------------------- /BuildTripleBitFromN3/module.mk: -------------------------------------------------------------------------------- 1 | local_src := $(wildcard $(subdirectory)/*.cpp) 2 | 3 | $(eval $(call make-program,buildTripleBitFromN3,libtriplebit.a,$(local_src))) 4 | 5 | $(eval $(call compile-rules)) 6 | -------------------------------------------------------------------------------- /BuildTripleBitFromRDF/module.mk: -------------------------------------------------------------------------------- 1 | local_src := $(wildcard $(subdirectory)/*.cpp) 2 | 3 | $(eval $(call make-program,buildTripleBitFromRDF,libtriplebit.a,$(local_src))) 4 | 5 | $(eval $(call compile-rules)) 6 | -------------------------------------------------------------------------------- /Query/dblpQuery5: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX Semrex: 3 | PREFIX rdfs: 4 | select ?y ?z where 5 | { 6 | ?y rdfs:label "Semantic Web". 7 | ?y Semrex:hasAbstract ?z . 8 | } 9 | -------------------------------------------------------------------------------- /Query/queryLUBM1: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x WHERE 4 | { 5 | ?x ub:subOrganizationOf . 6 | ?x rdf:type ub:ResearchGroup . 
7 | } 8 | -------------------------------------------------------------------------------- /TripleBit/module.mk: -------------------------------------------------------------------------------- 1 | sub_dir := $(call source-dir-to-binary-dir, TripleBit/util) 2 | $(shell $(MKDIR) $(sub_dir)) 3 | 4 | local_src := $(wildcard $(subdirectory)/*.cpp) $(wildcard $(subdirectory)/util/*.cpp) 5 | 6 | $(eval $(call make-library,libtriplebit.a,$(local_src))) 7 | 8 | $(eval $(call compile-rules)) 9 | -------------------------------------------------------------------------------- /Query/dblpQuery2: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX Semrex: 3 | PREFIX rdfs: 4 | select ?y ?z where 5 | { 6 | ?x rdfs:label "Jeffrey D. Ullman" . 7 | ?y Semrex:hasAuthor ?x . 8 | ?y rdfs:label ?z . 9 | } 10 | -------------------------------------------------------------------------------- /Query/dblpQuery3: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX Semrex: 3 | PREFIX rdfs: 4 | select ?y ?z where 5 | { 6 | ?x rdfs:label "Jeffrey D. Ullman" . 7 | ?y Semrex:hasAuthor ?x . 8 | ?y Semrex:year ?z . 9 | } 10 | -------------------------------------------------------------------------------- /Query/queryLUBM7: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y WHERE { 4 | ?y ub:subOrganizationOf . 5 | ?y rdf:type ub:Department . 6 | ?x ub:worksFor ?y . 7 | ?x rdf:type ub:FullProfessor . 8 | } 9 | -------------------------------------------------------------------------------- /Query/dblpQuery4: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX semrex: 3 | PREFIX rdfs: 4 | SELECT ?paper ?paperTitle WHERE 5 | { 6 | ?pRef rdfs:label "The Semantic Web" . 7 | ?paper semrex:hasReference ?pRef . 8 | ?paper rdfs:label ?paperTitle . 
9 | } 10 | -------------------------------------------------------------------------------- /Query/queryLUBM2: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x WHERE { 4 | ?x ub:worksFor . 5 | ?x rdf:type ub:FullProfessor . 6 | ?x ub:name ?y1 . 7 | ?x ub:emailAddress ?y2 . 8 | ?x ub:telephone ?y3 . 9 | } 10 | -------------------------------------------------------------------------------- /Query/queryLUBM4: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y WHERE { 4 | ?x rdf:type ub:GraduateStudent . 5 | ?y rdf:type ub:GraduateCourse . 6 | ub:teacherOf ?y . 7 | ?x ub:takesCourse ?y . 8 | } 9 | -------------------------------------------------------------------------------- /Query/dblpQuery1: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX SemreX: 3 | PREFIX rdfs: 4 | SELECT ?author ?authorname WHERE 5 | { 6 | ?paper rdfs:label "The Semantic Web" . 7 | ?paper SemreX:hasAuthor ?author . 8 | ?author rdfs:label ?authorname . 9 | } 10 | -------------------------------------------------------------------------------- /Query/queryLUBM6: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y ?z WHERE { 4 | ?y ub:teacherOf ?z . 5 | ?y rdf:type ub:FullProfessor . 6 | ?z rdf:type ub:Course . 7 | ?x ub:takesCourse ?z . 8 | ?x rdf:type ub:UndergraduateStudent . 9 | ?x ub:advisor ?y . 10 | } 11 | -------------------------------------------------------------------------------- /Query/queryUniprot2: -------------------------------------------------------------------------------- 1 | PREFIX uni: 2 | PREFIX uniprot: 3 | PREFIX schema: 4 | PREFIX file: 5 | select ?protein ?name where 6 | { 7 | ?protein a uni:Protein . 8 | ?protein uni:encodedBy [uni:name "CRB"] . 9 | ?protein uni:name ?name . 
10 | } 11 | -------------------------------------------------------------------------------- /Query/queryLUBM5: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y ?z WHERE { 4 | ?z ub:subOrganizationOf ?y . 5 | ?y rdf:type ub:University . 6 | ?z rdf:type ub:Department . 7 | ?x ub:memberOf ?z . 8 | ?x rdf:type ub:GraduateStudent . 9 | ?x ub:undergraduateDegreeFrom ?y . 10 | } 11 | -------------------------------------------------------------------------------- /Query/queryUniprot6: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX uni: 3 | select ?protein ?annotation where 4 | { 5 | ?protein uni:annotation ?annotation . 6 | ?protein rdf:type uni:Protein . 7 | ?annotation rdf:type . 8 | ?annotation uni:range ?range . 9 | } 10 | -------------------------------------------------------------------------------- /Query/queryLUBM3: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX ub: 3 | SELECT ?x ?y ?z WHERE { 4 | ?x rdf:type ub:UndergraduateStudent . 5 | ?y rdf:type ub:University . 6 | ?z rdf:type ub:Department . 7 | ?x ub:memberOf ?z . 8 | ?z ub:subOrganizationOf ?y . 9 | ?x ub:undergraduateDegreeFrom ?y . 10 | } 11 | -------------------------------------------------------------------------------- /Query/queryUniprot8: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX uni: 3 | PREFIX uni2: 4 | PREFIX schema: 5 | SELECT ?a ?b ?ab WHERE 6 | { 7 | ?b uni:modified "2008-07-22" . 8 | ?b rdf:type uni:Protein . 9 | ?a uni:replaces ?ab . 10 | ?ab uni:replacedBy ?b . 
11 | } 12 | -------------------------------------------------------------------------------- /Query/queryBTC5: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select ?l ?long ?lat where { 7 | ?a [] "Barack Obama" . 8 | ?a dbpedia:placeOfBirth ?l . 9 | ?l pos:lat ?lat . 10 | ?l pos:long ?long . 11 | } 12 | 13 | -------------------------------------------------------------------------------- /Query/queryBTC2: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select ?b ?p ?bn where { 7 | ?a [] "Tim Berners-Lee" . 8 | ?a dbpedia:dateOfBirth ?b . 9 | ?a dbpedia:placeOfBirth ?p . 10 | ?a dbpedia:name ?bn . 11 | } 12 | 13 | -------------------------------------------------------------------------------- /Query/queryBTC1: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select ?lat ?long where { 7 | ?a [] "Bro-C'hall" . 8 | ?a . 9 | ?a pos:lat ?lat . 10 | ?a pos:long ?long . 11 | } 12 | 13 | -------------------------------------------------------------------------------- /Query/queryUniprot5: -------------------------------------------------------------------------------- 1 | PREFIX uni: 2 | PREFIX uniprot: 3 | PREFIX schema: 4 | PREFIX file: 5 | select ?a ?vo where 6 | { 7 | ?a uni:annotation ?vo. 8 | ?a schema:seeAlso . 9 | ?a schema:seeAlso . 10 | ?a uni:citation . 
11 | } 12 | 13 | -------------------------------------------------------------------------------- /Query/queryUniprot7: -------------------------------------------------------------------------------- 1 | prefix rdfs: 2 | prefix rdf: 3 | prefix uni: 4 | prefix taxon: 5 | select ?protein ?annotation where 6 | { 7 | ?protein uni:annotation ?annotation . 8 | ?protein rdf:type uni:Protein . 9 | ?protein uni:organism taxon:9606 . 10 | ?annotation rdf:type . 11 | ?annotation rdfs:comment ?text . 12 | } 13 | -------------------------------------------------------------------------------- /Query/queryBTC6: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select distinct ?d where { 7 | ?a dbpedia:senators ?c . 8 | ?a dbpedia:name ?d . 9 | ?c dbpedia:profession dbpediares:Politician . 10 | ?a owl:sameAs ?b . 11 | ?b . 12 | } 13 | 14 | -------------------------------------------------------------------------------- /Query/queryUniprot4: -------------------------------------------------------------------------------- 1 | PREFIX uni: 2 | PREFIX uniprot: 3 | PREFIX schema: 4 | PREFIX file: 5 | select ?a ?vo where 6 | { 7 | ?a uni:encodedBy ?vo. 8 | ?a schema:seeAlso . 9 | ?a schema:seeAlso . 10 | ?a schema:seeAlso . 11 | ?a schema:seeAlso . 12 | } 13 | -------------------------------------------------------------------------------- /Query/queryUniprot1: -------------------------------------------------------------------------------- 1 | PREFIX rdf: 2 | PREFIX uni: 3 | PREFIX uni2: 4 | SELECT ?p2 ?interaction ?p1 WHERE 5 | { 6 | ?p1 uni:enzyme . 7 | ?p1 rdf:type uni:Protein . 8 | ?interaction uni:participant ?p1 . 9 | ?interaction rdf:type uni:Interaction . 10 | ?interaction uni:participant ?p2 . 11 | ?p2 rdf:type uni:Protein . 12 | ?p2 uni:enzyme . 
13 | } 14 | -------------------------------------------------------------------------------- /Query/queryBTC3: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select ?t ?lat ?long where { 7 | ?a dbpedia:region . 8 | ?a dbpedia:title ?t . 9 | ?a pos:lat ?lat . 10 | ?a pos:long ?long . 11 | ?a . 12 | } 13 | 14 | -------------------------------------------------------------------------------- /Query/queryBTC8: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select distinct ?a ?y where { 7 | ?a a . 8 | ?a dbpedia:years ?y. 9 | ?a ?n. 10 | ?b [] ?n. 11 | ?b . 12 | } 13 | -------------------------------------------------------------------------------- /Query/queryBTC4: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select ?l ?long ?lat where { 7 | ?p dbpedia:name "Krebs, Emil" . 8 | ?p dbpedia:deathPlace ?l . 9 | ?c [] ?l . 10 | ?c . 11 | ?c . 12 | ?c pos:long ?long . 13 | ?c pos:lat ?lat . 14 | } 15 | -------------------------------------------------------------------------------- /Query/queryBTC7: -------------------------------------------------------------------------------- 1 | PREFIX geo: 2 | PREFIX pos: 3 | PREFIX dbpedia: 4 | PREFIX dbpediares: 5 | PREFIX owl: 6 | select distinct ?a ?b ?lat ?long where { 7 | ?a dbpedia:spouse ?b . 8 | ?a . 9 | ?b . 10 | ?a dbpedia:placeOfBirth ?c . 11 | ?b dbpedia:placeOfBirth ?c . 12 | ?c owl:sameAs ?c2 . 13 | ?c2 pos:lat ?lat . 14 | ?c2 pos:long ?long . 
15 | } 16 | 17 | -------------------------------------------------------------------------------- /TripleBit/IRepository.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "IRepository.h" 14 | 15 | -------------------------------------------------------------------------------- /Query/queryUniprot3: -------------------------------------------------------------------------------- 1 | PREFIX uni: 2 | PREFIX uniprot: 3 | PREFIX schema: 4 | PREFIX file: 5 | select ?a ?vo where 6 | { 7 | ?a schema:seeAlso ?vo . 8 | ?a uni:classifiedWith . 9 | ?a schema:seeAlso . 10 | ?b schema:seeAlso . 11 | ?b schema:seeAlso . 12 | ?b schema:seeAlso . 13 | ?a uni:replaces ?ab . 14 | ?ab uni:replacedBy ?b . 15 | } 16 | -------------------------------------------------------------------------------- /TripleBit/MessageEngine.h: -------------------------------------------------------------------------------- 1 | #ifndef MESSAGEENGINE_H_ 2 | #define MESSAGEENGINE_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. 
/// Message printing helper: declares the message categories and one static
/// entry point that prints a message tagged with a category.
class MessageEngine {
public:
	/// Category of a message. Enumerators are numbered consecutively from 1.
	enum MessageType {
		INFO = 1,
		WARNING,
		ERROR,
		DEFAULT
	};

	MessageEngine();
	virtual ~MessageEngine();

	/// Print `msg`; `type` selects the category and is DEFAULT when omitted.
	static void showMessage(char* msg, MessageType type = DEFAULT);
};
class TempFile;

/// Sorting facility for temporary files.
class Sorter {
public:
	/// Sort a file: takes `in` and `out` temporary files plus two
	/// caller-supplied callbacks that work on raw `const char*` positions.
	///
	/// NOTE(review): presumably `skip` steps over one record and `compare`
	/// orders two records, with the sorted result written to `out` —
	/// confirm against Sorter.cpp.
	///
	/// `eliminateDuplicates` is false unless the caller passes true.
	static void sort(
		TempFile& in,
		TempFile& out,
		const char* (*skip)(const char*),
		int (*compare)(const char*, const char*),
		bool eliminateDuplicates = false);
};
/// Static file-system helpers. Every operation takes its path by value as a
/// string.
/// NOTE(review): the meaning of each return value (e.g. whether `mkdir`
/// returns true on success) is defined in OSFile.cpp — confirm there.
class OSFile {
public:
	OSFile();
	virtual ~OSFile();

	// Queries
	static bool fileExists(const std::string filename);
	static bool directoryExists(const std::string dir);
	static size_t fileSize(const std::string filename);

	// Modification
	static bool mkdir(const std::string dir);
};
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 23 | class RDFParser { 24 | public: 25 | RDFParser(); 26 | static void parserRDFFile(string fileName, raptor_statement_handler hanler, void * user_data); 27 | //static void parseRdf(void* user_data, const raptor_statement* triple); 28 | virtual ~RDFParser(); 29 | private: 30 | //std::string fileName; 31 | 32 | }; 33 | 34 | #endif /* RDFPARSER_H_ */ 35 | -------------------------------------------------------------------------------- /BuildTripleBitFromN3/BuildTripleBit.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "TripleBitBuilder.h" 14 | #include "OSFile.h" 15 | #include 16 | 17 | char* DATABASE_PATH; 18 | int main(int argc, char* argv[]) 19 | { 20 | if(argc != 3) { 21 | fprintf(stderr, "Usage: %s \n", argv[0]); 22 | return -1; 23 | } 24 | 25 | if(OSFile::directoryExists(argv[2]) == false) { 26 | OSFile::mkdir(argv[2]); 27 | } 28 | 29 | DATABASE_PATH = argv[2]; 30 | TripleBitBuilder* builder = new TripleBitBuilder(argv[2]); 31 | builder->startBuildN3(argv[1]); 32 | builder->endBuild(); 33 | delete builder; 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /TripleBitQuery/TripleBitQuery.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "TripleBit.h" 14 | #include "TripleBitRepository.h" 15 | #include "OSFile.h" 16 | #include "MMapBuffer.h" 17 | 18 | char* DATABASE_PATH; 19 | char* QUERY_PATH; 20 | int main(int argc, char* argv[]) 21 | { 22 | if(argc != 3) { 23 | fprintf(stderr, "Usage: %s \n", argv[0]); 24 | return -1; 25 | } 26 | 27 | DATABASE_PATH = argv[1]; 28 | 29 | TripleBitRepository* repo = TripleBitRepository::create(DATABASE_PATH); 30 | if(repo == NULL) { 31 | return -1; 32 | } 33 | 34 | QUERY_PATH = argv[2]; 35 | 36 | repo->cmd_line(stdin, stderr); 37 | delete repo; 38 | 39 | return 0; 40 | } 41 | -------------------------------------------------------------------------------- /TripleBit/QuerySemanticAnalysis.h: -------------------------------------------------------------------------------- 1 | #ifndef QUERYSEMANTICANALYSIS_H_ 2 | #define QUERYSEMANTICANALYSIS_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include "IRepository.h" 17 | 18 | class IRepository; 19 | class SPARQLParser; 20 | class TripleBitQueryGraph; 21 | 22 | ///Semantic Analysis for SPARQL query. Transform the parse result into a query Graph. 23 | class QuerySemanticAnalysis { 24 | 25 | private: 26 | ///Repository use for String and URI lookup. 
27 | IRepository& repo; 28 | public: 29 | QuerySemanticAnalysis(IRepository &repo); 30 | virtual ~QuerySemanticAnalysis(); 31 | 32 | /// Perform the transformation 33 | bool transform(const SPARQLParser& input, TripleBitQueryGraph& output); 34 | }; 35 | 36 | #endif /* QUERYSEMANTICANALYSIS_H_ */ 37 | -------------------------------------------------------------------------------- /TripleBit/BitmapWAH.h: -------------------------------------------------------------------------------- 1 | #ifndef _BITMAP_H_ 2 | #define _BITMAP_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 
/// Bitmap keyed by ID whose per-entry storage type is a BitVectorWAH pointer
/// (see the BitMapType typedef below).
/// NOTE(review): member semantics are defined in BitmapWAH.cpp; the comments
/// here describe only what the declarations show.
class BitmapWAH
{

public:
	BitmapWAH();
	/// Record position `pos` for `id`.
	/// NOTE(review): presumably sets a bit in the vector belonging to `id` — confirm.
	void insert(ID id, unsigned int pos);
	void print();
	size_t get_size();
	/// NOTE(review): presumably finalises the structure after the last insert() — confirm.
	void completeInsert();
	//BitVector* getBitVector(ID id);
	virtual ~BitmapWAH();
private:
	bool isIdInBitmap(ID id);
	void expandBitmap();
	//BitVector* getBitVector(ID id);
private:
	typedef BitVectorWAH* BitMapType;
	// NOTE(review): `map` appears without template arguments in this copy;
	// they were most likely lost in extraction (presumably keyed by ID with
	// BitMapType values). Restore from the upstream header before compiling.
	map bitMap;
	size_t bitMapSize;
	unsigned int capacity;
};
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "MessageEngine.h" 14 | #include "TripleBit.h" 15 | 16 | MessageEngine::MessageEngine() { 17 | // TODO Auto-generated constructor stub 18 | 19 | } 20 | 21 | MessageEngine::~MessageEngine() { 22 | // TODO Auto-generated destructor stub 23 | } 24 | 25 | void MessageEngine::showMessage(char* msg, MessageType type /* = DEFAULT*/) 26 | { 27 | switch(type) 28 | { 29 | case INFO: 30 | fprintf(stderr, "\033[0;32mINFO: %s\033[0m.\n",msg); 31 | break; 32 | case WARNING: 33 | fprintf(stderr, "\033[1;33mWARNING: %s\033[0m.\n",msg); 34 | break; 35 | case ERROR: 36 | fprintf(stderr, "\033[0;31mERROR: %s\033[0m.\n",msg); 37 | break; 38 | default: 39 | printf("%s\n", msg); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /TripleBit/TimeStamp.h: -------------------------------------------------------------------------------- 1 | #ifndef TIMESTAMP_H_ 2 | #define TIMESTAMP_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 
/// Accumulating wall-clock stopwatch based on gettimeofday().
///
/// Each startTimer()/endTimer() pair adds the elapsed interval, in
/// milliseconds, to a running total that printTime() reports and
/// resetTimer() clears.
class TimeStamp {
	struct timeval start, end;
	double time;	// accumulated elapsed time in milliseconds
public:
	/// Zero the total and both time points, so that an endTimer() without a
	/// preceding startTimer() does not read indeterminate values.
	TimeStamp() : start(), end(), time(0) {
	}

	/// Record the beginning of an interval.
	void startTimer() {
		gettimeofday(&start, NULL);
	}

	/// Record the end of an interval and add its length to the total.
	void endTimer() {
		gettimeofday(&end, NULL);
		// Do the microsecond arithmetic in 64 bits: with a 32-bit long,
		// seconds * 1000000 overflows after roughly 35 minutes.
		const long long elapsedUs =
			static_cast<long long>(end.tv_sec - start.tv_sec) * 1000000LL
			+ (end.tv_usec - start.tv_usec);
		time += elapsedUs / 1000.0;
	}

	/// Write "<timername> time used <total> ms." to stderr.
	void printTime(const char* timername) {
		fprintf(stderr, "%s time used %f ms.\n", timername,time);
	}

	/// Clear the accumulated total.
	void resetTimer() {
		time = 0;
	}

	virtual ~TimeStamp() {}
};
// Forward declarations: RDFQuery only stores pointers to these types.
class SPARQLLexer;
class SPARQLParser;
class QuerySemanticAnalysis;
class PlanGenerator;
class TripleBitQuery;
class TripleBitQueryGraph;
class TripleBitRepository;
class TripleBitBuilder;

/// Query front end that holds pointers to the semantic analyser, the plan
/// generator, the query engine, the query graph and the repository.
/// NOTE(review): which of these pointers RDFQuery owns is decided in
/// RDFQuery.cpp — confirm before changing lifetimes.
class RDFQuery {
private:
	QuerySemanticAnalysis* semAnalysis;
	PlanGenerator* planGen;
	TripleBitQuery* bitmapQuery;
	TripleBitQueryGraph* queryGraph;
	TripleBitRepository* repo;
public:
	/// Construct from the query engine and repository to work against.
	RDFQuery(TripleBitQuery* _bitmapQuery, TripleBitRepository* _repo);
	/// Run the query given in `queryString`, with `resultSet` as the output
	/// parameter.
	/// NOTE(review): `vector` appears without a template argument in this
	/// copy; it was most likely lost in extraction (presumably a vector of
	/// strings). Restore from the upstream header before compiling.
	Status Execute(string& queryString, vector& resultSet);
	void Print();
	virtual ~RDFQuery();
};
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | /// Buffer described by a file descriptor, a file name, a size and an mmap address (presumably a memory-mapped file -- confirm in MMapBuffer.cpp). 18 | //#define VOLATILE 19 | class MMapBuffer { 20 | int fd; 21 | char volatile* mmap_addr; 22 | char* curretHead; 23 | string filename; 24 | size_t size; 25 | public: 26 | char* resize(size_t incrementSize); 27 | char* getBuffer(); 28 | char* getBuffer(int pos); 29 | void discard(); 30 | Status flush(); 31 | size_t getSize() { return size;} 32 | size_t get_length() { return size;} 33 | char * get_address() const { return (char*)mmap_addr; } 34 | /// NOTE(review): getSize() and get_length() both return size; get_address() casts away the volatile qualifier of mmap_addr; resize has two overloads, resize(size_t) returning char* and resize(size_t, bool) returning Status. 35 | virtual Status resize(size_t new_size,bool clear); 36 | virtual void memset(char value); 37 | 38 | MMapBuffer(const char* filename, size_t initSize); 39 | virtual ~MMapBuffer(); 40 | 41 | public: 42 | static MMapBuffer* create(const char* filename, size_t initSize); 43 | }; 44 | 45 | #endif /* MMAPBUFFER_H_ */ 46 | -------------------------------------------------------------------------------- /TripleBit/util/BufferManager.h: -------------------------------------------------------------------------------- 1 | #ifndef BUFFERMANAGER_H_ 2 | #define BUFFERMANAGER_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #define INIT_BUFFERS 5 17 | #define INCREASE_BUFFERS 5 18 | 19 | class EntityIDBuffer; 20 | 21 | #include "../TripleBit.h" 22 | /// Hands out EntityIDBuffer objects (getNewBuffer) and takes them back (freeBuffer). Reached through the static getInstance(); the constructor is protected. 23 | class BufferManager { 24 | private: 25 | vector bufferPool; 26 | vector usedBuffer; 27 | vector cleanBuffer; 28 | 29 | int bufferCnt; 30 | protected: 31 | static BufferManager* instance; 32 | BufferManager(); 33 | bool expandBuffer(); 34 | public: 35 | virtual ~BufferManager(); 36 | EntityIDBuffer* getNewBuffer(); 37 | Status freeBuffer(EntityIDBuffer* buffer); 38 | Status reserveBuffer(); 39 | void destroyBuffers(); 40 | public: 41 | static BufferManager* getInstance(); 42 | }; 43 | 44 | 45 | #endif /* BUFFERMANAGER_H_ */ 46 | -------------------------------------------------------------------------------- /TripleBit/RDFParser.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA.
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "RDFParser.h" 14 | 15 | RDFParser::RDFParser() { 16 | // TODO Auto-generated constructor stub 17 | } 18 | 19 | RDFParser::~RDFParser() { 20 | // TODO Auto-generated destructor stub 21 | 22 | } 23 | 24 | 25 | void RDFParser::parserRDFFile(string fileName, raptor_statement_handler handler, void* user_data) 26 | { 27 | raptor_parser *rdf_parser; 28 | raptor_uri *uri, *base_uri; 29 | unsigned char* uri_string; 30 | 31 | raptor_init(); 32 | rdf_parser = raptor_new_parser("guess"); 33 | raptor_set_statement_handler(rdf_parser, user_data, handler); 34 | 35 | uri_string = raptor_uri_filename_to_uri_string(fileName.c_str()); 36 | uri = raptor_new_uri(uri_string); 37 | base_uri = raptor_uri_copy(uri); 38 | raptor_parse_file(rdf_parser, uri, base_uri); 39 | 40 | raptor_free_parser(rdf_parser); 41 | raptor_free_uri(base_uri); 42 | raptor_free_uri(uri); 43 | raptor_free_memory(uri_string); 44 | 45 | raptor_finish(); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | TripleBit 2 | (c) 2011 Massive Data Management Group @ SCTS & CGCL. 3 | Web site: http://grid.hust.edu.cn/triplebit 4 | 5 | This work is licensed under the Creative Commons 6 | Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 7 | of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 8 | or send a letter to Creative Commons, 171 Second Street, Suite 300, 9 | San Francisco, California, 94105, USA. 10 | 11 | 12 | Dependency: 13 | ----------- 14 | Please install boost and raptor; you can use the following versions or other versions. 15 | 16 | boost-1.39.0.tar.gz 17 | raptor-1.4.21.tar.gz 18 | 19 | Building: 20 | --------- 21 | 22 | TripleBit must be built using GNU make and a reasonable C++ compiler.
Ideally a simple 23 | 24 | make 25 | 26 | is enough; it will build the three high-level executables in bin/lrelease/. 27 | 28 | Using: 29 | ------ 30 | 31 | TripleBit currently includes three high-level executables. The first (buildTripleBitFromN3) 32 | is used to build a new database from a Turtle/N-Triples input: 33 | 34 | buildTripleBitFromN3 mydata.n3 database_directory 35 | 36 | The input file can be arbitrarily large; buildTripleBitFromN3 spools to disk if 37 | main memory is exhausted. 38 | 39 | The second (buildTripleBitFromRDF) is similar to the first executable except the 40 | input file is an RDF file. 41 | 42 | After loading, the database can be queried with triplebitQuery: 43 | 44 | triplebitQuery database_directory query_directory 45 | 46 | The program shows a command prompt and accepts SPARQL queries. 47 | 48 | Note: TripleBit currently only supports "select" queries. 49 | -------------------------------------------------------------------------------- /TripleBit/URITable.h: -------------------------------------------------------------------------------- 1 | #ifndef URITABLE_H_ 2 | #define URITABLE_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | #include "StringIDSegment.h" 18 | /// URI table with insert and lookup in both directions (insertTable, getIdByURI, getURIById); holds a prefix and a suffix StringIDSegment. 19 | using namespace std; 20 | class URITable { 21 | ID prefixStartID; 22 | StringIDSegment* prefix_segment; 23 | StringIDSegment* suffix_segment; 24 | LengthString prefix, suffix; 25 | LengthString searchLen; 26 | 27 | string SINGLE; 28 | string searchStr; 29 | 30 | private: 31 | Status getPrefix(const char* URI); 32 | public: 33 | URITable(); 34 | URITable(const string dir); 35 | virtual ~URITable(); 36 | Status insertTable(const char* URI,ID& id); 37 | Status getIdByURI(const char* URI,ID& id); 38 | Status getURIById(string& URI,ID id); 39 | /// NOTE(review): this size getter also writes a "max id" line to cout. 40 | size_t getSize() { 41 | cout<<"max id: "<getMaxID()<getSize() + suffix_segment->getSize(); 43 | } 44 | /// Number of entries in the suffix segment's idStroffPool. 45 | ID getUriCount(){ 46 | return suffix_segment->idStroffPool->size(); 47 | } 48 | 49 | void dump(); 50 | public: 51 | static ID startID; 52 | static URITable* load(const string dir); 53 | static ID getMaxID(); 54 | }; 55 | 56 | #endif /* URITABLE_H_ */ 57 | -------------------------------------------------------------------------------- /TripleBit/PredicateTable.h: -------------------------------------------------------------------------------- 1 | #ifndef PREDICATETABLE_H_ 2 | #define PREDICATETABLE_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "StringIDSegment.h" 17 | #include "TripleBit.h" 18 | /// Predicate table with insert and lookup in both directions (insertTable, getPredicateByID, getIDByPredicate); holds a prefix and a suffix StringIDSegment. NOTE(review): the default constructor initializes only SINGLE. 19 | class PredicateTable { 20 | StringIDSegment* prefix_segment; 21 | StringIDSegment* suffix_segment; 22 | LengthString prefix, suffix; 23 | LengthString searchLen; 24 | 25 | string SINGLE; 26 | string searchStr; 27 | 28 | private: 29 | Status getPrefix(const char* URI); 30 | public: 31 | PredicateTable() : SINGLE("single") { } 32 | PredicateTable(const string dir); 33 | virtual ~PredicateTable(); 34 | Status insertTable(const char* str, ID& id); 35 | string getPredicateByID(ID id); 36 | Status getPredicateByID(string& URI, ID id); 37 | Status getIDByPredicate(const char* str, ID& id); 38 | /// Sum of the sizes reported by the prefix and suffix segments. 39 | size_t getSize() { 40 | return prefix_segment->getSize() + suffix_segment->getSize(); 41 | } 42 | /// Number of entries in the suffix segment's idStroffPool. 43 | ID getPredicateCount(){ 44 | return suffix_segment->idStroffPool->size(); 45 | } 46 | 47 | size_t getPredicateNo(); 48 | void dump(); 49 | public: 50 | static PredicateTable* load(const string dir); 51 | }; 52 | 53 | #endif /* PREDICATETABLE_H_ */ 54 | -------------------------------------------------------------------------------- /TripleBit/OSFile.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA.
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "OSFile.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | OSFile::OSFile() { 24 | // TODO Auto-generated constructor stub 25 | 26 | } 27 | 28 | OSFile::~OSFile() { 29 | // TODO Auto-generated destructor stub 30 | } 31 | 32 | /// Returns true when stat() succeeds on filename and the entry is a regular file; false otherwise. 33 | bool OSFile::fileExists(const string filename) 34 | { 35 | struct stat sbuff; 36 | if( stat(filename.c_str(),&sbuff) == 0 ){ 37 | if( S_ISREG(sbuff.st_mode) ) 38 | return true; 39 | } 40 | return false; 41 | } 42 | 43 | /// NOTE(review): writes a leftover debug string to cout when stat() succeeds; the remainder of this function is missing from this copy of the file. 44 | bool OSFile::directoryExists(const string path) 45 | { 46 | struct stat sbuff; 47 | if( stat(path.c_str(),&sbuff) == 0 ){ 48 | cout<<"asdfasf"< 23 | 24 | #include 25 | 26 | using namespace std; 27 | /// Buffer with separate read and write positions, guarded by a pthread mutex and three condition variables. 28 | class SynchronousBuffer { 29 | private: 30 | MemoryBuffer* buffer; 31 | 32 | //synchronizes access to the buffer 33 | pthread_mutex_t bufferLock; 34 | pthread_cond_t bufferNotEmpty; 35 | pthread_cond_t bufferFull; 36 | pthread_cond_t bufferNotFull; 37 | 38 | char* base; 39 | unsigned int readPos; 40 | unsigned int writePos; 41 | 42 | unsigned int pageSize; 43 | unsigned int usedSize; 44 | unsigned int remainderSize; 45 | 46 | bool finish; //set by SetFinish() to mark that writing is finished 47 | public: 48 | SynchronousBuffer(); 49 | Status MemoryCopy(void* src, size_t length); 50 | Status MemoryGet(void* dest, size_t length); 51 | void SetFinish() { finish = true; } 52 | virtual ~SynchronousBuffer(); 53 | /// IsBufferFull wraps writePos + length modulo pageSize before comparing with readPos; IsBufferEmpty compares readPos with writePos. 54 | private: 55 | bool IsBufferFull(size_t length) { return (writePos + length) % pageSize == readPos; } 56 | bool IsBufferEmpty() { return (readPos == writePos); } 57 | }; 58 | 59 | #endif /* SYNCHRONOUSBUFFER_H_ */ 60 | -------------------------------------------------------------------------------- /TripleBit/PlanGenerator.h: -------------------------------------------------------------------------------- 1 | #ifndef PLANGENERATOR_H_ 2 | #define
PLANGENERATOR_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class IRepository; 17 | class TripleBitQueryGraph; 18 | 19 | #include "IRepository.h" 20 | #include "TripleBitQueryGraph.h" 21 | #include "TripleBit.h" 22 | /// Plan generator bound to an IRepository; generatePlan() takes the TripleBitQueryGraph to plan for. A static self pointer and getInstance() are also declared. 23 | class PlanGenerator { 24 | private: 25 | IRepository& repo; 26 | TripleBitQueryGraph::SubQuery* query; 27 | TripleBitQueryGraph* graph; 28 | static PlanGenerator* self; 29 | public: 30 | PlanGenerator(IRepository& _repo); 31 | Status generatePlan(TripleBitQueryGraph& _graph); 32 | virtual ~PlanGenerator(); 33 | static PlanGenerator* getInstance(); 34 | int getSelectivity(TripleBitQueryGraph::TripleNodeID& tripleID); 35 | int getSelectivity(vector::iterator iter); 36 | private: 37 | /// Generate the scan operator for the query pattern.
38 | Status generateScanOperator(TripleBitQueryGraph::TripleNode& node, TripleBitQueryGraph::JoinVariableNodeID varID); 39 | Status generateSelectivity(TripleBitQueryGraph::JoinVariableNode& node, map& selectivityMap); 40 | TripleBitQueryGraph::JoinVariableNode::JoinType getJoinType(TripleBitQueryGraph::JoinVariableNode& node); 41 | Status bfsTraverseVariableNode(); 42 | Status getAdjVariableByID(TripleBitQueryGraph::JoinVariableNodeID id, vector& nodes); 43 | }; 44 | 45 | #endif /* PLANGENERATOR_H_ */ 46 | -------------------------------------------------------------------------------- /TripleBit/util/SortMergeJoin.h: -------------------------------------------------------------------------------- 1 | #ifndef SORTMERGEJOIN_H_ 2 | #define SORTMERGEJOIN_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | class EntityIDBuffer; 17 | 18 | #include "../TripleBit.h" 19 | #include "../ThreadPool.h" 20 | 21 | struct SortMergeJoinArg; 22 | /// Join of two EntityIDBuffers on one key column each (joinKey1, joinKey2); secondModify defaults to true. 23 | class SortMergeJoin { 24 | //CThreadPool* pool; 25 | ID* temp1; 26 | ID* temp2; 27 | public: 28 | SortMergeJoin(); 29 | /// Execute the join operation 30 | void Join(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2, bool secondModify = true); 31 | // perform the merge operation 32 | int Merge(SortMergeJoinArg* arg); 33 | // merge variant that needs to modify buffer2 34 | void Merge1(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2); 35 | // merge variant that does not need to modify buffer2 36 | void Merge2(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2); 37 | virtual ~SortMergeJoin(); 38 | }; 39 | /// Argument bundle taken by SortMergeJoin::Merge: two ID buffers with their lengths, flag arrays, ID counts and join keys. 40 | struct SortMergeJoinArg 41 | { 42 | ID* buffer1, *buffer2; 43 | int length1, length2; 44 | char* flag1, *flag2; 45 | int IDCount1, IDCount2; 46 | int joinKey1, joinKey2; 47 | /// NOTE(review): several constructor parameters share the names of the members they initialize (flag1, IDCount1, joinKey1, ...). 48 | SortMergeJoinArg(ID* _buffer1, ID* _buffer2, int _length1, int _length2, char* flag1, char* flag2, int IDCount1, int IDCount2, 49 | int joinKey1, int joinKey2) : buffer1(_buffer1), buffer2(_buffer2), length1(_length1), length2(_length2), 50 | flag1(flag1), flag2(flag2), IDCount1(IDCount1), IDCount2(IDCount2),joinKey1(joinKey1),joinKey2(joinKey2){} 51 | }; 52 | 53 | #endif /* SORTMERGEJOIN_H_ */ 54 | -------------------------------------------------------------------------------- /TripleBit/IRepository.h: -------------------------------------------------------------------------------- 1 | #ifndef IREPOSITORY_H_ 2 | #define IREPOSITORY_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL.
7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | /// Abstract repository interface: every lookup, statistics and scan method declared in it is pure virtual. 18 | class IRepository { 19 | 20 | public: 21 | IRepository(){} 22 | virtual ~IRepository(){} 23 | 24 | //virtual Status open() = 0; 25 | ///virtual Status load() = 0; 26 | //virtual Status create() = 0; 27 | //virtual void close() = 0; 28 | 29 | //SO (presumably subject/object) translation between id and string 30 | virtual bool find_soid_by_string(SOID& soid, const std::string& str) = 0; 31 | virtual bool find_string_by_soid(std::string& str, SOID& soid) = 0; 32 | 33 | //P (presumably predicate) translation between id and string 34 | virtual bool find_pid_by_string(PID& pid, const std::string& str) = 0; 35 | virtual bool find_string_by_pid(std::string& str, ID& pid) = 0; 36 | 37 | //create a Repository at the specified path.
38 | //static IRepository * create(const char * path); 39 | 40 | //Get statistics (counts) for the given ids 41 | virtual int get_predicate_count(PID pid) = 0; 42 | virtual int get_subject_count(ID subjectID) = 0; 43 | virtual int get_object_count(ID objectID) = 0; 44 | virtual int get_subject_predicate_count(ID subjectID, ID predicateID) = 0; 45 | virtual int get_object_predicate_count(ID objectID, ID predicateID) = 0; 46 | virtual int get_subject_object_count(ID subjectID, ID objectID) = 0; 47 | 48 | //scan the database 49 | virtual Status getSubjectByObjectPredicate(ID oid, ID pod) = 0; 50 | virtual ID next() = 0; 51 | 52 | //Look up the id for a string 53 | virtual bool lookup(const string& str, ID& id) = 0; 54 | }; 55 | 56 | #endif /* IREPOSITORY_H_ */ 57 | -------------------------------------------------------------------------------- /TripleBit/SPARQLLexer.h: -------------------------------------------------------------------------------- 1 | #ifndef SPARQLLexer_H_ 2 | #define SPARQLLexer_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #include 17 | //--------------------------------------------------------------------------- 18 | /// A lexer for SPARQL input 19 | class SPARQLLexer 20 | { 21 | public: 22 | /// Possible tokens 23 | enum Token { None, Error, Eof, IRI, String, Variable, Identifier, Colon, Semicolon, Comma, Dot, Star, Underscore, LCurly, RCurly, LParen, RParen, LBracket, RBracket, Anon, Equal, NotEqual }; 24 | 25 | private: 26 | /// The input 27 | std::string input; 28 | /// The current position 29 | std::string::const_iterator pos; 30 | /// The start of the current token 31 | std::string::const_iterator tokenStart; 32 | /// The end of the current token. Only set if delimiters are stripped 33 | std::string::const_iterator tokenEnd; 34 | /// The token put back with unget 35 | Token putBack; 36 | /// Was the token end set? 37 | bool hasTokenEnd; 38 | 39 | public: 40 | /// Constructor 41 | SPARQLLexer(const std::string& input); 42 | /// Destructor 43 | ~SPARQLLexer(); 44 | 45 | /// Get the next token 46 | Token getNext(); 47 | /// Get the value of the current token 48 | std::string getTokenValue() const; 49 | /// Check if the current token matches a keyword 50 | bool isKeyword(const char* keyword) const; 51 | /// Put the last token back 52 | void unget(Token value) { putBack=value; } 53 | }; 54 | //--------------------------------------------------------------------------- 55 | #endif 56 | -------------------------------------------------------------------------------- /TripleBit/HashIndex.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHINDEX_H_ 2 | #define HASHINDEX_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL.
7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class ChunkManager; 17 | class MemoryBuffer; 18 | class MMapBuffer; 19 | 20 | #include "TripleBit.h" 21 | /// Hash index from an ID to an offset (getOffsetByID), tied to a ChunkManager and tagged as either a subject or an object index. 22 | class HashIndex { 23 | public: 24 | enum IndexType { SUBJECT_INDEX, OBJECT_INDEX}; 25 | private: 26 | /// store the chunks' position and the offset in chunk 27 | MemoryBuffer* hashTable; 28 | ID* hashTableEntries; 29 | //MMapBuffer* secondaryHashTable; 30 | /// the current size of hash index; 31 | unsigned int hashTableSize; 32 | //unsigned int secondaryHashTableSize; 33 | 34 | ChunkManager& chunkManager; 35 | /// index type; 36 | IndexType type; 37 | 38 | unsigned nextHashValue;// lastSecondaryHashTableOffset, secondaryHashTableOffset; 39 | unsigned firstValue; 40 | //ID* secondaryHashTableWriter; 41 | protected: 42 | void insertFirstValue(unsigned value); 43 | public: 44 | HashIndex(ChunkManager& _chunkManager, IndexType type); 45 | virtual ~HashIndex(); 46 | /// build hash index; chunkType: 1 or 2 47 | Status buildIndex(unsigned chunkType); 48 | /// search the chunk and offset in chunk by id; typeID 1 or 2 49 | Status getOffsetByID(ID id, unsigned& offset, unsigned typeID); 50 | void save(MMapBuffer*& buffer); 51 | public: 52 | static HashIndex* load(ChunkManager& manager, IndexType type, char* buffer, unsigned int& offset); 53 | private: 54 | /// insert a record into index; position is the position of chunk in chunks vector. NOTE(review): the declaration that follows names its parameters id and offset, not position.
55 | Status hashInsert(ID id, unsigned int offset); 56 | unsigned hash(ID id); 57 | unsigned next(ID id); 58 | }; 59 | 60 | #endif /* HASHINDEX_H_ */ 61 | -------------------------------------------------------------------------------- /TripleBit/LineHashIndex.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEHASHINDEX_H_ 2 | #define LINEHASHINDEX_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class MemoryBuffer; 17 | class ChunkManager; 18 | class MMapBuffer; 19 | 20 | #include "TripleBit.h" 21 | #include "EntityIDBuffer.h" 22 | /// Index from an ID to an offset (getOffsetByID, getFirstOffsetByID) that keeps an id table, an offset table and four sets of line coefficients. 23 | class LineHashIndex { 24 | public: 25 | struct Point{ 26 | ID x; 27 | ID y; 28 | }; 29 | 30 | enum IndexType { SUBJECT_INDEX, OBJECT_INDEX}; 31 | private: 32 | MemoryBuffer* idTable; 33 | MemoryBuffer* offsetTable; 34 | ID* idTableEntries; 35 | ID* offsetTableEntries; 36 | ChunkManager& chunkManager; 37 | IndexType indexType; 38 | unsigned int tableSize; 39 | unsigned lineNo; 40 | 41 | //line parameters: four entries each of upper and lower k and b (presumably slope and intercept -- confirm in LineHashIndex.cpp) 42 | double upperk[4]; 43 | double upperb[4]; 44 | double lowerk[4]; 45 | double lowerb[4]; 46 | 47 | ID startID[4]; 48 | private: 49 | void insertEntries(ID id, unsigned offset); 50 | int searchChunk(ID id); 51 | bool buildLine(int startEntry, int endEntry, int lineNo); 52 | public: 53 | LineHashIndex(ChunkManager& _chunkManager, IndexType type); 54 | Status buildIndex(unsigned
chunkType); 55 | Status getOffsetByID(ID id, unsigned& offset, unsigned typeID); 56 | Status getFirstOffsetByID(ID id, unsigned& offset, unsigned typeID); 57 | Status getYByID(ID id,EntityIDBuffer* entBuffer,unsigned typeID); 58 | void save(MMapBuffer*& indexBuffer); 59 | virtual ~LineHashIndex(); 60 | private: 61 | bool isBufferFull(); 62 | public: 63 | static LineHashIndex* load(ChunkManager& manager, IndexType type, char* buffer, size_t& offset); 64 | static void unload( char* buffer, size_t& offset); 65 | }; 66 | 67 | #endif /* LINEHASHINDEX_H_ */ 68 | -------------------------------------------------------------------------------- /TripleBit/util/HashJoin.h: -------------------------------------------------------------------------------- 1 | #ifndef HASHJOIN_H_ 2 | #define HASHJOIN_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "../TripleBit.h" 17 | #include "../ThreadPool.h" 18 | 19 | class EntityIDBuffer; 20 | class HashJoinTask; 21 | class BuildHashIndexTask; 22 | 23 | struct HashJoinArg; 24 | /// Join of two EntityIDBuffers on one key column each (see Join); every other operation declared here is static. 25 | class HashJoin { 26 | private: 27 | //CThreadPool* pool; 28 | public: 29 | friend class HashJoinTask; 30 | HashJoin(); 31 | virtual ~HashJoin(); 32 | 33 | static ID HashFunction(ID id, ID hashKey); 34 | static ID GetHashKey2(ID size); 35 | void Join(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2); 36 | static ID GetHashKey(ID size); 37 | static void BuildHashIndex(ID* p ,int joinKey, ID hashKey, vector& hashTrack, vector& prefixSum, int size, int IDCount); 38 | static void HashJoinInit(EntityIDBuffer* buffer,ID& hashKey, vector* >& hashTrack, int joinKey); 39 | static void SortMergeJoin(ID* buffer1, ID * buffer2, int IDCount1, int IDCount2, int joinKey1, int joinKey2, 40 | int size1, int size2, char* flagVector1, char* flagVector2); 41 | static void SortMergeJoin(ID* buffer1, ID * buffer2, int IDCount1, int IDCount2, int joinKey1, int joinKey2, 42 | int size1, int size2, char* flagVector1); 43 | static void run(HashJoinArg* arg); 44 | }; 45 | /// Argument bundle taken by HashJoin::run: two buffers with their flag arrays, plus a start position, length and join key for each. 46 | struct HashJoinArg 47 | { 48 | char* flag1; 49 | char* flag2; 50 | EntityIDBuffer* buffer1; 51 | EntityIDBuffer* buffer2; 52 | 53 | //the start pos in the buffer; 54 | int startPos1; 55 | int length1; 56 | 57 | int startPos2; 58 | int length2; 59 | 60 | int joinKey1; 61 | int joinKey2; 62 | }; 63 | 64 | #endif /* HASHJOIN_H_ */ 65 | -------------------------------------------------------------------------------- /TripleBit/ThreadPool.h: -------------------------------------------------------------------------------- 1 | #ifndef THREADPOOL_H_ 2 | #define THREADPOOL_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group
@ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 23 | class CThreadPool; 24 | 25 | class CTask { 26 | protected: 27 | string m_strTaskName; //the name of the task 28 | void* m_ptrData; //the data of the task to be executed 29 | public: 30 | CTask() {} 31 | virtual ~CTask() {} 32 | CTask(string taskName) { 33 | this->m_strTaskName = taskName; 34 | m_ptrData = NULL; 35 | } 36 | virtual int Run()= 0; 37 | void SetData(void* data); //set task data 38 | }; 39 | 40 | class CThreadPool { 41 | public: 42 | typedef boost::function Task; 43 | private: 44 | vector m_vecTaskList; //task list 45 | int m_iThreadNum; //the No of threads 46 | vector m_vecIdleThread; //idle thread list 47 | vector m_vecBusyThread; //busy thread list 48 | 49 | static CThreadPool* instance; //the thread pool instance; 50 | protected: 51 | friend class CTask; 52 | static void* ThreadFunc(void * threadData); //new thread function 53 | int MoveToIdle(pthread_t tid); //move the idle when the task complete 54 | int MoveToBusy(pthread_t tid); //move the tid to busy list 55 | int Create(); //create task 56 | public: 57 | static CThreadPool& getInstance(); 58 | pthread_mutex_t m_pthreadMutex; //used to syn 59 | pthread_mutex_t m_pthreadIdleMutex; 60 | pthread_mutex_t m_pthreadBusyMutex; 61 | pthread_cond_t m_pthreadCond; //used to syn 62 | pthread_cond_t m_pthreadEmpty; 63 | pthread_cond_t m_pthreadBusyEmpty; 64 | bool shutdown; 65 | CThreadPool(int threadNum); 66 | 
~CThreadPool(); 67 | int AddTask(const Task& task); // Add the task to List 68 | int StopAll(); 69 | int Wait(); //waiting for task complete! 70 | }; 71 | 72 | struct ThreadPoolArg 73 | { 74 | CThreadPool* pool; 75 | vector* taskList; 76 | }; 77 | #endif /* THREADPOOL_H_ */ 78 | -------------------------------------------------------------------------------- /BuildTripleBitFromRDF/BuildTripleBit.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "TripleBitBuilder.h" 14 | #include "OSFile.h" 15 | #include 16 | 17 | static TempFile rawFacts("./test"); 18 | 19 | void rdfParser(void* user_data, const raptor_statement* triple) 20 | { 21 | TripleBitBuilder *builder = (TripleBitBuilder*)user_data; 22 | 23 | char* predicate = (char*)triple->predicate; 24 | char* subject = (char*)triple->subject; 25 | char* object = (char*)triple->object; 26 | 27 | 28 | if(strlen(predicate) && strlen(subject) && strlen(object)) 29 | builder-> NTriplesParse(subject, predicate, object, rawFacts); 30 | } 31 | 32 | void parserRDFFile(string fileName, raptor_statement_handler handler, void* user_data) 33 | { 34 | raptor_parser *rdf_parser; 35 | raptor_uri *uri, *base_uri; 36 | unsigned char* uri_string; 37 | 38 | raptor_init(); 39 | rdf_parser = raptor_new_parser("rdfxml"); 40 | raptor_set_statement_handler(rdf_parser, user_data, handler); 41 | 42 | uri_string = raptor_uri_filename_to_uri_string(fileName.c_str()); 43 | uri = raptor_new_uri(uri_string); 44 | base_uri = raptor_uri_copy(uri); 45 | raptor_parse_file(rdf_parser, uri, base_uri); 46 | 47 | raptor_free_parser(rdf_parser); 48 | raptor_free_uri(base_uri); 49 | raptor_free_uri(uri); 50 | raptor_free_memory(uri_string); 51 | 52 | raptor_finish(); 53 | } 54 | 55 | char* DATABASE_PATH; 56 | int main(int argc, char* argv[]) 57 | { 58 | if(argc != 3) { 59 | fprintf(stderr, "Usage: %s \n", argv[0]); 60 | return -1; 61 | } 62 | 63 | if(OSFile::directoryExists(argv[2]) == false) { 64 | OSFile::mkdir(argv[2]); 65 | } 66 | 67 | DATABASE_PATH = argv[2]; 68 | TripleBitBuilder* builder = new TripleBitBuilder(argv[2]); 69 | parserRDFFile(argv[1], rdfParser, builder); 70 | 71 | TempFile facts(argv[1]); 72 | builder->resolveTriples(rawFacts, facts); 73 | facts.discard(); 74 | 75 | builder->endBuild(); 76 | delete builder; 77 | 78 | return 0; 79 | } 80 | 
-------------------------------------------------------------------------------- /TripleBit/Hash.h: -------------------------------------------------------------------------------- 1 | #ifndef HASH_H_ 2 | #define HASH_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | 18 | inline ulonglong expand(ulonglong oldsize, ulonglong added, ID id) 19 | { 20 | ulonglong newsize = (oldsize+added)*2; 21 | if(oldsize >= ulonglong(1<<30)) { 22 | newsize = oldsize + added * 2 + ulonglong(1<<30); 23 | } 24 | #ifdef DEBUG 25 | printf("DEBUG: added %llu resize %llu to %llu by [%d]\n",added,oldsize,newsize,id); 26 | #endif 27 | return newsize; 28 | } 29 | 30 | inline HashCodeType get_hash_code(const char * str){ 31 | HashCodeType ret = 0; 32 | while(*str){ 33 | ret = ret*31 + *str; // 31 for LUBM; 131 for Uniprot 34 | str++; 35 | } 36 | return ret; 37 | } 38 | 39 | inline HashCodeType get_hash_code(const char * str,size_t length) { 40 | HashCodeType ret = 0; 41 | while(length--){ 42 | ret = ret*31 + *str; // 31 for LUBM; 131 for Uniprot 43 | str++; 44 | } 45 | return ret; 46 | } 47 | 48 | inline HashCodeType get_hash_code(LengthString * str){ 49 | HashCodeType ret = 0; 50 | uint length = str->length; 51 | const char * ps = str->str; 52 | while(length--){ 53 | ret = ret*31 + *ps;// 31 for LUBM; 131 for Uniprot 54 | ps++; 55 | } 56 | return ret; 57 | } 58 | 59 
| inline HashCodeType next_prime_number( HashCodeType n ) 60 | { 61 | for(HashCodeType ret = n+1;;ret++){ 62 | HashCodeType up = (HashCodeType)(sqrt((double)ret))+1000; 63 | if(up>=ret) 64 | up = ret-1; 65 | 66 | bool ok = true; 67 | for(HashCodeType p = 2;p= HashCodeType(1<<25)) 84 | newPrimeSed = current + HashCodeType(1<<25); 85 | HashCodeType ret = next_prime_number(newPrimeSed); 86 | #ifdef DEBUG 87 | printf("DEBUG: resize hash table from %zd to %zd\n",current,ret); 88 | #endif 89 | if(ret>0) 90 | return ret; 91 | else{ 92 | exit(0); 93 | } 94 | } 95 | #endif /* HASH_H_ */ 96 | -------------------------------------------------------------------------------- /TripleBit/SynchronousBuffer.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "SynchronousBuffer.h" 14 | #include "MemoryBuffer.h" 15 | 16 | SynchronousBuffer::SynchronousBuffer() { 17 | // TODO Auto-generated constructor stub 18 | pthread_cond_init(&bufferNotEmpty,NULL); 19 | pthread_cond_init(&bufferFull,NULL); 20 | pthread_cond_init(&bufferNotFull,NULL); 21 | pthread_mutex_init(&bufferLock,NULL); 22 | 23 | this->buffer = new MemoryBuffer(1); 24 | 25 | this->pageSize = getpagesize(); 26 | this->usedSize = 0; 27 | this->remainderSize = pageSize - usedSize; 28 | 29 | this->base = buffer->getBuffer(); 30 | readPos = writePos = 0; 31 | 32 | this->finish = false; 33 | } 34 | 35 | SynchronousBuffer::~SynchronousBuffer() { 36 | // TODO Auto-generated destructor stub 37 | delete buffer; 38 | } 39 | 40 | Status SynchronousBuffer::MemoryCopy(void* src, size_t length) 41 | { 42 | //TODO copy something to the memory; 43 | int rtn; 44 | if((rtn = pthread_mutex_lock(&bufferLock)) != 0) 45 | fprintf(stderr, "pthread_mutex_lock %d", rtn), exit(1); 46 | 47 | while(IsBufferFull(length)){ 48 | if ((rtn = pthread_cond_wait(&bufferNotFull,&bufferLock)) != 0) 49 | fprintf(stderr, "pthread_cond_wait %d", rtn), exit(1); 50 | } 51 | 52 | memcpy(base+writePos,src,length); 53 | writePos = (writePos + length) % pageSize; 54 | 55 | if ((rtn = pthread_cond_broadcast(&bufferNotEmpty)) != 0) 56 | fprintf(stderr, "pthread_cond_signal %d", rtn), exit(1); 57 | 58 | pthread_mutex_unlock(&bufferLock); 59 | 60 | return OK; 61 | } 62 | 63 | Status SynchronousBuffer::MemoryGet(void* dest, size_t length) 64 | { 65 | // TODO copy something from buffer to dest 66 | 67 | int rtn; 68 | if ((rtn = pthread_mutex_lock(&bufferLock)) != 0) 69 | fprintf(stderr, "pthread_mutex_lock %d", rtn), exit(1); 70 | 71 | while (IsBufferEmpty()) { 72 | if( finish == true){ 73 | pthread_mutex_unlock(&bufferLock); 74 | return FINISH_READ; 75 | } 76 | if ((rtn = pthread_cond_wait(&bufferNotEmpty, 
&bufferLock)) != 0) 77 | fprintf(stderr, "pthread_cond_wait %d", rtn), exit(1); 78 | } 79 | 80 | memcpy((char*)dest, (const char*)(base + readPos), length); 81 | readPos = (readPos + length) % pageSize; 82 | 83 | if ((rtn = pthread_cond_broadcast(&bufferNotFull)) != 0) 84 | fprintf(stderr, "pthread_cond_signal %d", rtn), exit(1); 85 | 86 | pthread_mutex_unlock(&bufferLock); 87 | 88 | return OK; 89 | } 90 | 91 | 92 | -------------------------------------------------------------------------------- /TripleBit/MemoryBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef MEMORYBUFFER_H_ 2 | #define MEMORYBUFFER_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 
//---------------------------------------------------------------------------

#define MEMORYBUFFER_DEBUG 1

class EntityIDBuffer;
class ColumnBuffer;
#include "TripleBit.h"
#include 

#include 
#include 

#include 

using namespace std;

// Resizable raw byte buffer. Only declarations are visible here; allocation
// and growth details live in the implementation file.
class MemoryBuffer {
	unsigned size;
	char* buffer;
	char* currentHead;
public:
	static unsigned pagesize;
public:
	// These two classes access the private members directly.
	friend class EntityIDBuffer;
	friend class ColumnBuffer;
	MemoryBuffer();
	MemoryBuffer(unsigned size);
	virtual ~MemoryBuffer();
	// Two resize overloads: one returns a pointer, the other a Status and takes
	// a `zero` flag (presumably "zero-fill the new space" — TODO confirm).
	char* resize(unsigned increasedSize);
	Status resize(unsigned increasedSize, bool zero);
	char* getBuffer();
	char* getBuffer(int pos);
	// getSize() and get_length() are synonyms: both return `size`.
	size_t getSize() { return size; }
	size_t get_length() {return size; }
	// Returns the `buffer` pointer.
	char* get_address() { return buffer; }
	void memset(char value);
	void save(ofstream& ofile);
	void load(ifstream& ifile);

private:
};

/////////////////////////////////////////////////////////////////////////////////////////
//// class URIStatisticsBuffer;
////////////////////////////////////////////////////////////////////////////////////////
// Counters keyed by ID, stored as unsigned short values on top of a
// MemoryBuffer (see getCount()'s out-parameter and the `p` member).
class URIStatisticsBuffer {
	MemoryBuffer* buffer;
	int chunkCount;
	unsigned short* p;
public:
	static int totalStatisticsPerPage;
	Status addCount(ID id);
	// Writes the counter for `id` into `count`.
	Status getCount(ID id, unsigned short& count);
	URIStatisticsBuffer();
	virtual ~URIStatisticsBuffer();
	void save(ofstream& ofile);
	void load(ifstream& ifile);
private:
	unsigned short* getBuffer(ID id);

private:
};

///////////////////////////////////////////////////////////////////////////////////////
/// class StatementReificationTable
//////////////////////////////////////////////////////////////////////////////////////
// Table relating a statement ID to a column ID (see insertStatementReification
// and getColumn), stored in a MemoryBuffer.
class StatementReificationTable {
private:
	MemoryBuffer * buffer;
	ID* currentBuffer; //current insert buffer;
	ID pos; //current position that can be inserted into;
public:
	StatementReificationTable();
	Status insertStatementReification(ID statement, ID column);
	// Writes the column recorded for `statement` into `column`.
	Status getColumn(ID statement, ID& column);
	virtual ~StatementReificationTable();

	void save(ofstream& ofile);
	void load(ifstream& ifile);
private:
};

#endif /* MEMORYBUFFER_H_ */

--------------------------------------------------------------------------------
/TripleBit/BitVectorWAH.h:
--------------------------------------------------------------------------------
#ifndef _BIT_VECTOR_H_
#define _BIT_VECTOR_H_

//---------------------------------------------------------------------------
// TripleBit
// (c) 2011 Massive Data Management Group @ SCTS & CGCL.
// Web site: http://grid.hust.edu.cn/triplebit
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace std; 22 | 23 | struct CompressBlock 24 | { 25 | ID start; 26 | size_t size; 27 | size_t capacity; 28 | bitVector_ptr ptr; 29 | 30 | CompressBlock() 31 | { 32 | ptr = (bitVector_ptr)malloc( 7 * sizeof(word) ); 33 | 34 | memset((void*)ptr, 0, 7 * sizeof(word)); 35 | 36 | start = 0; 37 | capacity = 7; 38 | size = 0; 39 | } 40 | 41 | ~CompressBlock() 42 | { 43 | free(ptr); 44 | } 45 | }; 46 | 47 | struct BufferBlock{ 48 | word* ptr; 49 | unsigned int startBit; 50 | unsigned int endBit; 51 | }; 52 | 53 | class BitVectorWAH 54 | { 55 | public: 56 | BitVectorWAH(); 57 | BitVectorWAH(const BitVectorWAH &bVec); 58 | virtual ~BitVectorWAH(); 59 | //void initialize(); 60 | //void destroy(); 61 | void set(unsigned int pos, bool value=true); 62 | size_t getSize() const; 63 | vector* getVectorBuffer(); 64 | bool getValue(unsigned int pos); 65 | void completeInsert(); 66 | static bool decode(int &unit,bool& value); 67 | void print(); 68 | static BitVectorWAH* convertVector(vector& flagVector); 69 | int getValueOnPos(uint& pos, int& chFlag, bool& isCompressed, unsigned int& chunkNo, bool& value); 70 | private: 71 | static bool parseBit(word& temp, bool value); 72 | void insertIntoVector(unsigned int pos); 73 | bool parseBit(int &unitNo,bool& value); 74 | void increaseOnesCount(word& unit); 75 | void increaseZerosCount(unsigned int pos,int count); 76 | void addZeroCount(int count); 77 | void insertValue(unsigned char temp); 78 | void encode(unsigned short int units); 79 | private: 80 | bitVector_ptr pBitVec; 81 | size_t bitVecSize; 82 | size_t capacity; 83 | //unsigned int startPos; 84 | unsigned int currentPos; 85 | size_t current_capacity; 86 | CompressBlock block; 87 | vector < BufferBlock > BufferList; 88 | BufferBlock Buff; 89 | unsigned int buffNo; 90 | unsigned int parsedBit; 91 | public: 92 | 
//bit vector operations; 93 | static ID* XOR(BitVectorWAH* vec1, BitVectorWAH* vec2); 94 | static ID* XOR(BitVectorWAH* vec1, ID* vec2); 95 | static ID* XOR(ID* vec1, ID* vec2, size_t len); 96 | static ID* AND(BitVectorWAH* vec1, BitVectorWAH* vec2); 97 | static ID* AND(BitVectorWAH* vec1, ID* vec2); 98 | static ID* AND(ID* vec1, ID* vec2, size_t len); 99 | 100 | void save(ofstream& ofile); 101 | void load(ifstream& ifile); 102 | private: 103 | }; 104 | 105 | #endif //_BIT_VECTOR_H 106 | -------------------------------------------------------------------------------- /TripleBit/util/BufferManager.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "BufferManager.h" 14 | #include "../EntityIDBuffer.h" 15 | 16 | BufferManager* BufferManager::instance = NULL; 17 | 18 | BufferManager::BufferManager() 19 | { 20 | // TODO Auto-generated constructor stub 21 | for (int i = 0; i < INIT_BUFFERS; i++) { 22 | EntityIDBuffer* buffer = new EntityIDBuffer; 23 | bufferPool.push_back(buffer); 24 | cleanBuffer.push_back(buffer); 25 | } 26 | usedBuffer.clear(); 27 | bufferCnt = INIT_BUFFERS; 28 | } 29 | 30 | BufferManager::~BufferManager() 31 | { 32 | // TODO Auto-generated destructor stub 33 | } 34 | 35 | bool BufferManager::expandBuffer() 36 | { 37 | for (int i = 0; i < INCREASE_BUFFERS; i++) { 38 | EntityIDBuffer* buffer = new EntityIDBuffer; 39 | if (buffer == NULL) { 40 | bufferCnt += i; 41 | return false; 42 | } 43 | bufferPool.push_back(buffer); 44 | cleanBuffer.push_back(buffer); 45 | } 46 | 47 | bufferCnt += INCREASE_BUFFERS; 48 | return true; 49 | } 50 | 51 | EntityIDBuffer* BufferManager::getNewBuffer() 52 | { 53 | if (usedBuffer.size() == bufferPool.size()) { 54 | if (expandBuffer() == false) 55 | return NULL; 56 | } 57 | EntityIDBuffer* buffer = cleanBuffer.front(); 58 | cleanBuffer.erase(cleanBuffer.begin());; 59 | usedBuffer.push_back(buffer); 60 | 61 | return buffer; 62 | } 63 | 64 | void BufferManager::destroyBuffers() 65 | { 66 | for (size_t i = 0; i < bufferPool.size(); i++) { 67 | delete bufferPool[i]; 68 | } 69 | 70 | usedBuffer.clear(); 71 | cleanBuffer.clear(); 72 | } 73 | 74 | Status BufferManager::freeBuffer(EntityIDBuffer* buffer) 75 | { 76 | vector::iterator iter; 77 | iter = find(usedBuffer.begin(), usedBuffer.end(), buffer); 78 | if (iter != usedBuffer.end()) { 79 | usedBuffer.erase(iter); 80 | cleanBuffer.push_back(*iter); 81 | (*iter)->empty(); 82 | return OK; 83 | } else { 84 | return NOT_FOUND; 85 | } 86 | } 87 | 88 | Status BufferManager::reserveBuffer() 89 | { 90 | usedBuffer.clear(); 91 | 
cleanBuffer.clear(); 92 | int i; 93 | for (i = 0; i < INIT_BUFFERS; i++) { 94 | bufferPool[i]->empty(); 95 | cleanBuffer.push_back(bufferPool[i]); 96 | } 97 | 98 | vector::iterator iter = bufferPool.begin() + i; 99 | vector::iterator start = iter; 100 | for (; iter != bufferPool.end(); iter++) { 101 | delete *iter; 102 | *iter = NULL; 103 | } 104 | bufferPool.erase(start,iter); 105 | 106 | 107 | return OK; 108 | } 109 | 110 | BufferManager* BufferManager::getInstance() 111 | { 112 | if (instance == NULL) { 113 | instance = new BufferManager; 114 | } 115 | 116 | return instance; 117 | } 118 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SOURCE_DIR := src 2 | 3 | # COMPILER := icc 4 | COMPILER := g++ 5 | 6 | # release 7 | BINARY_DIR := bin/lrelease 8 | CPPFLAGS := -Wall -g -O3 `raptor-config --cflags` 9 | LIBS := -lpthread -L/usr/local/lib `raptor-config --libs` 10 | 11 | # debug 12 | # BINARY_DIR := bin/ldebug 13 | # CPPFLAGS := -g -fPIC -DDEBUG 14 | COMPILE.cpp = $(COMPILER) $(CFLAGS) $(CPPFLAGS) -c 15 | LINK.cpp = $(COMPILER) $(LIBS) 16 | 17 | %.o: %.cpp 18 | #$(call make-depend,$<,$@,$(subst .o,.d,$@)) 19 | $(COMPILE.cpp) $< -o $@ 20 | 21 | 22 | # $(call source-dir-to-binary-dir, directory-list) 23 | source-dir-to-binary-dir = $(addprefix $(BINARY_DIR)/,$1) 24 | 25 | # $(call source-to-object, source-file-list) 26 | source-to-object = $(call source-dir-to-binary-dir, $(subst .cpp,.o,$1)) 27 | 28 | # $(subdirectory) 29 | subdirectory = $(patsubst %/module.mk,%, \ 30 | $(word \ 31 | $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))) 32 | 33 | # $(call make-depend,source-file,object-file,depend-file) 34 | define make-depend 35 | g++ -MM \ 36 | -MF$3 \ 37 | -MP \ 38 | -MT$2 \ 39 | $(CFLAGS) \ 40 | $(CPPFLAGS) \ 41 | $(TARGET_ARCH) \ 42 | $1 43 | endef 44 | 45 | 46 | # $(call make-library, library-name, source-file-list) 47 | define 
make-library 48 | libraries += $(BINARY_DIR)/$1 49 | sources += $2 50 | 51 | $(BINARY_DIR)/$1: $(call source-dir-to-binary-dir, $(subst .cpp,.o,$2)) 52 | $(AR) $(ARFLAGS) $$@ $$^ 53 | 54 | endef 55 | 56 | # $(call make-program, program-name, library-list, source-file-list) 57 | define make-program 58 | programs += $(BINARY_DIR)/$1 59 | sources += $3 60 | 61 | $(BINARY_DIR)/$1: $(call source-dir-to-binary-dir, $(subst .cpp,.o,$3) $2 ) 62 | $(LINK.cpp) -o $$@ $$^ -lpthread 63 | 64 | endef 65 | 66 | # $(compile-rules) 67 | define compile-rules 68 | $(foreach f,$(local_src),$(call one-compile-rule,$(call source-to-object,$f),$f)) 69 | 70 | endef 71 | 72 | 73 | # $(call one-compile-rule, binary-file, source-file) 74 | define one-compile-rule 75 | $1: $2 76 | $(call make-depend,$2,$1,$(subst .o,.d,$1)) 77 | $(COMPILE.cpp) -o $1 $2 78 | 79 | endef 80 | 81 | 82 | modules := TripleBit TripleBitQuery BuildTripleBitFromN3 83 | programs := 84 | libraries := 85 | sources := 86 | 87 | objects = $(call source-to-object,$(sources)) 88 | dependencies = $(subst .o,.d,$(objects)) 89 | 90 | include_dirs := TripleBit 91 | CPPFLAGS += $(addprefix -I ,$(include_dirs)) 92 | 93 | MKDIR := mkdir -p 94 | MV := mv -f 95 | RM := rm -f 96 | SED := sed 97 | TEST := test 98 | 99 | 100 | create-output-directories := \ 101 | $(shell for f in $(call source-dir-to-binary-dir,$(modules)); \ 102 | do \ 103 | $(TEST) -d $$f || $(MKDIR) $$f; \ 104 | done) 105 | 106 | 107 | all: 108 | 109 | include $(addsuffix /module.mk,$(modules)) 110 | 111 | 112 | .PHONY: all 113 | 114 | all: $(programs) 115 | 116 | .PHONY: libraries 117 | 118 | libraries: $(libraries) 119 | 120 | .PHONY: clean 121 | 122 | clean: 123 | $(RM) -r $(BINARY_DIR) 124 | 125 | 126 | 127 | ifneq "$(MAKECMDGOALS)" "clean" 128 | -include $(dependencies) 129 | endif 130 | 131 | -------------------------------------------------------------------------------- /TripleBit/TripleBitBuilder.h: 
--------------------------------------------------------------------------------
#ifndef TRIPLEBITBUILDER_H_
#define TRIPLEBITBUILDER_H_

//---------------------------------------------------------------------------
// TripleBit
// (c) 2011 Massive Data Management Group @ SCTS & CGCL.
// Web site: http://grid.hust.edu.cn/triplebit
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------

#define TRIPLEBITBUILDER_DEBUG 1

class PredicateTable;
class URITable;
class URIStatisticsBuffer;
class StatementReificationTable;
class FindColumns;
class BitmapBuffer;
class Sorter;
class TempFile;
class StatisticsBuffer;

#include "TripleBit.h"
#include "StatisticsBuffer.h"

#include 
#include 
#include 
#include 

#include "TurtleParser.h"
#include "ThreadPool.h"
#include "TempFile.h"

using namespace std;

// Builds a TripleBit database: parses triples into temp files, resolves them
// and builds the index (see resolveTriples / buildIndex / endBuild). Only the
// inline helpers are defined here; everything else is declared.
class TripleBitBuilder {
private:
	//MySQL* mysql;
	BitmapBuffer* bitmap;
	PredicateTable* preTable;
	URITable* uriTable;
	vector predicates;
	string dir;
	/// statistics buffer;
	StatisticsBuffer* statBuffer[4];
	StatementReificationTable* staReifTable;
	FindColumns* columnFinder;
	fstream statementFile;
public:
	TripleBitBuilder();
	TripleBitBuilder(const string dir);
	Status initBuild();
	Status startBuild();
	static const char* skipIdIdId(const char* reader);
	// Comparators over serialised triples; NOTE(review): the digits presumably
	// give the order in which the three components are compared — confirm.
	static int compareValue(const char* left, const char* right);
	static int compare213(const char* left, const char* right);
	static int compare231(const char* left, const char* right);
	static int compare123(const char* left, const char* right);
	static int compare321(const char* left, const char* right);
	// Reads three consecutive IDs from `data` into v1, v2, v3; each readId
	// call continues from the pointer returned by the previous one.
	static inline void loadTriple(const char* data, ID& v1, ID& v2, ID& v3) {
		TempFile::readId(TempFile::readId(TempFile::readId(data, v1), v2), v3);
	}

	// Three-way comparison: -1 if l < r, 1 if l > r, 0 if equal.
	static inline int cmpValue(ID l ,ID r) {
		return (l < r) ? -1 : ((l > r) ? 1 : 0);
	}
	// Lexicographic three-way comparison of (l1,l2,l3) against (r1,r2,r3).
	static inline int cmpTriples(ID l1, ID l2, ID l3, ID r1, ID r2, ID r3) {
		int c = cmpValue(l1, r1);
		if(c)
			return c;
		c = cmpValue(l2, r2);
		if(c)
			return c;
		return cmpValue(l3, r3);

	}
	// Returns the statistics buffer stored at the slot selected by `type`.
	StatisticsBuffer* getStatBuffer(StatisticsBuffer::StatisticsType type) {
		return statBuffer[static_cast(type)];
	}

	Status resolveTriples(TempFile& rawFacts, TempFile& facts);
	Status startBuildN3(string fileName);
	bool N3Parse(istream& in, const char* name, TempFile&);
	Status importFromMySQL(string db, string server, string username, string password);
	void NTriplesParse(const char* subject, const char* predicate, const char* object, TempFile&);
	ID generateXY(ID& subjectID, ID& objectID);
	Status buildIndex();
	Status endBuild();

	static bool isStatementReification(const char* object);
	virtual ~TripleBitBuilder();
};
#endif /* TRIPLEBITBUILDER_H_ */

--------------------------------------------------------------------------------
/TripleBit/TripleBitRepository.h:
--------------------------------------------------------------------------------
#ifndef TRIPLEBITRESPOSITORY_H_
#define TRIPLEBITRESPOSITORY_H_

//---------------------------------------------------------------------------
// TripleBit
// (c) 2011 Massive Data Management Group @ SCTS & CGCL.
// Web site: http://grid.hust.edu.cn/triplebit
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------

class TripleBitBuilder;
class PredicateTable;
class URITable;
class URIStatisticsBuffer;
class BitmapBuffer;
class FindEntityID;
class EntityIDBuffer;
class MMapBuffer;
class TripleBitQuery;
class RDFQuery;

#include "IRepository.h"
#include "TripleBit.h"
#include "StatisticsBuffer.h"

// IRepository implementation that bundles the predicate/URI tables, the bitmap
// buffer, four statistics buffers and the query objects. Only the small
// accessors are defined inline; everything else is declared here.
class TripleBitRepository : public IRepository{
	PredicateTable * preTable;
	URITable* UriTable;
	BitmapBuffer* bitmapBuffer;
	StatisticsBuffer* subjectStat, *subPredicateStat, *objectStat, *objPredicateStat;
	FindEntityID* columnFinder;
	EntityIDBuffer* buffer;
	int pos;

	MMapBuffer* bitmapImage, *bitmapIndexImage, *bitmapPredicateImage;

	// Result collection of the last query plus a begin/end iterator pair.
	vector resultSet;
	vector::iterator resBegin;
	vector::iterator resEnd;

	TripleBitQuery* bitmapQuery;
	RDFQuery* query;
public:
	TripleBitRepository();
	virtual ~TripleBitRepository();

	//subject/object (id,string) conversion
	bool find_soid_by_string(SOID& soid, const std::string& str);
	bool find_string_by_soid(std::string& str, SOID& soid);

	//predicate (id,string) conversion
	bool find_pid_by_string(PID& pid, const std::string& str);
	bool find_string_by_pid(std::string& str, ID& pid);

	//create a repository for the given path.
	static TripleBitRepository * create(const string path);

	//Get some statistics information
	int get_predicate_count(PID pid);
	int get_subject_count(ID subjectID);
	int get_object_count(ID objectID);
	int get_subject_predicate_count(ID subjectID, ID predicateID);
	int get_object_predicate_count(ID objectID, ID predicateID);
	int get_subject_object_count(ID subjectID, ID objectID);

	// Plain accessors for the owned components.
	PredicateTable* getPredicateTable() const { return preTable; }
	URITable* getURITable() const { return UriTable; }
	BitmapBuffer* getBitmapBuffer() const { return bitmapBuffer; }

	// Maps a statistics type to the matching buffer; returns NULL when `type`
	// is none of the four handled enumerators.
	StatisticsBuffer* getStatisticsBuffer(StatisticsBuffer::StatisticsType type) {
		switch(type) {
		case StatisticsBuffer::SUBJECT_STATIS:
			return subjectStat;
		case StatisticsBuffer::OBJECT_STATIS:
			return objectStat;
		case StatisticsBuffer::SUBJECTPREDICATE_STATIS:
			return subPredicateStat;
		case StatisticsBuffer::OBJECTPREDICATE_STATIS:
			return objPredicateStat;
		}

		return NULL;
	}
	//scan the database;
	Status getSubjectByObjectPredicate(ID oid, ID pod);
	ID next();

	//lookup string id;
	bool lookup(const string& str, ID& id);
	Status nextResult(string& str);
	Status execute(string query);
	// Number of entries currently held in resultSet.
	size_t getResultSize() const { return resultSet.size(); }

	void cmd_line(FILE* fin, FILE* fout);
	static int colNo;
};

#endif /* TRIPLEBITRESPOSITORY_H_ */

--------------------------------------------------------------------------------
/TripleBit/StringIDSegment.h:
--------------------------------------------------------------------------------
#ifndef STRINGIDSEGMENT_H_
#define STRINGIDSEGMENT_H_

//---------------------------------------------------------------------------
// TripleBit
// (c) 2011 Massive Data Management Group @ SCTS & CGCL.
// Web site: http://grid.hust.edu.cn/triplebit
//
// This work is licensed under the Creative Commons
// Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy
// of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/
// or send a letter to Creative Commons, 171 Second Street, Suite 300,
// San Francisco, California, 94105, USA.
//---------------------------------------------------------------------------

#include "TripleBit.h"
#include "ObjectPool.h"
#include "MMapBuffer.h"

//idStroffPool entry: the offset of a string inside the string pool
struct IDStroffEntry{
	OffsetType stroff;
};

// Bidirectional string <-> ID mapping built from three structures: a string
// pool, a hash table from string to pool offset, and a fixed-size pool mapping
// ID to pool offset.
class StringIDSegment {
	double fillRate; // max hash table fill rate
public:
	ObjectPool * stringPool; // a variable-length string table (length,string;length,string;...)
	MMapBuffer * stringHashTable; // hash table for fast lookup of a string's offset in stringPool
	FixedObjectPool * idStroffPool; // a fixed pool (id,stroff) for id-to-string lookup

public:
	StringIDSegment();

	virtual ~StringIDSegment();

private:
	//
	void buildStringHashTable();

	//add string to stringPool, and update the stringHashTable.
	OffsetType addStringToStringPoolAndUpdateStringHashTable( LengthString * aStr, ID id);

	// Appends `entry` to idStroffPool and returns the ID the pool assigns.
	ID addIDStroffToIdStroffPool(IDStroffEntry * entry){
		return idStroffPool->append_object_get_id(entry);
	}

	OffsetType findStringOffset(LengthString * aStr);
public:
	//add string to StringIDSegment, update stringPool, stringHashTable, idStroffPool.
	ID addStringToSegment(LengthString * aStr);

	// Convenience overload: wraps `aStr` in a temporary LengthString.
	// NOTE(review): the temporary is heap-allocated and leaks if the inner
	// call throws; a stack object would avoid that.
	ID addStringToSegment(std::string& aStr){
		LengthString* lstr = new LengthString(aStr);
		ID id = addStringToSegment(lstr);
		delete lstr;
		return id;
	}

	// Convenience overload for C strings; same temporary-object pattern.
	ID addStringToSegment(const char* str) {
		LengthString* lstr = new LengthString(str);
		ID id = addStringToSegment(lstr);
		delete lstr;
		return id;
	}
	//reserve more memory for fast insert.
	void reserveStringPoolSpace(OffsetType size){
		stringPool->reserve(size);
	}
	// Same as above, for the id -> offset pool.
	void reserveIdStroffPoolSpace(OffsetType size){
		idStroffPool->reserve(size);
	}

	//Optimize memory
	Status optimize();



	//(id ,string) finding
	bool findStringById(std::string& aStr, const ID& id);
	bool findIdByString(ID& id, const std::string& aStr);

	//second way (id,string) finding
	bool findStringById(LengthString * aStr, const ID& id);
	bool findIdByString(ID& id, LengthString * aStr);

	// Convenience overload for C strings; wraps `str` in a temporary
	// LengthString and forwards to the LengthString overload.
	bool findIdByString(ID& id, const char* str) {
		LengthString* lstr = new LengthString(str);
		bool b = findIdByString(id, lstr);
		delete lstr;
		return b;
	}

	// NOTE(review): the statement below is garbled as it stands (it does not
	// parse); from what remains it prints the three component sizes and
	// returns their sum — restore it from the original header.
	size_t getSize() {
		cout<usage()<<" : "<get_length()<<" : "<usage()<usage() + stringHashTable->get_length() + idStroffPool->usage();
	}
	//for test
	void cmd_line(FILE * fin,FILE * fout);
	void dump();

	ID getMaxID();
public:
	// Factory functions taking a directory and a segment name.
	static StringIDSegment* create(const string dir, const string segmentName);
	static StringIDSegment* load(const string dir, const string segmentName);
};

#endif /* STRINGIDSEGMENT_H_ */

--------------------------------------------------------------------------------
/TripleBit/ObjectPool.cpp:
--------------------------------------------------------------------------------
//---------------------------------------------------------------------------
// TripleBit
// (c) 
2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "Hash.h" 14 | #include "ObjectPool.h" 15 | 16 | void FixedObjectPool::clear() 17 | { 18 | ObjectPoolMeta * meta = get_meta(); 19 | meta->size = 0; 20 | meta->usage = sizeof(ObjectPoolMeta); 21 | } 22 | 23 | OffsetType FixedObjectPool::next_offset( OffsetType offset ) 24 | { 25 | OffsetType ret = offset+1; 26 | return ret<=get_meta()->size?ret:0; 27 | } 28 | 29 | Status FixedObjectPool::get_by_offset( OffsetType offset, OffsetType * plength, void **ppdata ) 30 | { 31 | ObjectPoolMeta * meta = get_meta(); 32 | assert(offset <= meta->size); 33 | if(plength) 34 | *plength = meta->entrysize; 35 | *ppdata = data->get_address() + sizeof(ObjectPoolMeta) + (offset-1)*(meta->entrysize); 36 | return OK; 37 | } 38 | 39 | 40 | Status FixedObjectPool::get_by_id( ID id, OffsetType * plength, void **ppdata ) 41 | { 42 | // TODO: optimaze 43 | ObjectPoolMeta * meta = get_meta(); 44 | char * p = data->get_address() + sizeof(ObjectPoolMeta); 45 | char * end = data->get_address() + meta->usage; 46 | 47 | id = id - 1; 48 | *ppdata = p + id * meta->entrysize; 49 | *plength = meta->entrysize; 50 | if(*ppdata < end) 51 | return OK; 52 | return NOT_FOUND; 53 | } 54 | 55 | ID FixedObjectPool::append_object_get_id( const void * pdata ) 56 | { 57 | ObjectPoolMeta * meta = get_meta(); 58 | 59 | if( meta->usage + meta->entrysize > meta->length ){ 60 | OffsetType new_length = expand(meta->length,meta->entrysize,meta->classtype); 61 | Status ret = 
data->resize(new_length, false); 62 | if(ret!=OK) 63 | return 0; 64 | meta = get_meta(); 65 | meta->length = new_length; 66 | } 67 | 68 | memcpy( data->get_address() + meta->usage , pdata, get_meta()->entrysize ); 69 | meta->usage += meta->entrysize; 70 | return meta->size++ + meta->id_offset; 71 | } 72 | 73 | bool FixedObjectPool::initialize( int type,const char * name,ID init_capacity,OffsetType entrysize ) 74 | { 75 | if(data) 76 | delete data; 77 | data = new MMapBuffer(name, sizeof(ObjectPoolMeta)+init_capacity*entrysize); 78 | if(data == NULL){ 79 | // TODO: log 80 | return false; 81 | } 82 | ObjectPoolMeta * meta = get_meta(); 83 | memset(meta,0,sizeof(ObjectPoolMeta)); 84 | meta->entrysize = entrysize; 85 | meta->length = data->get_length(); 86 | meta->usage = sizeof(ObjectPoolMeta); 87 | meta->type = type; 88 | meta->size = 1; 89 | return true; 90 | } 91 | 92 | FixedObjectPool * FixedObjectPool::create( int type,const char * name,ID init_capacity,OffsetType entrysize ) 93 | { 94 | FixedObjectPool * ret = new FixedObjectPool(); 95 | if(ret->initialize(type,name,init_capacity,entrysize)==false){ 96 | delete ret; 97 | return NULL; 98 | } 99 | return ret; 100 | } 101 | 102 | FixedObjectPool* FixedObjectPool::load(const char* name) 103 | { 104 | FixedObjectPool* ret = new FixedObjectPool(); 105 | ret->data = MMapBuffer::create(name,0); 106 | if(ret->data == NULL){ 107 | // TODO: log 108 | delete ret; 109 | return NULL; 110 | } 111 | return ret; 112 | } 113 | 114 | OffsetType FixedObjectPool::first_offset() 115 | { 116 | if(get_meta()->size>0) 117 | return 1; 118 | else 119 | return 0; 120 | } 121 | -------------------------------------------------------------------------------- /TripleBit/TempFile.h: -------------------------------------------------------------------------------- 1 | #ifndef _TEMPFILE_H_ 2 | #define _TEMPFILE_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data 
Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include "TripleBit.h" 17 | #include 18 | #include 19 | //--------------------------------------------------------------------------- 20 | #if defined(_MSC_VER) 21 | typedef unsigned __int64 uint64_t; 22 | #else 23 | #include 24 | #endif 25 | //--------------------------------------------------------------------------- 26 | /// A temporary file 27 | class TempFile { 28 | private: 29 | /// The next id 30 | static unsigned id; 31 | 32 | /// The base file name 33 | std::string baseName; 34 | /// The file name 35 | std::string fileName; 36 | /// The output 37 | std::ofstream out; 38 | 39 | /// The buffer size 40 | static const unsigned bufferSize = 16384; 41 | /// The write buffer 42 | char writeBuffer[bufferSize]; 43 | /// The write pointer 44 | unsigned writePointer; 45 | 46 | /// Construct a new suffix 47 | static std::string newSuffix(); 48 | 49 | public: 50 | /// Constructor 51 | TempFile(const std::string& baseName); 52 | /// Destructor 53 | ~TempFile(); 54 | 55 | /// Get the base file name 56 | const std::string& getBaseFile() const { 57 | return baseName; 58 | } 59 | /// Get the file name 60 | const std::string& getFile() const { 61 | return fileName; 62 | } 63 | 64 | /// Flush the file 65 | void flush(); 66 | /// Close the file 67 | void close(); 68 | /// Discard the file 69 | void discard(); 70 | 71 | /// Write a string 72 | void writeString(unsigned len, const char* str); 73 | /// Write a id 74 | /// flag==0 subject 75 | /// flag==1 object 76 
| /// flag==2 predicate 77 | void writeId(ID id, unsigned char flag); 78 | void writeId(ID id); 79 | /// Raw write 80 | void write(unsigned len, const char* data); 81 | 82 | /// Skip a predicate 83 | static const char* skipId(const char* reader); 84 | /// Skip a string 85 | static const char* skipString(const char* reader); 86 | /// Read an id 87 | static const char* readId(const char* reader, ID& id); 88 | /// Read a string 89 | static const char* readString(const char* reader, unsigned& len, const char*& str); 90 | }; 91 | 92 | //---------------------------------------------------------------------------- 93 | /// Maps a file read-only into memory 94 | class MemoryMappedFile 95 | { 96 | private: 97 | /// os dependent data 98 | struct Data; 99 | 100 | /// os dependen tdata 101 | Data* data; 102 | /// Begin of the file 103 | const char* begin; 104 | /// End of the file 105 | const char* end; 106 | 107 | public: 108 | /// Constructor 109 | MemoryMappedFile(); 110 | /// Destructor 111 | ~MemoryMappedFile(); 112 | 113 | /// Open 114 | bool open(const char* name); 115 | /// Close 116 | void close(); 117 | 118 | /// Get the begin 119 | const char* getBegin() const { return begin; } 120 | /// Get the end 121 | const char* getEnd() const { return end; } 122 | 123 | /// Ask the operating system to prefetch a part of the file 124 | void prefetch(const char* start,const char* end); 125 | }; 126 | //--------------------------------------------------------------------------- 127 | #endif 128 | -------------------------------------------------------------------------------- /TripleBit/BitmapWAH.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 
4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "BitmapWAH.h" 14 | 15 | #if _MSC_VER > 1000 16 | #pragma once 17 | #pragma warning(disable: 4786) 18 | #endif // _MSC_VER > 1000 19 | ////////////////////////////////////////////////////////////////////// 20 | // Construction/Destruction 21 | ////////////////////////////////////////////////////////////////////// 22 | 23 | BitmapWAH::BitmapWAH() 24 | { 25 | // bitMap = (BitMapType*)malloc(sizeof(BitMapType) * BITMAP_INITIAL_SIZE); 26 | /*bitMap = Alloc::allocate(BITMAP_INITIAL_SIZE); 27 | bitMapSize = 0; 28 | capacity = BITMAP_INITIAL_SIZE; 29 | unsigned int i; 30 | for(i = 0; i < capacity; i++) 31 | { 32 | bitMap[i] = NULL; 33 | }*/ 34 | bitMapSize = 0; 35 | } 36 | 37 | BitmapWAH::~BitmapWAH() 38 | { 39 | //unsigned int size; 40 | /*unsigned int i; 41 | for( i = 0; i < bitMapSize; i++) 42 | { 43 | //bitMap[i]->destroy(); 44 | ///free(bitMap[i]); 45 | delete bitMap[i]; 46 | } 47 | 48 | free(bitMap);*/ 49 | 50 | cout<<"destroy bitmap"<::iterator iter = bitMap.begin(); 52 | for(;iter != bitMap.end();iter++) 53 | { 54 | delete iter->second; 55 | } 56 | } 57 | 58 | /************************************************************************/ 59 | /* */ 60 | /************************************************************************/ 61 | void BitmapWAH::insert(ID id, unsigned int pos) 62 | { 63 | /*if(id < 0) 64 | return; 65 | if(id <= capacity){ 66 | if(bitMap[id-1] == NULL){ 67 | BitVector * pBitVec = new BitVector; 68 | bitMap[id-1] = pBitVec; 69 | bitMapSize = id; 70 | } 71 | 
bitMap[id-1]->set(pos); 72 | cout<initialize(); 77 | BitVector * pBitVec = new BitVector; 78 | bitMap[id-1] = pBitVec; 79 | bitMap[id-1]->set(pos); 80 | bitMapSize = id; 81 | 82 | cout<set(pos); 87 | }else{ 88 | BitVectorWAH* pBitVec = new BitVectorWAH; 89 | bitMap[id] = pBitVec; 90 | bitMap[id]->set(pos); 91 | bitMapSize++; 92 | } 93 | } 94 | 95 | void BitmapWAH::expandBitmap() 96 | { 97 | /*bitMap = (BitMapType*)realloc(bitMap, BITMAP_INCREASE_SIZE * sizeof(BitVector*)); 98 | int i = 0; 99 | //for(i = 0; i 0 && id <= bitMapSize) 110 | return true; 111 | else 112 | return false;*/ 113 | 114 | map::iterator iter = bitMap.find(id); 115 | if(iter == bitMap.end()) 116 | return false; 117 | else 118 | return true; 119 | } 120 | 121 | size_t BitmapWAH::get_size() 122 | { 123 | size_t size = 0; 124 | 125 | map::iterator iter = bitMap.begin(); 126 | 127 | for(;iter != bitMap.end();iter++) 128 | { 129 | size+=(iter->second)->getSize(); 130 | } 131 | return size; 132 | } 133 | 134 | 135 | /*********************************************************************/ 136 | /* */ 137 | /************************************************************************/ 138 | void BitmapWAH::print() 139 | { 140 | cout<<"the bit map size is "<completeInsert(); 151 | }*/ 152 | 153 | map::iterator iter = bitMap.begin(); 154 | 155 | for(;iter != bitMap.end(); iter++) 156 | { 157 | (iter->second)->completeInsert(); 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /TripleBit/EntityIDBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef ENTITYIDBUFFER_H_ 2 | #define ENTITYIDBUFFER_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 
7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class MemoryBuffer; 17 | class EntityIDBuffer; 18 | 19 | #include "TripleBit.h" 20 | #include "ThreadPool.h" 21 | 22 | class SortTask 23 | { 24 | public: 25 | SortTask() {} 26 | virtual ~SortTask() {} 27 | static int Run(ID* p, size_t length, int sortKey, int IDCount); 28 | static int qcompareInt(const void* a, const void* b); 29 | static int qcompareLongByFirst32(const void* a, const void* b); 30 | static int qcompareLongBySecond32(const void* a, const void* b); 31 | 32 | static bool compareInt(ID a, ID b); 33 | static bool compareLongByFirst32(int64_t a, int64_t b); 34 | static bool compareLongBySecond32(int64_t a, int64_t b); 35 | }; 36 | 37 | class EntityIDBuffer { 38 | public: 39 | int IDCount; //the ID count in a record. 
40 | ID* buffer; 41 | ID* p; 42 | int pos; 43 | size_t usedSize; 44 | size_t totalSize; 45 | int sizePerPage; 46 | 47 | //used to sort 48 | int sortKey; 49 | bool sorted; 50 | bool firstTime; 51 | bool unique; 52 | //used to hash join; 53 | vector hashBucket; 54 | vector prefixSum; 55 | public: 56 | friend class SortTask; 57 | friend class HashJoin; 58 | friend class SortMergeJoin; 59 | friend class ScanMemory; 60 | 61 | Status insertID(ID id); 62 | Status getID(ID& id, size_t _pos); 63 | void empty() { 64 | hashBucket.clear(); 65 | prefixSum.clear(); 66 | 67 | sorted = true; 68 | firstTime = true; 69 | 70 | p = buffer; 71 | usedSize = 0; 72 | pos = 0; 73 | } 74 | 75 | void setSize(size_t size) { 76 | usedSize = size * IDCount; 77 | } 78 | 79 | void setIDCount(int c) { IDCount = c;} 80 | size_t getSize() const { 81 | return usedSize / IDCount; 82 | } 83 | 84 | size_t getCapacity() const { 85 | return totalSize; 86 | } 87 | 88 | int getTotalPerChunk() const { 89 | return sizePerPage / IDCount; 90 | } 91 | 92 | int getIDCount() const { return IDCount; } 93 | 94 | Status sort(int _sortKey) { 95 | if ( ( _sortKey - 1) == sortKey && sorted == true ){ 96 | return OK; 97 | } 98 | 99 | sortKey = _sortKey - 1; 100 | return sort(); 101 | } 102 | Status sort(); 103 | 104 | size_t getEntityIDPos(ID id); 105 | ID* getBuffer() const { 106 | return buffer; 107 | } 108 | 109 | Status mergeIntersection( EntityIDBuffer* entBuffer, char* flags, ID joinKey); 110 | Status intersection( EntityIDBuffer* entBuffer, char* flags, ID joinKey1, ID joinKey2); 111 | Status mergeBuffer(EntityIDBuffer* XTemp, EntityIDBuffer* XYTemp); 112 | static Status mergeSingleBuffer(EntityIDBuffer* entbuffer , ID* buffer1, ID* buffer, size_t size1, size_t size2); 113 | static Status mergeSingleBuffer(EntityIDBuffer* ,EntityIDBuffer* ,EntityIDBuffer* ); 114 | static Status mergeSingleBuffer(EntityIDBuffer* ,EntityIDBuffer*); 115 | void uniqe(); 116 | Status mergeBuffer(ID* destBuffer, ID* buffer1, ID* buffer, 
size_t size1, size_t size2, int IDCount, int key); 117 | Status modifyByFlag( char* flags, int no); 118 | void getMinMax(ID& min, ID& max); 119 | inline void setSortKey(int _sortKey) { 120 | if ( firstTime == true) { 121 | sortKey = _sortKey; 122 | firstTime = false; 123 | return; 124 | } 125 | sortKey = _sortKey; 126 | sorted = false; 127 | } 128 | int getSortKey() const { return sortKey; } 129 | bool isInBuffer(ID res[]); 130 | void print(); 131 | ID getMaxID(); 132 | void initialize(int pageCount = 1); 133 | EntityIDBuffer(); 134 | virtual ~EntityIDBuffer(); 135 | EntityIDBuffer(const EntityIDBuffer* ent); 136 | 137 | EntityIDBuffer* operator=(const EntityIDBuffer* otherBuffer); 138 | ID& operator[] (const size_t ); 139 | Status appendBuffer(const ID* buffer, size_t size); 140 | Status appendBuffer1(const ID* buffer, size_t size); 141 | Status appendBuffer(const EntityIDBuffer* otherBuffer); 142 | static Status swapBuffer(EntityIDBuffer* &buffer1,EntityIDBuffer* &buffer2); 143 | private: 144 | int partition(ID* p, int first, int last); 145 | void quickSort(ID*p, int first, int last); 146 | void quickSort1(ID*p, int first, int last); 147 | void quickSort(ID* p, int size); 148 | void merge(int start1, int end1, int start2, int end2, ID* tempBuffer); 149 | void swapBuffer(ID*& tempBuffer); 150 | 151 | }; 152 | 153 | #endif /* ENTITYIDBUFFER_H_ */ 154 | -------------------------------------------------------------------------------- /TripleBit/TripleBitQuery.h: -------------------------------------------------------------------------------- 1 | #ifndef TRIPLEBITQUERY_H_ 2 | #define TRIPLEBITQUERY_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. 
To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class BitmapBuffer; 17 | class URITable; 18 | class PredicateTable; 19 | class FindEntityID; 20 | class TripleBitRepository; 21 | class TripleBitQueryGraph; 22 | class EntityIDBuffer; 23 | class HashJoin; 24 | 25 | #include "TripleBitQueryGraph.h" 26 | #include "TripleBit.h" 27 | #include "ThreadPool.h" 28 | #include "util/HashJoin.h" 29 | #include "util/SortMergeJoin.h" 30 | #include 31 | #include 32 | 33 | using namespace std; 34 | 35 | typedef map EntityIDListType; 36 | typedef map::iterator EntityIDListIterType; 37 | 38 | class TripleBitQuery { 39 | private: 40 | BitmapBuffer* bitmap; 41 | URITable* UriTable; 42 | PredicateTable* preTable; 43 | FindEntityID* entityFinder; 44 | 45 | TripleBitQueryGraph* _queryGraph; 46 | 47 | TripleBitQueryGraph::SubQuery* _query; 48 | 49 | EntityIDListType EntityIDList; 50 | vector idTreeBFS; 51 | vector leafNode; 52 | vector varVec; 53 | 54 | /// used to get the results; 55 | vector varPos; 56 | vector keyPos; 57 | vector resultPos; 58 | vector verifyPos; 59 | vector resultVec; 60 | vector bufPreIndexs; 61 | bool needselect; 62 | 63 | HashJoin hashJoin; 64 | SortMergeJoin mergeJoin; 65 | 66 | vector* resultPtr; 67 | 68 | float istringtime; 69 | unsigned resultnum; 70 | 71 | public: 72 | TripleBitQuery(TripleBitRepository& repo); 73 | virtual ~TripleBitQuery(); 74 | Status query(TripleBitQueryGraph* queryGraph, vector& resultSet); 75 | void releaseBuffer(); 76 | private: 77 | Status findEntitiesAndJoin(TripleBitQueryGraph::JoinVariableNodeID id, 78 | vector< pair < TripleBitQueryGraph::TripleNodeID, TripleBitQueryGraph::JoinVariableNode::DimType> >& tpnodes, bool firstTime); 79 | Status 
findEntitiesAndJoinFirstTime(vector< pair < TripleBitQueryGraph::TripleNodeID, TripleBitQueryGraph::JoinVariableNode::DimType> >& tpnodes, 80 | ID tripleID, map& firstInsertFlag, TripleBitQueryGraph::JoinVariableNodeID id); 81 | Status modifyEntitiesAndJoin(vector< pair < TripleBitQueryGraph::TripleNodeID, TripleBitQueryGraph::JoinVariableNode::DimType> >& tpnodes, 82 | ID tripleID, TripleBitQueryGraph::JoinVariableNodeID id); 83 | Status getTripleNodeByID(TripleBitQueryGraph::TripleNode*& triple, TripleBitQueryGraph::TripleNodeID nodeID); 84 | EntityType getDimInTriple(vector< pair < TripleBitQueryGraph::TripleNodeID, TripleBitQueryGraph::JoinVariableNode::DimType> >& tpnodes, 85 | TripleBitQueryGraph::TripleNodeID tripleID); 86 | Status getVariableNodeByID(TripleBitQueryGraph::JoinVariableNode*& node, TripleBitQueryGraph::JoinVariableNodeID id); 87 | int getVariableCount(TripleBitQueryGraph::TripleNodeID id); 88 | int getVariableCount(TripleBitQueryGraph::TripleNode* triple); 89 | Status singleVariableJoin(); 90 | Status acyclicJoin(); 91 | Status cyclicJoin(); 92 | bool nodeIsLeaf(TripleBitQueryGraph::JoinVariableNodeID varID); 93 | int getVariablePos(EntityType type, TripleBitQueryGraph::TripleNode* triple); 94 | Status findEntityIDByTriple( TripleBitQueryGraph::TripleNode * triple, EntityIDBuffer* buffer, ID minID, ID maxID,unsigned maxNum=INT_MAX); 95 | Status findEntityIDByKnowBuffer( TripleBitQueryGraph::TripleNode * triple, EntityIDBuffer* buffer, EntityIDBuffer* knowBuffer,EntityType knowElement); 96 | int getVariablePos(TripleBitQueryGraph::JoinVariableNodeID id, TripleBitQueryGraph::TripleNodeID tripleID); 97 | int getVariablePos(TripleBitQueryGraph::JoinVariableNodeID id, TripleBitQueryGraph::TripleNode* triple); 98 | 99 | bool getResult(ID key, std::vector& bufferlist, size_t buf_index); 100 | bool getResult_with_flags(ID key, std::vector& bufferlist, size_t buflist_index, vector flags); 101 | void getResult_join(ID key, std::vector& buffetlist, 
size_t buf_index); 102 | void getResult_join_with_flags(ID key, std::vector& bufferlist, size_t buflist_index, vector flags); 103 | EntityType getKnowBufferType(TripleBitQueryGraph::TripleNode* node1,TripleBitQueryGraph::TripleNode* node2); 104 | 105 | void clearTimeandCount(); 106 | void printIDtoStringTime(); 107 | void displayAllTriples(); 108 | void onePatternWithThreeVariables(); 109 | }; 110 | #endif /* TRIPLEBITQUERY_H_ */ 111 | -------------------------------------------------------------------------------- /TripleBit/RDFQuery.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "RDFQuery.h" 14 | #include "SPARQLLexer.h" 15 | #include "SPARQLParser.h" 16 | #include "QuerySemanticAnalysis.h" 17 | #include "PlanGenerator.h" 18 | #include "TripleBitQuery.h" 19 | #include "TripleBitQueryGraph.h" 20 | #include "TripleBitRepository.h" 21 | 22 | #include 23 | 24 | RDFQuery::RDFQuery(TripleBitQuery* _bitmapQuery, TripleBitRepository* _repo) 25 | { 26 | this->bitmapQuery = _bitmapQuery; 27 | repo = _repo; 28 | this->queryGraph = new TripleBitQueryGraph(); 29 | this->planGen = new PlanGenerator(*repo); 30 | this->semAnalysis = new QuerySemanticAnalysis(*repo); 31 | } 32 | 33 | RDFQuery::~RDFQuery() { 34 | // TODO Auto-generated destructor stub 35 | //delete bitmapQuery; 36 | if(repo != NULL) 37 | delete repo; 38 | repo = NULL; 39 | if(queryGraph != NULL) 40 | delete queryGraph; 41 | queryGraph = NULL; 42 | if(planGen != NULL) 43 | delete planGen; 44 | planGen = NULL; 45 | if(semAnalysis != NULL) 46 | delete semAnalysis; 47 | semAnalysis = NULL; 48 | } 49 | 50 | Status RDFQuery::Execute(string& queryString, vector& resultSet) 51 | { 52 | struct timeval start, end; 53 | 54 | gettimeofday(&start,NULL); 55 | 56 | SPARQLLexer *lexer = new SPARQLLexer(queryString); 57 | SPARQLParser *parser = new SPARQLParser(*lexer); 58 | try { 59 | parser->parse(); 60 | } catch (const SPARQLParser::ParserException& e) { 61 | cerr << "parse error: " << e.message << endl; 62 | return ERROR; 63 | } 64 | 65 | queryGraph->Clear(); 66 | 67 | if(!this->semAnalysis->transform(*parser,*queryGraph)) { 68 | return NOT_FOUND; 69 | } 70 | 71 | if(queryGraph->knownEmpty() == true) { 72 | cerr<<"Empty result"<generatePlan(*queryGraph); 79 | //cout<<"-----------------------------After Generate Plan-------------------------------"<releaseBuffer(); 83 | bitmapQuery->query(queryGraph, resultSet); 84 | gettimeofday(&end,NULL); 85 | cerr<<" time elapsed: "<<((end.tv_sec - start.tv_sec) * 
1000000 + end.tv_usec - start.tv_usec ) / 1000000.0<<" s"<Clear(); 88 | delete lexer; 89 | delete parser; 90 | return OK; 91 | } 92 | 93 | void RDFQuery::Print() 94 | { 95 | TripleBitQueryGraph::SubQuery& query = queryGraph->getQuery(); 96 | unsigned int i, size, j; 97 | size = query.tripleNodes.size(); 98 | cout<<"join triple size: "<& triples = query.tripleNodes; 101 | for ( i = 0; i < size; i++) 102 | { 103 | cout<& variables = query.joinVariables; 113 | 114 | for( i = 0 ; i < size; i++) 115 | { 116 | cout<& nodes = query.joinVariableNodes; 120 | 121 | size = nodes.size(); 122 | cout<<"Join variable nodes size: "<& edge = query.joinVariableEdges; 137 | for( i = 0; i < size; i++) 138 | { 139 | cout<& projection = queryGraph->getProjection(); 157 | 158 | cout<<"variables need to project: "< 17 | #include 18 | #include 19 | #include 20 | //--------------------------------------------------------------------------- 21 | /// Parse a turtle file 22 | class TurtleParser 23 | { 24 | public: 25 | /// A parse error 26 | class Exception {}; 27 | 28 | private: 29 | /// A turtle lexer 30 | class Lexer { 31 | public: 32 | /// Possible tokens 33 | enum Token { Eof, Dot, Colon, Comma, Semicolon, LBracket, RBracket, LParen, RParen, At, Type, Integer, Decimal, Double, Name, A, True, False, String, URI }; 34 | 35 | private: 36 | /// The input 37 | std::istream& in; 38 | /// The putback 39 | Token putBack; 40 | /// The putback string 41 | std::string putBackValue; 42 | /// Buffer for parsing when ignoring the value 43 | std::string ignored; 44 | /// The current line 45 | unsigned line; 46 | 47 | /// Size of the read buffer 48 | static const unsigned readBufferSize = 1024; 49 | /// Read buffer 50 | char readBuffer[readBufferSize]; 51 | /// Read buffer pointers 52 | char* readBufferStart,*readBufferEnd; 53 | 54 | /// Read new characters 55 | bool doRead(char& c); 56 | /// Read a character 57 | bool read(char& c) { if (readBufferStart=Integer) putBackValue=s; } 86 | /// Put a token 
back 87 | void ungetIgnored(Token t) { putBack=t; if (t>=Integer) putBackValue=ignored; } 88 | /// Get the line 89 | unsigned getLine() const { return line; } 90 | }; 91 | /// A triple 92 | struct Triple { 93 | /// The entries 94 | std::string subject,predicate,object; 95 | 96 | /// Constructor 97 | Triple(const std::string& subject,const std::string& predicate,const std::string& object) : subject(subject),predicate(predicate),object(object) {} 98 | }; 99 | 100 | /// The lexer 101 | Lexer lexer; 102 | /// The uri base 103 | std::string base; 104 | /// All known prefixes 105 | std::map prefixes; 106 | /// The currently available triples 107 | std::vector triples; 108 | /// Reader in the triples 109 | unsigned triplesReader; 110 | /// The next blank node id 111 | unsigned nextBlank; 112 | 113 | /// Is a (generalized) name token? 114 | static inline bool isName(Lexer::Token token); 115 | 116 | /// Construct a new blank node 117 | void newBlankNode(std::string& node); 118 | /// Report an error 119 | void parseError(const std::string& message); 120 | /// Parse a qualified name 121 | void parseQualifiedName(const std::string& prefix,std::string& name); 122 | /// Parse a blank entry 123 | void parseBlank(std::string& entry); 124 | /// Parse a subject 125 | void parseSubject(Lexer::Token token,std::string& subject); 126 | /// Parse an object 127 | void parseObject(std::string& object); 128 | /// Parse a predicate object list 129 | void parsePredicateObjectList(const std::string& subject,std::string& predicate,std::string& object); 130 | /// Parse a directive 131 | void parseDirective(); 132 | /// Parse a new triple 133 | void parseTriple(Lexer::Token token,std::string& subject,std::string& predicate,std::string& object); 134 | 135 | public: 136 | /// Constructor 137 | TurtleParser(std::istream& in); 138 | /// Destructor 139 | ~TurtleParser(); 140 | 141 | /// Read the next triple 142 | bool parse(std::string& subject,std::string& predicate,std::string& object); 143 | }; 144 
| //--------------------------------------------------------------------------- 145 | #endif 146 | -------------------------------------------------------------------------------- /TripleBit/SPARQLParser.h: -------------------------------------------------------------------------------- 1 | #ifndef SPARQLParser_H_ 2 | #define SPARQLParser_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include 17 | #include 18 | #include 19 | //--------------------------------------------------------------------------- 20 | class SPARQLLexer; 21 | //--------------------------------------------------------------------------- 22 | /// A parser for SPARQL input 23 | class SPARQLParser 24 | { 25 | public: 26 | /// A parsing exception 27 | struct ParserException { 28 | /// The message 29 | std::string message; 30 | 31 | /// Constructor 32 | ParserException(const std::string& message); 33 | /// Constructor 34 | ParserException(const char* message); 35 | /// Destructor 36 | ~ParserException(); 37 | }; 38 | /// An element in a graph pattern 39 | struct Element { 40 | /// Possible types 41 | enum Type { Variable, String, IRI }; 42 | /// The type 43 | Type type; 44 | /// The string value 45 | std::string value; 46 | /// The id for variables 47 | unsigned id; 48 | }; 49 | 50 | /// A graph pattern 51 | struct Pattern { 52 | /// The entires 53 | Element subject,predicate,object; 54 | 55 | /// 
Constructor 56 | Pattern(Element subject,Element predicate,Element object); 57 | /// Destructor 58 | ~Pattern(); 59 | }; 60 | /// A filter condition 61 | struct Filter { 62 | /// Possible types 63 | enum Type { Normal, Exclude, Path }; 64 | 65 | /// The filtered variable 66 | unsigned id; 67 | /// Valid entries 68 | std::vector values; 69 | /// The type 70 | Type type; 71 | }; 72 | /// A group of patterns 73 | struct PatternGroup { 74 | /// The patterns 75 | std::vector patterns; 76 | /// The filter conditions 77 | std::vector filters; 78 | /// The optional parts 79 | std::vector optional; 80 | /// The union parts 81 | std::vector > unions; 82 | }; 83 | /// The projection modifier 84 | enum ProjectionModifier { Modifier_None, Modifier_Distinct, Modifier_Reduced, Modifier_Count, Modifier_Duplicates }; 85 | 86 | private: 87 | /// The lexer 88 | SPARQLLexer& lexer; 89 | /// The registered prefixes 90 | std::map prefixes; 91 | /// The named variables 92 | std::map namedVariables; 93 | /// The total variable count 94 | unsigned variableCount; 95 | 96 | /// The projection modifier 97 | ProjectionModifier projectionModifier; 98 | /// The projection clause 99 | std::vector projection; 100 | /// The pattern 101 | PatternGroup patterns; 102 | /// The result limit 103 | unsigned limit; 104 | 105 | /// Lookup or create a named variable 106 | unsigned nameVariable(const std::string& name); 107 | 108 | /// Parse a filter condition 109 | void parseFilter(PatternGroup& group,std::map& localVars); 110 | /// Parse an entry in a pattern 111 | Element parsePatternElement(PatternGroup& group,std::map& localVars); 112 | /// Parse blank node patterns 113 | Element parseBlankNode(PatternGroup& group,std::map& localVars); 114 | // Parse a graph pattern 115 | void parseGraphPattern(PatternGroup& group); 116 | // Parse a group of patterns 117 | void parseGroupGraphPattern(PatternGroup& group); 118 | 119 | /// Parse the prefix part if any 120 | void parsePrefix(); 121 | /// Parse the 
projection 122 | void parseProjection(); 123 | /// Parse the from part if any 124 | void parseFrom(); 125 | /// Parse the where part if any 126 | void parseWhere(); 127 | /// Parse the limit part if any 128 | void parseLimit(); 129 | 130 | public: 131 | /// Constructor 132 | explicit SPARQLParser(SPARQLLexer& lexer); 133 | /// Destructor 134 | ~SPARQLParser(); 135 | 136 | /// Parse the input. Throws an exception in the case of an error 137 | void parse(); 138 | 139 | /// Get the patterns 140 | const PatternGroup& getPatterns() const { return patterns; } 141 | 142 | /// Iterator over the projection clause 143 | typedef std::vector::const_iterator projection_iterator; 144 | /// Iterator over the projection 145 | projection_iterator projectionBegin() const { return projection.begin(); } 146 | /// Iterator over the projection 147 | projection_iterator projectionEnd() const { return projection.end(); } 148 | 149 | /// The projection modifier 150 | ProjectionModifier getProjectionModifier() const { return projectionModifier; } 151 | /// The size limit 152 | unsigned int getLimit() const { return limit; } 153 | }; 154 | //--------------------------------------------------------------------------- 155 | #endif 156 | -------------------------------------------------------------------------------- /TripleBit/MMapBuffer.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include "MMapBuffer.h" 23 | 24 | MMapBuffer::MMapBuffer(const char* _filename, size_t initSize) : filename(_filename) { 25 | // TODO Auto-generated constructor stub 26 | fd = open(filename.c_str(), O_CREAT | O_RDWR, 0666); 27 | if(fd < 0) { 28 | perror(_filename); 29 | MessageEngine::showMessage("Create map file error", MessageEngine::ERROR); 30 | } 31 | 32 | bool createNew = false; 33 | 34 | size = lseek(fd, 0, SEEK_END); 35 | if(size < initSize) { 36 | size = initSize; 37 | if(ftruncate(fd, initSize) != 0) { 38 | perror(_filename); 39 | MessageEngine::showMessage("ftruncate file error", MessageEngine::ERROR); 40 | } 41 | createNew = true; 42 | } 43 | if(lseek(fd, 0, SEEK_SET) != 0) { 44 | perror(_filename); 45 | MessageEngine::showMessage("lseek file error", MessageEngine::ERROR); 46 | } 47 | 48 | mmap_addr = (char volatile*)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 49 | if(mmap_addr == MAP_FAILED) { 50 | perror(_filename); 51 | cout<<"size: "<size; i++) { 173 | ch = buffer->mmap_addr[i]; 174 | } 175 | return buffer; 176 | } 177 | -------------------------------------------------------------------------------- /TripleBit/URITable.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. 
To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "URITable.h" 14 | #include "StringIDSegment.h" 15 | #include 16 | 17 | ID URITable::startID = 1; 18 | 19 | ID URITable::getMaxID() 20 | { 21 | return startID; 22 | } 23 | 24 | URITable::URITable() { 25 | SINGLE.assign("single"); 26 | prefix_segment = NULL; 27 | suffix_segment = NULL; 28 | } 29 | 30 | URITable::URITable(const string dir) : SINGLE("single") { 31 | // TODO Auto-generated constructor stub 32 | prefix_segment = StringIDSegment::create(dir, "/uri_prefix"); 33 | suffix_segment = StringIDSegment::create(dir, "/uri_suffix"); 34 | 35 | prefix_segment->addStringToSegment(SINGLE); 36 | } 37 | 38 | URITable::~URITable() { 39 | // TODO Auto-generated destructor stub 40 | #ifdef DEBUG 41 | cout<<"destroy URITable"<findIdByString(id, &searchLen) == false) 62 | return URI_NOT_FOUND; 63 | } else { 64 | char temp[10]; 65 | ID prefixId; 66 | if (prefix_segment->findIdByString(prefixId, &prefix) == false) { 67 | return URI_NOT_FOUND; 68 | } else { 69 | sprintf(temp, "%d", prefixId); 70 | searchStr.assign(suffix.str, suffix.length); 71 | for (size_t i = 0; i < strlen(temp); i++) { 72 | #ifdef USE_C_STRING 73 | searchStr.insert(searchStr.begin() + i, temp[i] - '0' + 1); 74 | #else 75 | searchStr.insert(searchStr.begin() + i, temp[i] - '0'); 76 | #endif 77 | } 78 | 79 | searchLen.str = searchStr.c_str(); 80 | searchLen.length = searchStr.length(); 81 | if (suffix_segment->findIdByString(id, &searchLen) == false) 82 | return URI_NOT_FOUND; 83 | } 84 | } 85 | 86 | searchStr.clear(); 87 | return URI_FOUND; 88 | } 89 | 90 | Status URITable::getPrefix(const char* URI) 91 | { 92 | size_t size = strlen(URI); 93 | int i; 94 | for (i = size - 2; i >= 0; i--) { 95 | if 
(URI[i] == '/') 96 | break; 97 | } 98 | 99 | if (i == -1) { 100 | prefix.str = SINGLE.c_str(); 101 | prefix.length = SINGLE.length(); 102 | suffix.str = URI; 103 | suffix.length = size; 104 | } else { 105 | prefix.str = URI; 106 | prefix.length = i; 107 | suffix.str = URI + i + 1; 108 | suffix.length = size - i - 1; 109 | } 110 | 111 | return OK; 112 | } 113 | 114 | Status URITable::insertTable(const char* URI, ID& id) 115 | { 116 | getPrefix(URI); 117 | char temp[20]; 118 | ID prefixId; 119 | 120 | prefixId = 1; 121 | if(prefix_segment->findIdByString(prefixId, &prefix) == false) 122 | prefixId = prefix_segment->addStringToSegment(&prefix); 123 | sprintf(temp, "%d",prefixId); 124 | 125 | searchStr.assign(suffix.str, suffix.length); 126 | for(size_t i = 0; i < strlen(temp); i++) { 127 | #ifdef USE_C_STRING 128 | searchStr.insert(searchStr.begin() + i, temp[i] - '0' + 1);//suffix.insert(suffix.begin() + i, temp[i] - '0'); 129 | #else 130 | searchStr.insert(searchStr.begin() + i, temp[i] - '0'); 131 | #endif 132 | } 133 | 134 | searchLen.str = searchStr.c_str(); searchLen.length = searchStr.length(); 135 | id = suffix_segment->addStringToSegment(&searchLen); 136 | searchStr.clear(); 137 | return OK; 138 | } 139 | 140 | Status URITable::getURIById(string& URI, ID id) 141 | { 142 | URI.clear(); 143 | if (suffix_segment->findStringById(&suffix, id) == false) 144 | return URI_NOT_FOUND; 145 | char temp[10]; 146 | memset(temp, 0, 10); 147 | const char* ptr = suffix.str; 148 | 149 | int i; 150 | #ifdef USE_C_STRING 151 | for (i = 0; i < 10; i++) { 152 | if (ptr[i] > 10) 153 | break; 154 | temp[i] = (ptr[i] - 1) + '0'; 155 | } 156 | #else 157 | for(i = 0; i < 10; i++) { 158 | if(ptr[i] > 9) 159 | break; 160 | temp[i] = ptr[i] + '0'; 161 | } 162 | #endif 163 | 164 | ID prefixId = atoi(temp); 165 | if (prefixId == 1) 166 | URI.assign(suffix.str + 1, suffix.length - 1); 167 | else { 168 | if (prefix_segment->findStringById(&prefix, prefixId) == false) 169 | return 
URI_NOT_FOUND; 170 | URI.assign(prefix.str, prefix.length); 171 | URI.append("/"); 172 | URI.append(suffix.str + i, suffix.length - i); 173 | } 174 | 175 | return OK; 176 | } 177 | 178 | URITable* URITable::load(const string dir) 179 | { 180 | URITable* uriTable = new URITable(); 181 | uriTable->prefix_segment = StringIDSegment::load(dir, "/uri_prefix"); 182 | uriTable->suffix_segment = StringIDSegment::load(dir, "/uri_suffix"); 183 | return uriTable; 184 | } 185 | 186 | void URITable::dump() 187 | { 188 | prefix_segment->dump(); 189 | suffix_segment->dump(); 190 | } 191 | -------------------------------------------------------------------------------- /TripleBit/StatisticsBuffer.h: -------------------------------------------------------------------------------- 1 | #ifndef STATISTICSBUFFER_H_ 2 | #define STATISTICSBUFFER_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 
14 | //--------------------------------------------------------------------------- 15 | 16 | class HashIndex; 17 | class EntityIDBuffer; 18 | class MMapBuffer; 19 | 20 | #include "TripleBit.h" 21 | 22 | class StatisticsBuffer { 23 | public: 24 | enum StatisticsType { SUBJECT_STATIS, OBJECT_STATIS, SUBJECTPREDICATE_STATIS, OBJECTPREDICATE_STATIS }; 25 | StatisticsBuffer(); 26 | virtual ~StatisticsBuffer(); 27 | /// add a statistics record; 28 | virtual Status addStatis(unsigned v1, unsigned v2, unsigned v3) = 0; 29 | /// get a statistics record; 30 | virtual Status getStatis(unsigned& v1, unsigned v2) = 0; 31 | /// save the statistics record to file; 32 | //virtual Status save(ofstream& file) = 0; 33 | /// load the statistics record from file; 34 | //virtual StatisticsBuffer* load(ifstream& file) = 0; 35 | virtual void flush() = 0; 36 | protected: 37 | const unsigned HEADSPACE; 38 | }; 39 | 40 | class OneConstantStatisticsBuffer : public StatisticsBuffer { 41 | public: 42 | struct Triple { 43 | ID value1; 44 | unsigned count; 45 | }; 46 | 47 | private: 48 | StatisticsType type; 49 | MMapBuffer* buffer; 50 | const unsigned char* reader; 51 | unsigned char* writer; 52 | 53 | /// index for query; 54 | vector index; 55 | unsigned indexSize; 56 | unsigned nextHashValue; 57 | unsigned lastId; 58 | unsigned usedSpace; 59 | 60 | const unsigned ID_HASH; 61 | 62 | Triple* triples, *pos, *posLimit; 63 | bool first; 64 | public: 65 | OneConstantStatisticsBuffer(const string path, StatisticsType type); 66 | virtual ~OneConstantStatisticsBuffer(); 67 | Status addStatis(unsigned v1, unsigned v2, unsigned v3 = 0); 68 | Status getStatis(unsigned& v1, unsigned v2 = 0); 69 | Status save(MMapBuffer*& indexBuffer); 70 | static OneConstantStatisticsBuffer* load(StatisticsType type, const string path, char*& indexBuffer); 71 | /// get the subject or object ids from minID to maxID; 72 | Status getIDs(EntityIDBuffer* entBuffer, ID minID, ID maxID); 73 | Status getEntityIDs(EntityIDBuffer* 
entBuffer, ID minID, ID maxID); 74 | unsigned int getEntityCount(); 75 | void flush(); 76 | private: 77 | /// write a id to buffer; isID indicate the id really is a ID, maybe is a count. 78 | void writeId(unsigned id, char*& ptr, bool isID); 79 | /// read a id from buffer; 80 | const char* readId(unsigned& id, const char* ptr, bool isID); 81 | /// judge the buffer is full; 82 | bool isPtrFull(unsigned len); 83 | /// get the value length in bytes; 84 | unsigned getLen(unsigned v); 85 | 86 | const unsigned char* decode(const unsigned char* begin, const unsigned char* end); 87 | const unsigned char* decodeID(const unsigned char* begin, const unsigned char* end); 88 | Status decodeAndInsertID(const unsigned char* begin, const unsigned char* end, EntityIDBuffer *entBuffer); 89 | bool find(unsigned value); 90 | bool find_last(unsigned value); 91 | bool findInsertPoint1(unsigned minID); 92 | bool findInsertPoint2(unsigned maxID); 93 | }; 94 | 95 | class TwoConstantStatisticsBuffer : public StatisticsBuffer { 96 | public: 97 | struct Triple{ 98 | ID value1; 99 | ID value2; 100 | ID count; 101 | }; 102 | 103 | private: 104 | StatisticsType type; 105 | MMapBuffer* buffer; 106 | const unsigned char* reader; 107 | unsigned char* writer; 108 | 109 | Triple* index; 110 | 111 | unsigned lastId, lastPredicate; 112 | unsigned usedSpace; 113 | unsigned currentChunkNo; 114 | unsigned indexPos, indexSize; 115 | 116 | Triple triples[3 * 4096]; 117 | Triple* pos, *posLimit; 118 | bool first; 119 | public: 120 | TwoConstantStatisticsBuffer(const string path, StatisticsType type); 121 | virtual ~TwoConstantStatisticsBuffer(); 122 | /// add a statistics record; 123 | Status addStatis(unsigned v1, unsigned v2, unsigned v3); 124 | /// get a statistics record; 125 | Status getStatis(unsigned& v1, unsigned v2); 126 | /// get the buffer position by a id, used in query 127 | Status getPredicatesByID(unsigned id, EntityIDBuffer* buffer, ID minID, ID maxID); 128 | /// save the statistics buffer; 
129 | Status save(MMapBuffer*& indexBuffer); 130 | /// load the statistics buffer; 131 | static TwoConstantStatisticsBuffer* load(StatisticsType type, const string path, char*& indxBuffer); 132 | void flush(); 133 | private: 134 | /// get the value length in bytes; 135 | unsigned getLen(unsigned v); 136 | /// decode a chunk 137 | const uchar* decode(const uchar* begin, const uchar* end); 138 | /// decode id and predicate in a chunk 139 | const uchar* decodeIdAndPredicate(const uchar* begin, const uchar* end); 140 | /// 141 | bool find(unsigned value1, unsigned value2); 142 | int findPredicate(unsigned,Triple*,Triple*); 143 | /// 144 | bool find_last(unsigned value1, unsigned value2); 145 | bool find(unsigned,Triple* &,Triple* &); 146 | bool findID(unsigned value1,Triple*&,Triple*&); 147 | const uchar* decode(const uchar* begin, const uchar* end,Triple*,Triple*& ,Triple*&); 148 | 149 | }; 150 | #endif /* STATISTICSBUFFER_H_ */ 151 | -------------------------------------------------------------------------------- /TripleBit/ThreadPool.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "ThreadPool.h" 14 | #include "TripleBit.h" 15 | 16 | #include 17 | #include 18 | 19 | using namespace std; 20 | 21 | void CTask::SetData(void * data) { 22 | m_ptrData = data; 23 | } 24 | 25 | 26 | ////////////////////////////////////////////////////////////////////// 27 | ////// class CThreadPool 28 | ////////////////////////////////////////////////////////////////////// 29 | CThreadPool* CThreadPool::instance = NULL; 30 | 31 | CThreadPool::CThreadPool(int threadNum) { 32 | this->m_iThreadNum = threadNum; 33 | pthread_mutex_init(&m_pthreadMutex,NULL); 34 | pthread_mutex_init(&m_pthreadIdleMutex,NULL); 35 | pthread_mutex_init(&m_pthreadBusyMutex,NULL); 36 | pthread_cond_init(&m_pthreadCond,NULL); 37 | pthread_cond_init(&m_pthreadEmpty,NULL); 38 | pthread_cond_init(&m_pthreadBusyEmpty,NULL); 39 | shutdown = false; 40 | Create(); 41 | } 42 | 43 | CThreadPool::~CThreadPool() 44 | { 45 | this->StopAll(); 46 | pthread_mutex_destroy(&m_pthreadMutex); 47 | pthread_mutex_destroy(&m_pthreadIdleMutex); 48 | pthread_mutex_destroy(&m_pthreadBusyMutex); 49 | pthread_cond_destroy(&m_pthreadCond); 50 | pthread_cond_destroy(&m_pthreadEmpty); 51 | pthread_cond_destroy(&m_pthreadBusyEmpty); 52 | } 53 | 54 | int CThreadPool::MoveToIdle(pthread_t tid) { 55 | pthread_mutex_lock(&m_pthreadBusyMutex); 56 | vector::iterator busyIter = m_vecBusyThread.begin(); 57 | while (busyIter != m_vecBusyThread.end()) { 58 | if (tid == *busyIter) { 59 | break; 60 | } 61 | busyIter++; 62 | } 63 | m_vecBusyThread.erase(busyIter); 64 | 65 | if ( m_vecBusyThread.size() == 0) { 66 | pthread_cond_broadcast(&m_pthreadBusyEmpty); 67 | } 68 | 69 | pthread_mutex_unlock(&m_pthreadBusyMutex); 70 | 71 | pthread_mutex_lock(&m_pthreadIdleMutex); 72 | m_vecIdleThread.push_back(tid); 73 | pthread_mutex_unlock(&m_pthreadIdleMutex); 74 | return 0; 75 | } 76 | 77 | int CThreadPool::MoveToBusy(pthread_t tid) { 78 | 
pthread_mutex_lock(&m_pthreadIdleMutex); 79 | vector::iterator idleIter = m_vecIdleThread.begin(); 80 | while (idleIter != m_vecIdleThread.end()) { 81 | if (tid == *idleIter) { 82 | break; 83 | } 84 | idleIter++; 85 | } 86 | m_vecIdleThread.erase(idleIter); 87 | pthread_mutex_unlock(&m_pthreadIdleMutex); 88 | 89 | pthread_mutex_lock(&m_pthreadBusyMutex); 90 | m_vecBusyThread.push_back(tid); 91 | pthread_mutex_unlock(&m_pthreadBusyMutex); 92 | return 0; 93 | } 94 | 95 | void* CThreadPool::ThreadFunc(void * threadData) { 96 | pthread_t tid = pthread_self(); 97 | int rnt; 98 | ThreadPoolArg* arg = (ThreadPoolArg*)threadData; 99 | vector* taskList = arg->taskList; 100 | CThreadPool* pool = arg->pool; 101 | while (1) { 102 | rnt = pthread_mutex_lock(&pool->m_pthreadMutex); 103 | if ( rnt != 0){ 104 | cout<<"Get mutex error"<size() == 0 && pool->shutdown == false){ 108 | pthread_cond_wait(&pool->m_pthreadCond, &pool->m_pthreadMutex); 109 | } 110 | 111 | if ( pool->shutdown == true){ 112 | pthread_mutex_unlock(&pool->m_pthreadMutex); 113 | pthread_exit(NULL); 114 | } 115 | 116 | pool->MoveToBusy(tid); 117 | Task task = Task(taskList->front()); 118 | taskList->erase(taskList->begin()); 119 | 120 | if ( taskList->size() == 0 ) { 121 | pthread_cond_broadcast(&pool->m_pthreadEmpty); 122 | } 123 | pthread_mutex_unlock(&pool->m_pthreadMutex); 124 | task(); 125 | pool->MoveToIdle(tid); 126 | } 127 | return (void*) 0; 128 | } 129 | 130 | int CThreadPool::AddTask(const Task& task) { 131 | pthread_mutex_lock(&m_pthreadMutex); 132 | this->m_vecTaskList.push_back(task); 133 | pthread_mutex_unlock(&m_pthreadMutex); 134 | pthread_cond_broadcast(&m_pthreadCond); 135 | return 0; 136 | } 137 | 138 | int CThreadPool::Create() { 139 | m_vecTaskList.clear(); 140 | struct ThreadPoolArg* arg = new ThreadPoolArg; 141 | pthread_mutex_lock(&m_pthreadIdleMutex); 142 | for (int i = 0; i < m_iThreadNum; i++) { 143 | pthread_t tid = 0; 144 | arg->pool = this; 145 | arg->taskList = &m_vecTaskList; 146 
| pthread_create(&tid, NULL, ThreadFunc, arg); 147 | m_vecIdleThread.push_back(tid); 148 | } 149 | pthread_mutex_unlock(&m_pthreadIdleMutex); 150 | return 0; 151 | } 152 | 153 | int CThreadPool::StopAll() { 154 | shutdown = true; 155 | pthread_mutex_unlock(&m_pthreadMutex); 156 | pthread_cond_broadcast(&m_pthreadCond); 157 | vector::iterator iter = m_vecIdleThread.begin(); 158 | while (iter != m_vecIdleThread.end()) { 159 | pthread_join(*iter, NULL); 160 | iter++; 161 | } 162 | 163 | iter = m_vecBusyThread.begin(); 164 | while (iter != m_vecBusyThread.end()) { 165 | pthread_join(*iter, NULL); 166 | iter++; 167 | } 168 | 169 | return 0; 170 | } 171 | 172 | int CThreadPool::Wait() 173 | { 174 | int rnt; 175 | 176 | rnt = pthread_mutex_lock(&m_pthreadMutex); 177 | while( m_vecTaskList.size() != 0 ) { 178 | pthread_cond_wait(&m_pthreadEmpty, &m_pthreadMutex); 179 | } 180 | rnt = pthread_mutex_unlock(&m_pthreadMutex); 181 | pthread_mutex_lock(&m_pthreadBusyMutex); 182 | while( m_vecBusyThread.size() != 0) { 183 | pthread_cond_wait(&m_pthreadBusyEmpty, &m_pthreadBusyMutex); 184 | } 185 | pthread_mutex_unlock(&m_pthreadBusyMutex); 186 | return 0; 187 | } 188 | 189 | CThreadPool& CThreadPool::getInstance() 190 | { 191 | if(instance == NULL) { 192 | instance = new CThreadPool(THREAD_NUMBER); 193 | } 194 | return *instance; 195 | } 196 | -------------------------------------------------------------------------------- /TripleBit/MemoryBuffer.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. 
To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "MemoryBuffer.h" 14 | 15 | unsigned MemoryBuffer::pagesize = 4096; //4KB 16 | 17 | MemoryBuffer::MemoryBuffer() 18 | { 19 | 20 | } 21 | 22 | MemoryBuffer::MemoryBuffer(unsigned _size) :size(_size) { 23 | // TODO Auto-generated constructor stub 24 | buffer = NULL; 25 | buffer = (char*)malloc(size * sizeof(char)); 26 | if(buffer == NULL) { 27 | MessageEngine::showMessage("MemoryBuffer::MemoryBuffer, malloc error!", MessageEngine::ERROR); 28 | } 29 | ::memset(buffer, 0, size); 30 | currentHead = buffer; 31 | } 32 | 33 | MemoryBuffer::~MemoryBuffer() { 34 | // TODO Auto-generated destructor stub 35 | //free the buffer 36 | if(buffer != NULL) 37 | free(buffer); 38 | buffer = NULL; 39 | size = 0; 40 | } 41 | 42 | char* MemoryBuffer::resize(unsigned increaseSize) 43 | { 44 | size_t newsize = size + increaseSize; 45 | buffer = (char*)realloc(buffer, newsize * sizeof(char)); 46 | if(buffer == NULL) { 47 | MessageEngine::showMessage("MemoryBuffer::addBuffer, realloc error!", MessageEngine::ERROR); 48 | } 49 | currentHead = buffer + size; 50 | ::memset(currentHead, 0, increaseSize); 51 | size = size + increaseSize; 52 | 53 | return currentHead; 54 | } 55 | 56 | Status MemoryBuffer::resize(unsigned increaseSize, bool zero) { 57 | size_t newsize = increaseSize; 58 | buffer = (char*)realloc(buffer, newsize * sizeof(char)); 59 | if(buffer == NULL) { 60 | MessageEngine::showMessage("MemoryBuffer::addBuffer, realloc error!", MessageEngine::ERROR); 61 | return ERROR; 62 | } 63 | currentHead = buffer + size; 64 | ::memset(currentHead, 0, increaseSize - size); 65 | size = increaseSize; 66 | 67 | return OK; 68 | } 69 | 70 | char* MemoryBuffer::getBuffer() 71 | { 72 | 
return buffer; 73 | } 74 | 75 | void MemoryBuffer::memset(char value) 76 | { 77 | ::memset(buffer, value, size); 78 | } 79 | 80 | char* MemoryBuffer::getBuffer(int pos) 81 | { 82 | return buffer + pos; 83 | } 84 | 85 | void MemoryBuffer::save(ofstream& ofile) 86 | { 87 | ofile<>size; 101 | ifile>>offset; 102 | 103 | ifile.get(); 104 | 105 | if( buffer != NULL) { 106 | free(buffer); 107 | buffer = NULL; 108 | } 109 | 110 | unsigned remainSize, writeSize, allocSize; 111 | remainSize = size; writeSize = 0; allocSize = 0; 112 | 113 | if(remainSize >= INIT_PAGE_COUNT * pagesize) { 114 | buffer = (char*)malloc(INIT_PAGE_COUNT * pagesize); 115 | writeSize = allocSize = INIT_PAGE_COUNT * pagesize; 116 | } else { 117 | buffer = (char*)malloc(remainSize); 118 | writeSize = allocSize = remainSize; 119 | } 120 | 121 | if(buffer == NULL) { 122 | MessageEngine::showMessage("MemoryBuffer::load, malloc error!", MessageEngine::ERROR); 123 | } 124 | 125 | unsigned i; 126 | currentHead = buffer; 127 | while(remainSize > 0) { 128 | for(i = 0; i < writeSize; i++) { 129 | //ifile>>currentHead[i]; 130 | currentHead[i] = ifile.get(); 131 | } 132 | 133 | remainSize = remainSize - writeSize; 134 | if(remainSize >= INIT_PAGE_COUNT * pagesize) { 135 | buffer = (char*)realloc(buffer, allocSize + INIT_PAGE_COUNT * pagesize); 136 | writeSize = INIT_PAGE_COUNT * pagesize; 137 | } else { 138 | buffer = (char*)realloc(buffer, allocSize + remainSize); 139 | writeSize = remainSize; 140 | } 141 | 142 | if(buffer == NULL) { 143 | MessageEngine::showMessage("MemoryBuffer::load, realloc error!", MessageEngine::ERROR); 144 | } 145 | currentHead = buffer + allocSize; 146 | allocSize = allocSize + writeSize; 147 | } 148 | 149 | currentHead = buffer + offset; 150 | } 151 | 152 | ///////////////////////////////////////////////////////////////////////////////////////// 153 | /////// class StatementReificationTable 154 | ///////////////////////////////////////////////////////////////////////////////////////// 
155 | StatementReificationTable::StatementReificationTable() { 156 | // TODO Auto-generated constructor stub 157 | buffer = new MemoryBuffer(REIFICATION_INIT_PAGE_COUNT * MemoryBuffer::pagesize); 158 | currentBuffer = (ID*)buffer->getBuffer(); 159 | pos = 0; 160 | } 161 | 162 | StatementReificationTable::~StatementReificationTable() { 163 | // TODO Auto-generated destructor stub 164 | if(buffer != NULL) 165 | delete buffer; 166 | buffer = NULL; 167 | } 168 | 169 | Status StatementReificationTable::insertStatementReification(ID statement, ID column) 170 | { 171 | if(REIFICATION_INIT_PAGE_COUNT * getpagesize() / sizeof(ID) <= pos){ 172 | currentBuffer = (ID*)buffer->resize(REIFICATION_INCREMENT_PAGE_COUNT * MemoryBuffer::pagesize); 173 | pos = 0; 174 | } 175 | 176 | currentBuffer[pos] = statement; 177 | pos++; 178 | currentBuffer[pos] = column; 179 | pos++; 180 | return OK; 181 | } 182 | 183 | Status StatementReificationTable::getColumn(ID statement, ID& column) 184 | { 185 | const ID* reader, *limit; 186 | reader = (ID*)buffer->getBuffer(); limit = (ID*)(buffer->getBuffer() + buffer->getSize()); 187 | while( reader < limit ) { 188 | if(reader[0] == statement) { 189 | column = reader[1]; 190 | return OK; 191 | } 192 | reader++; 193 | } 194 | 195 | return REIFICATION_NOT_FOUND; 196 | } 197 | 198 | 199 | void StatementReificationTable::save(ofstream& ofile) 200 | { 201 | ofile<getBuffer()<<" "; 203 | buffer->save(ofile); 204 | } 205 | 206 | void StatementReificationTable::load(ifstream& ifile) 207 | { 208 | unsigned offset; 209 | ifile>>pos; 210 | ifile>>offset; 211 | buffer->load(ifile); 212 | 213 | currentBuffer = (ID*)buffer->getBuffer(offset); 214 | } 215 | -------------------------------------------------------------------------------- /TripleBit/PredicateTable.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 
Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "PredicateTable.h" 14 | #include "StringIDSegment.h" 15 | 16 | PredicateTable::PredicateTable(const string dir) : SINGLE("single"){ 17 | // TODO Auto-generated constructor stub 18 | prefix_segment = StringIDSegment::create(dir, "/predicate_prefix"); 19 | suffix_segment = StringIDSegment::create(dir, "/predicate_suffix"); 20 | 21 | prefix_segment->addStringToSegment(SINGLE); 22 | } 23 | 24 | PredicateTable::~PredicateTable() { 25 | // TODO Auto-generated destructor stub 26 | if(prefix_segment != NULL) 27 | delete prefix_segment; 28 | prefix_segment = NULL; 29 | 30 | if(suffix_segment != NULL) 31 | delete suffix_segment; 32 | suffix_segment = NULL; 33 | } 34 | 35 | 36 | Status PredicateTable::getPrefix(const char* URI) 37 | { 38 | size_t size = strlen(URI); 39 | int i; 40 | for(i = size - 2; i >= 0; i--) { 41 | if(URI[i] == '/') 42 | break; 43 | } 44 | 45 | if(i == -1) { 46 | prefix.str = SINGLE.c_str(); 47 | prefix.length = SINGLE.length(); 48 | suffix.str = URI; 49 | suffix.length = size; 50 | } else { 51 | //prefix.assign(URI.begin(), URI.begin() + size); 52 | //suffix.assign(URI.begin() + size + 1, URI.end()); 53 | prefix.str = URI; 54 | prefix.length = i; 55 | suffix.str = URI + i + 1; 56 | suffix.length = size - i - 1; 57 | } 58 | 59 | return OK; 60 | } 61 | 62 | Status PredicateTable::insertTable(const char* str, ID& id) 63 | { 64 | getPrefix(str); 65 | char temp[20]; 66 | ID prefixId; 67 | 68 | 
if(prefix_segment->findIdByString(prefixId, &prefix) == false) 69 | prefixId = prefix_segment->addStringToSegment(&prefix); 70 | sprintf(temp, "%d",prefixId); 71 | 72 | searchStr.assign(suffix.str, suffix.length); 73 | for(size_t i = 0; i < strlen(temp); i++) { 74 | #ifdef USE_C_STRING 75 | searchStr.insert(searchStr.begin() + i, temp[i] - '0' + 1);//suffix.insert(suffix.begin() + i, temp[i] - '0'); 76 | #else 77 | searchStr.insert(searchStr.begin() + i, temp[i] - '0'); 78 | #endif 79 | } 80 | 81 | searchLen.str = searchStr.c_str(); searchLen.length = searchStr.length(); 82 | id = suffix_segment->addStringToSegment(&searchLen); 83 | searchStr.clear(); 84 | return OK; 85 | } 86 | 87 | Status PredicateTable::getPredicateByID(string& URI, ID id) 88 | { 89 | URI.clear(); 90 | if (suffix_segment->findStringById(&suffix, id) == false) 91 | return URI_NOT_FOUND; 92 | char temp[10]; 93 | memset(temp, 0, 10); 94 | const char* ptr = suffix.str; 95 | 96 | int i; 97 | #ifdef USE_C_STRING 98 | for (i = 0; i < 10; i++) { 99 | if (ptr[i] > 10) 100 | break; 101 | temp[i] = (ptr[i] - 1) + '0'; 102 | } 103 | #else 104 | for(i = 0; i < 10; i++) { 105 | if(ptr[i] > 9) 106 | break; 107 | temp[i] = ptr[i] + '0'; 108 | } 109 | #endif 110 | 111 | ID prefixId = atoi(temp); 112 | if (prefixId == 1) 113 | URI.assign(suffix.str + 1, suffix.length - 1); 114 | else { 115 | if (prefix_segment->findStringById(&prefix, prefixId) == false) 116 | return URI_NOT_FOUND; 117 | URI.assign(prefix.str, prefix.length); 118 | URI.append("/"); 119 | URI.append(suffix.str + i, suffix.length - i); 120 | } 121 | 122 | return OK; 123 | } 124 | 125 | string PredicateTable::getPredicateByID(ID id) 126 | { 127 | searchStr.clear(); 128 | if(suffix_segment->findStringById(&suffix, id) == false) 129 | return searchStr; 130 | char temp[10]; 131 | memset(temp, 0, 10); 132 | const char* ptr = suffix.str; 133 | 134 | int i; 135 | #ifdef USE_C_STRING 136 | for(i = 0; i < 10; i++) { 137 | if(ptr[i] > 10) 138 | break; 139 | 
temp[i] = (ptr[i] - 1)+ '0'; 140 | } 141 | #else 142 | for(i = 0; i < 10; i++) { 143 | if(ptr[i] > 9) 144 | break; 145 | temp[i] = ptr[i] + '0'; 146 | } 147 | #endif 148 | 149 | ID prefixId = atoi(temp); 150 | if(prefixId == 1) 151 | searchStr.assign(suffix.str + 1, suffix.length - 1); 152 | else { 153 | if(prefix_segment->findStringById(&prefix, prefixId) == false) 154 | return string(""); 155 | searchStr.assign(prefix.str,prefix.length); 156 | searchStr.append("/"); 157 | searchStr.append(suffix.str + i, suffix.length - i); 158 | } 159 | 160 | return searchStr; 161 | } 162 | 163 | Status PredicateTable::getIDByPredicate(const char* str,ID& id) 164 | { 165 | getPrefix(str); 166 | if (prefix.equals(SINGLE.c_str())) { 167 | searchStr.clear(); 168 | searchStr.insert(searchStr.begin(), 1); 169 | searchStr.append(suffix.str, suffix.length); 170 | searchLen.str = searchStr.c_str(); searchLen.length = searchStr.length(); 171 | if (suffix_segment->findIdByString(id, &searchLen) == false) 172 | return PREDICATE_NOT_BE_FINDED; 173 | } else { 174 | char temp[10]; 175 | ID prefixId; 176 | if (prefix_segment->findIdByString(prefixId, &prefix) == false) { 177 | return PREDICATE_NOT_BE_FINDED; 178 | } else { 179 | sprintf(temp, "%d", prefixId); 180 | searchStr.assign(suffix.str, suffix.length); 181 | for (size_t i = 0; i < strlen(temp); i++) { 182 | #ifdef USE_C_STRING 183 | searchStr.insert(searchStr.begin() + i, temp[i] - '0' + 1); 184 | #else 185 | searchStr.insert(searchStr.begin() + i, temp[i] - '0'); 186 | #endif 187 | } 188 | 189 | //cout<findIdByString(id, &searchLen) == false) 192 | return PREDICATE_NOT_BE_FINDED; 193 | } 194 | } 195 | 196 | searchStr.clear(); 197 | return OK; 198 | } 199 | 200 | size_t PredicateTable::getPredicateNo() 201 | { 202 | return suffix_segment->idStroffPool->size(); 203 | } 204 | 205 | PredicateTable* PredicateTable::load(const string dir) 206 | { 207 | PredicateTable* table = new PredicateTable(); 208 | table->prefix_segment = 
StringIDSegment::load(dir, "/predicate_prefix"); 209 | table->suffix_segment = StringIDSegment::load(dir, "/predicate_suffix"); 210 | 211 | return table; 212 | } 213 | 214 | void PredicateTable::dump() 215 | { 216 | prefix_segment->dump(); 217 | suffix_segment->dump(); 218 | } 219 | -------------------------------------------------------------------------------- /TripleBit/SPARQLLexer.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "SPARQLLexer.h" 14 | 15 | SPARQLLexer::SPARQLLexer(const std::string& input) 16 | : input(input),pos(this->input.begin()),tokenStart(pos),tokenEnd(pos), 17 | putBack(None),hasTokenEnd(false) 18 | // Constructor 19 | { 20 | } 21 | //--------------------------------------------------------------------------- 22 | SPARQLLexer::~SPARQLLexer() 23 | // Destructor 24 | { 25 | } 26 | //--------------------------------------------------------------------------- 27 | SPARQLLexer::Token SPARQLLexer::getNext() 28 | // Get the next token 29 | { 30 | // Do we have a token already? 
31 | if (putBack!=None) { 32 | Token result=putBack; 33 | putBack=None; 34 | return result; 35 | } 36 | 37 | // Reset the token end 38 | hasTokenEnd=false; 39 | 40 | // Read the string 41 | while (pos!=input.end()) { 42 | tokenStart=pos; 43 | // Interpret the first character 44 | switch (*(pos++)) { 45 | // Whitespace 46 | case ' ': case '\t': case '\n': case '\r': case '\f': continue; 47 | // Single line comment 48 | case '#': 49 | while (pos!=input.end()) { 50 | if (((*pos)=='\n')||((*pos)=='\r')) 51 | break; 52 | ++pos; 53 | } 54 | if (pos!=input.end()) ++pos; 55 | continue; 56 | // Simple tokens 57 | case ':': return Colon; 58 | case ';': return Semicolon; 59 | case ',': return Comma; 60 | case '.': return Dot; 61 | case '*': return Star; 62 | case '_': return Underscore; 63 | case '{': return LCurly; 64 | case '}': return RCurly; 65 | case '(': return LParen; 66 | case ')': return RParen; 67 | case '=': return Equal; 68 | // Not equal 69 | case '!': 70 | if ((pos==input.end())||((*pos)!='=')) 71 | return Error; 72 | ++pos; 73 | return NotEqual; 74 | // Brackets 75 | case '[': 76 | // Skip whitespaces 77 | while (pos!=input.end()) { 78 | switch (*pos) { 79 | case ' ': case '\t': case '\n': case '\r': case '\f': ++pos; continue; 80 | } 81 | break; 82 | } 83 | // Check for a closing ] 84 | if ((pos!=input.end())&&((*pos)==']')) { 85 | ++pos; 86 | return Anon; 87 | } 88 | return LBracket; 89 | case ']': return RBracket; 90 | // IRI Ref 91 | case '<': 92 | tokenStart=pos; 93 | while (pos!=input.end()) { 94 | if ((*pos)=='>') 95 | break; 96 | ++pos; 97 | } 98 | tokenEnd=pos; hasTokenEnd=true; 99 | if (pos!=input.end()) ++pos; 100 | return IRI; 101 | // String 102 | case '\'': 103 | tokenStart=pos; 104 | while (pos!=input.end()) { 105 | if ((*pos)=='\\') { 106 | ++pos; 107 | if (pos!=input.end()) ++pos; 108 | continue; 109 | } 110 | if ((*pos)=='\'') 111 | break; 112 | ++pos; 113 | } 114 | tokenEnd=pos; hasTokenEnd=true; 115 | if (pos!=input.end()) ++pos; 116 | 
return String; 117 | // String 118 | case '\"': 119 | tokenStart=pos; 120 | while (pos!=input.end()) { 121 | if ((*pos)=='\\') { 122 | ++pos; 123 | if (pos!=input.end()) ++pos; 124 | continue; 125 | } 126 | if ((*pos)=='\"') 127 | break; 128 | ++pos; 129 | } 130 | tokenEnd=pos; hasTokenEnd=true; 131 | if (pos!=input.end()) ++pos; 132 | return String; 133 | // Variables 134 | case '?': case '$': 135 | tokenStart=pos; 136 | while (pos!=input.end()) { 137 | char c=*pos; 138 | if (((c>='0')&&(c<='9'))||((c>='A')&&(c<='Z'))||((c>='a')&&(c<='z'))) { 139 | ++pos; 140 | } else break; 141 | } 142 | tokenEnd=pos; hasTokenEnd=true; 143 | return Variable; 144 | // Identifier 145 | default: 146 | --pos; 147 | while (pos!=input.end()) { 148 | char c=*pos; 149 | if (((c>='0')&&(c<='9'))||((c>='A')&&(c<='Z'))||((c>='a')&&(c<='z')) || ( c == '_') || ( c == '-') || (c == '.')) { 150 | ++pos; 151 | } else break; 152 | } 153 | if (pos==tokenStart) 154 | return Error; 155 | return Identifier; 156 | } 157 | } 158 | return Eof; 159 | } 160 | //--------------------------------------------------------------------------- 161 | std::string SPARQLLexer::getTokenValue() const 162 | // Get the value of the current token 163 | { 164 | if (hasTokenEnd) 165 | return std::string(tokenStart,tokenEnd); 166 | else 167 | return std::string(tokenStart,pos); 168 | } 169 | //--------------------------------------------------------------------------- 170 | bool SPARQLLexer::isKeyword(const char* keyword) const 171 | // Check if the current token matches a keyword 172 | { 173 | std::string::const_iterator iter=tokenStart,limit=hasTokenEnd?tokenEnd:pos; 174 | 175 | while (iter!=limit) { 176 | char c=*iter; 177 | if ((c>='A')&&(c<='Z')) c+='a'-'A'; 178 | if (c!=(*keyword)) 179 | return false; 180 | if (!*keyword) return false; 181 | ++iter; ++keyword; 182 | } 183 | return !*keyword; 184 | } 185 | //--------------------------------------------------------------------------- 186 | 
-------------------------------------------------------------------------------- /TripleBit/Sorter.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "Sorter.h" 14 | #include "TempFile.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | using namespace std; 20 | //--------------------------------------------------------------------------- 21 | /// Maximum amount of usable memory. XXX detect at runtime! 22 | static const unsigned memoryLimit = sizeof(void*) * (1 << 27); 23 | //--------------------------------------------------------------------------- 24 | namespace { 25 | //--------------------------------------------------------------------------- 26 | /// A memory range 27 | struct Range { 28 | const char* from, *to; 29 | 30 | /// Constructor 31 | Range(const char* from, const char* to) : 32 | from(from), to(to) { 33 | } 34 | 35 | /// Some content? 
36 | bool equals(const Range& o) { 37 | return ((to - from) == (o.to - o.from)) && (memcmp(from, o.from, to - from) == 0); 38 | } 39 | }; 40 | //--------------------------------------------------------------------------- 41 | /// Sort wrapper that colls the comparison function 42 | struct CompareSorter { 43 | /// Comparison function 44 | typedef int (*func)(const char*, const char*); 45 | 46 | /// Comparison function 47 | const func compare; 48 | 49 | /// Constructor 50 | CompareSorter(func compare) : 51 | compare(compare) { 52 | } 53 | 54 | /// Compare two entries 55 | bool operator()(const Range& a, const Range& b) const { 56 | return compare(a.from, b.from) < 0; 57 | } 58 | }; 59 | //--------------------------------------------------------------------------- 60 | static char* spool(char* ofs, TempFile& out, const vector& items, bool eliminateDuplicates) 61 | // Spool items to disk 62 | { 63 | Range last(0, 0); 64 | for (vector::const_iterator iter = items.begin(), limit = items.end(); iter != limit; ++iter) { 65 | if ((!eliminateDuplicates) || (!last.equals(*iter))) { 66 | last = *iter; 67 | out.write(last.to - last.from, last.from); 68 | ofs += last.to - last.from; 69 | } 70 | } 71 | return ofs; 72 | } 73 | //--------------------------------------------------------------------------- 74 | } 75 | //--------------------------------------------------------------------------- 76 | void Sorter::sort(TempFile& in, TempFile& out, const char* (*skip)(const char*), int(*compare)(const char*, const char*), bool eliminateDuplicates) 77 | // Sort a temporary file 78 | { 79 | // Open the input 80 | in.close(); 81 | MemoryMappedFile mappedIn; 82 | assert(mappedIn.open(in.getFile().c_str())); 83 | const char* reader = mappedIn.getBegin(), *limit = mappedIn.getEnd(); 84 | 85 | // Produce runs 86 | vector runs; 87 | TempFile intermediate(out.getBaseFile()); 88 | char* ofs = 0; 89 | while (reader < limit) { 90 | // Collect items 91 | vector items; 92 | const char* maxReader = 
reader + memoryLimit; 93 | while (reader < limit) { 94 | const char* start = reader; 95 | reader = skip(reader); 96 | items.push_back(Range(start, reader)); 97 | 98 | // Memory Overflow? 99 | if ((reader + (sizeof(Range) * items.size())) > maxReader) 100 | break; 101 | } 102 | 103 | // Sort the run 104 | std::sort(items.begin(), items.end(), CompareSorter(compare)); 105 | 106 | // Did everything fit? 107 | if ((reader == limit) && (runs.empty())) { 108 | spool(0, out, items, eliminateDuplicates); 109 | break; 110 | } 111 | 112 | // No, spool to intermediate file 113 | char* newOfs = spool(ofs, intermediate, items, eliminateDuplicates); 114 | runs.push_back(Range(ofs, newOfs)); 115 | ofs = newOfs; 116 | } 117 | intermediate.close(); 118 | mappedIn.close(); 119 | 120 | // Do we habe to merge runs? 121 | if (!runs.empty()) { 122 | // Map the ranges 123 | MemoryMappedFile tempIn; 124 | assert(tempIn.open(intermediate.getFile().c_str())); 125 | for (vector::iterator iter = runs.begin(), limit = runs.end(); iter != limit; ++iter) { 126 | (*iter).from = tempIn.getBegin() + ((*iter).from - static_cast (0)); 127 | (*iter).to = tempIn.getBegin() + ((*iter).to - static_cast (0)); 128 | } 129 | 130 | // Sort the run heads 131 | std::sort(runs.begin(), runs.end(), CompareSorter(compare)); 132 | 133 | // And merge them 134 | Range last(0, 0); 135 | while (!runs.empty()) { 136 | // Write the first entry if no duplicate 137 | Range head(runs.front().from, skip(runs.front().from)); 138 | if ((!eliminateDuplicates) || (!last.equals(head))) 139 | out.write(head.to - head.from, head.from); 140 | last = head; 141 | 142 | // Update the first entry. First entry done? 
143 | if ((runs.front().from = head.to) == runs.front().to) { 144 | runs[0] = runs[runs.size() - 1]; 145 | runs.pop_back(); 146 | } 147 | 148 | // Check the heap condition 149 | unsigned pos = 0, size = runs.size(); 150 | while (pos < size) { 151 | unsigned left = 2 * pos + 1, right = left + 1; 152 | if (left >= size) 153 | break; 154 | if (right < size) { 155 | if (compare(runs[pos].from, runs[left].from) > 0) { 156 | if (compare(runs[pos].from, runs[right].from) > 0) { 157 | if (compare(runs[left].from, runs[right].from) < 0) { 158 | std::swap(runs[pos], runs[left]); 159 | pos = left; 160 | } else { 161 | std::swap(runs[pos], runs[right]); 162 | pos = right; 163 | } 164 | } else { 165 | std::swap(runs[pos], runs[left]); 166 | pos = left; 167 | } 168 | } else if (compare(runs[pos].from, runs[right].from) > 0) { 169 | std::swap(runs[pos], runs[right]); 170 | pos = right; 171 | } else 172 | break; 173 | } else { 174 | if (compare(runs[pos].from, runs[left].from) > 0) { 175 | std::swap(runs[pos], runs[left]); 176 | pos = left; 177 | } else 178 | break; 179 | } 180 | } 181 | } 182 | tempIn.close(); 183 | } 184 | 185 | intermediate.discard(); 186 | out.close(); 187 | } 188 | //--------------------------------------------------------------------------- 189 | -------------------------------------------------------------------------------- /TripleBit/TripleBitQueryGraph.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. 
To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 11 | //--------------------------------------------------------------------------- 12 | 13 | #include "TripleBitQueryGraph.h" 14 | #include 15 | using namespace std; 16 | 17 | bool TripleBitQueryGraph::TripleNode::canJoin(const TripleNode& other) const 18 | // Is there an implicit join edge to another node? 19 | { 20 | // Extract variables 21 | ID v11=0,v12=0,v13=0; 22 | if (!constSubject) v11=subject+1; 23 | if (!constPredicate) v12=predicate+1; 24 | if (!constObject) v13=object+1; 25 | ID v21=0,v22=0,v23=0; 26 | if (!other.constSubject) v21=other.subject+1; 27 | if (!other.constPredicate) v22=other.predicate+1; 28 | if (!other.constObject) v23=other.object+1; 29 | 30 | // Do they have a variable in common? 31 | bool canJoin=false; 32 | if (v11&&v21&&(v11==v21)) canJoin=true; 33 | if (v11&&v22&&(v11==v22)) canJoin=true; 34 | if (v11&&v23&&(v11==v23)) canJoin=true; 35 | if (v12&&v21&&(v12==v21)) canJoin=true; 36 | if (v12&&v22&&(v12==v22)) canJoin=true; 37 | if (v12&&v23&&(v12==v23)) canJoin=true; 38 | if (v13&&v21&&(v13==v21)) canJoin=true; 39 | if (v13&&v22&&(v13==v22)) canJoin=true; 40 | if (v13&&v23&&(v13==v23)) canJoin=true; 41 | 42 | return canJoin; 43 | } 44 | 45 | // 46 | TripleBitQueryGraph::TripleNodesEdge::TripleNodesEdge(TripleNodeID from, TripleNodeID to, const std::vector& common) 47 | : from(from),to(to),common(common) 48 | // Constructor 49 | { 50 | } 51 | 52 | // 53 | TripleBitQueryGraph::TripleNodesEdge::~TripleNodesEdge() 54 | //Deconstructor 55 | { 56 | 57 | } 58 | 59 | //TODO 60 | bool TripleBitQueryGraph::JoinVariableNode::hasEdge(const TripleBitQueryGraph::JoinVariableNode& other)const 61 | { 62 | return false; 63 | } 64 | 65 | // 66 | 
TripleBitQueryGraph::JoinVariableNodeTripleNodeEdge::JoinVariableNodeTripleNodeEdge(JoinVariableNode& from, TripleNode& to,DimType dimType) 67 | : from(from),to(to),dimType(dimType) 68 | { 69 | } 70 | 71 | // 72 | TripleBitQueryGraph::JoinVariableNodeTripleNodeEdge::~JoinVariableNodeTripleNodeEdge() 73 | { 74 | } 75 | 76 | // 77 | TripleBitQueryGraph::TripleBitQueryGraph(): duplicateHandling(AllDuplicates),limit(~0u),knownEmptyResult(false),hasPredicate(false) 78 | // 79 | { 80 | } 81 | 82 | // 83 | TripleBitQueryGraph::~TripleBitQueryGraph() 84 | { 85 | } 86 | 87 | void TripleBitQueryGraph::clear() 88 | // clear the QueryGraph 89 | { 90 | query=SubQuery(); 91 | duplicateHandling=AllDuplicates; 92 | knownEmptyResult=false; 93 | hasPredicate = false; 94 | } 95 | 96 | void TripleBitQueryGraph::Clear() 97 | { 98 | query = SubQuery(); 99 | projection.clear(); 100 | predicateFlag.clear(); 101 | hasPredicate = false; 102 | } 103 | 104 | //--------------------------------------------------------------------------- 105 | static bool intersects(const set& a,const set& b,vector& common) 106 | // Check if two sets overlap 107 | { 108 | common.clear(); 109 | set::const_iterator ia,la,ib,lb; 110 | if (a.size()vb) { 127 | ++ib; 128 | } else { 129 | result=true; 130 | common.push_back(*ia); 131 | ++ia; ++ib; 132 | } 133 | } 134 | return result; 135 | } 136 | 137 | //--------------------------------------------------------------------------- 138 | void TripleBitQueryGraph::constructSubqueryEdges() 139 | // Construct the edges for a specific subquery 140 | { 141 | set bindings; 142 | //############################################################# 143 | // Collect all variable bindings 144 | vector > patternBindings;// 145 | //TODO optionalBindings,unionBindings 146 | 147 | set TripleNodesVariables; 148 | //Collect Patterns variable 149 | patternBindings.resize(query.tripleNodes.size()); 150 | for (unsigned int index=0,limit=patternBindings.size();index common; //common variable 177 
| for (unsigned int index=0,limit=patternBindings.size();index& node = query.tripleNodes; 192 | for(size_t i = 0; i < size; i++) { 193 | if(node[i].constPredicate == false) 194 | return false; 195 | } 196 | 197 | return true; 198 | } 199 | -------------------------------------------------------------------------------- /TripleBit/TripleBit.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRIPLEBIT_H_ 2 | #define _TRIPLEBIT_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 
14 | //--------------------------------------------------------------------------- 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | using namespace std; 27 | 28 | #include 29 | #include 30 | 31 | #include "MessageEngine.h" 32 | 33 | //bitmap settings 34 | const unsigned int INIT_PAGE_COUNT = 1024; 35 | const unsigned int INCREMENT_PAGE_COUNT = 1024; 36 | const unsigned int CHUNK_SIZE = 16; 37 | 38 | //uri settings 39 | const unsigned int URI_INIT_PAGE_COUNT = 256; 40 | const unsigned int URI_INCREMENT_PAGE_COUNT = 256; 41 | 42 | //reification settings 43 | const unsigned int REIFICATION_INIT_PAGE_COUNT = 2; 44 | const unsigned int REIFICATION_INCREMENT_PAGE_COUNT = 2; 45 | 46 | //column buffer settings 47 | const unsigned int COLUMN_BUFFER_INIT_PAGE_COUNT = 2; 48 | const unsigned int COLUMN_BUFFER_INCREMENT_PAGE_COUNT = 2; 49 | 50 | //URI statistics buffer settings 51 | const unsigned int STATISTICS_BUFFER_INIT_PAGE_COUNT = 1; 52 | const unsigned int STATISTICS_BUFFER_INCREMENT_PAGE_COUNT = 1; 53 | 54 | //entity buffer settings 55 | const unsigned int ENTITY_BUFFER_INIT_PAGE_COUNT = 1; 56 | const unsigned int ENTITY_BUFFER_INCREMENT_PAGE_COUNT = 2; 57 | 58 | //hash index 59 | const unsigned int HASH_RANGE = 200; 60 | const unsigned int HASH_CAPACITY = 100000 / HASH_RANGE; 61 | const unsigned int HASH_CAPACITY_INCREASE = 100000 / HASH_RANGE; 62 | const unsigned int SECONDARY_HASH_RANGE = 10; 63 | const unsigned int SECONDARY_HASH_CAPACITY = 100000 / SECONDARY_HASH_RANGE; 64 | const unsigned int SECONDARY_HASH_CAPACITY_INCREASE = 100000 / SECONDARY_HASH_RANGE; 65 | 66 | extern char* DATABASE_PATH; 67 | 68 | //thread pool 69 | const unsigned int THREAD_NUMBER = 8; //should be 2^n; 70 | enum Status { 71 | OK = 1, 72 | NOT_FIND = -1, 73 | OUT_OF_MEMORY = -5, 74 | PTR_IS_FULL = -11, 75 | PTR_IS_NOT_FULL = -10, 76 | CHUNK_IS_FULL = -21, 77 | CHUNK_IS_NOT_FULL = -20, 78 | 
PREDICATE_NOT_BE_FINDED = -30, 79 | CHARBUFFER_IS_FULL = -40, 80 | CHARBUFFER_IS_NOT_FULL = -41, 81 | URI_NOT_FOUND = -50, 82 | URI_FOUND = -51, 83 | PREDICATE_FILE_NOT_FOUND = -60, 84 | PREDICATE_FILE_END = -61, 85 | REIFICATION_NOT_FOUND, 86 | FINISH_WIRITE, 87 | FINISH_READ, 88 | ERROR, 89 | SUBJECTID_NOT_FOUND, 90 | OBJECTID_NOT_FOUND, 91 | COLUMNNO_NOT_FOUND, 92 | BUFFER_NOT_FOUND, 93 | ENTITY_NOT_INCLUDED, 94 | NO_HIT, 95 | NOT_OPENED, // file was not previously opened 96 | END_OF_FILE, // read beyond end of file or no space to extend file 97 | LOCK_ERROR, // file is used by another program 98 | NO_MEMORY, 99 | URID_NOT_FOUND , 100 | ALREADY_EXISTS, 101 | NOT_FOUND, 102 | CREATE_FAILURE, 103 | NOT_SUPPORT, 104 | ID_NOT_MATCH, 105 | ERROR_UNKOWN, 106 | BUFFER_MODIFIED, 107 | NULL_RESULT, 108 | TOO_MUCH 109 | }; 110 | 111 | //join shape of patterns within a join variable. 112 | enum JoinShape{ 113 | STAR, 114 | CHAIN 115 | }; 116 | 117 | enum EntityType 118 | { 119 | PREDICATE = 1 << 0, 120 | SUBJECT = 1 << 1, 121 | OBJECT = 1 << 2, 122 | DEFAULT = -1 123 | }; 124 | 125 | typedef long long int64; 126 | typedef unsigned char word; 127 | typedef word* word_prt; 128 | typedef word_prt bitVector_ptr; 129 | typedef unsigned int ID; 130 | typedef unsigned int SOID; 131 | typedef unsigned int PID; 132 | typedef bool status; 133 | typedef short COMPRESS_UNIT; 134 | typedef unsigned int uint; 135 | typedef unsigned char uchar; 136 | typedef unsigned short ushort; 137 | typedef unsigned long long ulonglong; 138 | typedef long long longlong; 139 | typedef size_t OffsetType; 140 | typedef size_t HashCodeType; 141 | 142 | extern const ID INVALID_ID; 143 | 144 | #define BITVECTOR_INITIAL_SIZE 100 145 | #define BITVECTOR_INCREASE_SIZE 100 146 | 147 | #define BITMAP_INITIAL_SIZE 100 148 | #define BITMAP_INCREASE_SIZE 30 149 | 150 | #define BUFFER_SIZE 1024 151 | //#define DEBUG 1 152 | #ifndef NULL 153 | #define NULL 0 154 | #endif 155 | 156 | //#define PRINT_RESULT 1 157 | 
//#define TEST_TIME 1 158 | #define WORD_SIZE (sizeof(word)) 159 | 160 | inline unsigned char Length_2_Type(unsigned char xLen, unsigned char yLen) { 161 | return (xLen - 1) * 4 + yLen; 162 | } 163 | 164 | // 165 | inline unsigned char Type_2_Length(unsigned char type) { 166 | return (type - 1) / 4 + (type - 1) % 4 + 2; 167 | } 168 | 169 | inline void Type_2_Length(unsigned char type, unsigned char& xLen, unsigned char& yLen) 170 | { 171 | xLen = (type - 1) / 4 + 1; 172 | yLen = (type - 1) % 4 + 1; 173 | } 174 | 175 | struct LengthString { 176 | const char * str; 177 | uint length; 178 | void dump(FILE * file) { 179 | for (uint i = 0; i < length; i++) 180 | fputc(str[i], file); 181 | } 182 | LengthString() : 183 | str(NULL), length(0) { 184 | } 185 | LengthString(const char * str) { 186 | this->str = str; 187 | this->length = strlen(str); 188 | } 189 | LengthString(const char * str, uint length) { 190 | this->str = str; 191 | this->length = length; 192 | } 193 | LengthString(const std::string & rhs) : 194 | str(rhs.c_str()), length(rhs.length()) { 195 | } 196 | bool equals(LengthString * rhs) { 197 | if (length != rhs->length) 198 | return false; 199 | for (uint i = 0; i < length; i++) 200 | if (str[i] != rhs->str[i]) 201 | return false; 202 | return true; 203 | } 204 | bool equals(const char * str, uint length) { 205 | if (this->length != length) 206 | return false; 207 | for (uint i = 0; i < length; i++) 208 | if (this->str[i] != str[i]) 209 | return false; 210 | return true; 211 | } 212 | bool equals(const char * str) { 213 | if(length != strlen(str)) 214 | return false; 215 | for (uint i = 0; i < length; i++) 216 | if (this->str[i] != str[i]) 217 | return false; 218 | return str[length] == 0; 219 | } 220 | bool copyTo(char * buff, uint bufflen) { 221 | if (length < bufflen) { 222 | for (uint i = 0; i < length; i++) 223 | buff[i] = str[i]; 224 | buff[length] = 0; 225 | return true; 226 | } 227 | return false; 228 | } 229 | }; 230 | 231 | #endif // _TRIPLEBIT_H_ 
232 | -------------------------------------------------------------------------------- /TripleBit/util/FindEntityID.h: -------------------------------------------------------------------------------- 1 | #ifndef FINDENTITYID_H_ 2 | #define FINDENTITYID_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | class BitmapBuffer; 17 | class URITable; 18 | class PredicateTable; 19 | class ColumnBuffer; 20 | class TripleBitRepository; 21 | class EntityIDBuffer; 22 | class Chunk; 23 | class TwoConstantStatisticsBuffer; 24 | class OneConstantStatisticsBuffer; 25 | 26 | #include "../TripleBit.h" 27 | #include "../ThreadPool.h" 28 | 29 | #ifdef TEST_TIME 30 | #include "../TimeStamp.h" 31 | #endif 32 | 33 | class FindEntityID { 34 | private: 35 | BitmapBuffer* bitmap; 36 | URITable* UriTable; 37 | PredicateTable* preTable; 38 | 39 | TwoConstantStatisticsBuffer* spStatBuffer, *opStatBuffer; 40 | OneConstantStatisticsBuffer* sStatBuffer, *oStatBuffer; 41 | 42 | EntityIDBuffer *XTemp, *XYTemp, *tempBuffer1, *tempBuffer2; 43 | pthread_mutex_t mergeBufferMutex; 44 | #ifdef TEST_TIME 45 | TimeStamp indexTimer, readTimer; 46 | #endif 47 | 48 | public: 49 | FindEntityID(TripleBitRepository* repo); 50 | virtual ~FindEntityID(); 51 | Status findSubjectIDByPredicate(ID predicateID, EntityIDBuffer* buffer, ID minID, ID maxID); 52 | 53 | Status findObjectIDByPredicate(ID predicateID, 
EntityIDBuffer* buffer, ID minID, ID maxID); 54 | 55 | Status findObjectIDAndSubjectIDByPredicate(ID predicate, EntityIDBuffer* buffer, ID min, ID max,unsigned maxNum= INT_MAX); 56 | Status findSubjectIDAndObjectIDByPredicate(ID predicate, EntityIDBuffer* buffer); 57 | Status findSubjectIDAndObjectIDByPredicate(ID predicate, EntityIDBuffer* buffer, ID min, ID max,unsigned maxNum= INT_MAX); 58 | 59 | Status findSubjectIDByPredicateAndObject(ID predicateID, ID objectID, EntityIDBuffer* buffer, ID minID, ID maxID); 60 | Status findObjectIDByPredicateAndSubject(ID predicateID, ID subjectID, EntityIDBuffer* buffer, ID minID, ID maxID); 61 | 62 | Status findObjectIDAndPredicateIDBySubject(ID subjectID, EntityIDBuffer* buffer, ID minID, ID maxID); 63 | Status findPredicateIDAndObjectIDBySubject(ID subjectID, EntityIDBuffer* buffer, ID minID, ID maxID); 64 | 65 | Status findPredicateIDAndSubjectIDByObject(ID objectID, EntityIDBuffer* buffer, ID minID, ID maxID); 66 | Status findSubjectIDAndPredicateIDByObject(ID objectID, EntityIDBuffer* buffer, ID minID, ID maxID); 67 | 68 | Status findPredicateIDBySubjectAndObject(ID subject, ID object, EntityIDBuffer* buffer); 69 | 70 | Status findPredicateIDBySubject(ID subject, EntityIDBuffer* buffer, ID minID, ID maxID); 71 | 72 | Status findPredicateIDByObject(ID object, EntityIDBuffer* buffer, ID minID, ID maxID); 73 | 74 | Status findSubjectIDByObject(ID object, EntityIDBuffer* buffer, ID minID, ID maxID); 75 | Status findObjectIDBySubject(ID subject, EntityIDBuffer* buffer, ID minID, ID maxID); 76 | 77 | Status findSubject(EntityIDBuffer* buffer, ID minID, ID maxID); 78 | Status findPredicate(EntityIDBuffer* buffer, ID minID, ID maxID); 79 | Status findObject(EntityIDBuffer* buffer, ID minID, ID maxID); 80 | 81 | Status findSOByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 82 | Status findOSByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 83 | Status 
findSPByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 84 | Status findPSByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 85 | Status findOPByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 86 | Status findPOByKnowBuffer(EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 87 | 88 | Status findSOByKnowBuffer(ID preID,EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 89 | Status findOSByKnowBuffer(ID preID,EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 90 | Status findSByKnowBuffer(ID preID,EntityIDBuffer* buffer,EntityIDBuffer* knowBuffer, EntityType knowElement); 91 | 92 | 93 | private: 94 | Status findSubjectIDByPredicate(ID predicateID, EntityIDBuffer* buffer); 95 | 96 | Status findObjectIDByPredicate(ID predicateID, EntityIDBuffer* buffer); 97 | 98 | Status findObjectIDAndSubjectIDByPredicate(ID predicate, EntityIDBuffer* buffer); 99 | 100 | Status findSubjectIDByPredicateAndObject(ID predicateID, ID objectID, EntityIDBuffer* buffer); 101 | Status findObjectIDByPredicateAndSubject(ID predicateID, ID subjectID, EntityIDBuffer* buffer); 102 | 103 | Status findSubjectIDAndPredicateIDByPredicateAndObject(ID predicateID, ID objectID, EntityIDBuffer* buffer); 104 | Status findSubjectIDAndPredicateIDByPredicateAndObject(ID predicateID, ID objectID, EntityIDBuffer* buffer, ID minID, ID maxID); 105 | Status findPredicateIDAndSubjectIDByPredicateAndObject(ID predicateID, ID objectID, EntityIDBuffer* buffer); 106 | 107 | Status findPredicateIDAndObjectIDByPredicateAndSubject(ID predicateID, ID subjectID, EntityIDBuffer* buffer); 108 | Status findObjectIDAndPredicateIDByPredicateAndSubject(ID predicateID, ID subjectID, EntityIDBuffer* buffer); 109 | Status findObjectIDAndPredicateIDByPredicateAndSubject(ID predicateID, ID subjectID, EntityIDBuffer* buffer, ID minID, ID maxID); 
110 | 111 | Status findSubjectIDAndPredicateIDByObject(ID objectID, EntityIDBuffer* buffer); 112 | Status findObjectIDAndPredicateIDBySubject(ID subjectID, EntityIDBuffer *buffer); 113 | 114 | Status findSubjectIDByObject(ID object, EntityIDBuffer* buffer); 115 | Status findObjectIDBySubject(ID subject, EntityIDBuffer* buffer); 116 | 117 | void findSubjectIDAndPredicateIDByPredicateAndObjectTask(ID predicateID, ID objectID); 118 | void findSubjectIDAndPredicateIDByPredicateAndObjectTask(ID predicateID, ID objectID, ID minID, ID maxID); 119 | void findPredicateIDAndSubjectIDByPredicateAndObjectTask(ID predicateID, ID objectID); 120 | 121 | void findObjectIDAndPredicateIDByPredicateAndSubjectTask(ID predicateID, ID subjectID); 122 | void findObjectIDAndPredicateIDByPredicateAndSubjectTask(ID predicateID, ID subjectID, ID minID, ID maxID); 123 | void findPredicateIDAndObjectIDByPredicateAndSubjectTask(ID predicateID, ID subjectID); 124 | 125 | Status findSOByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 126 | Status findOSByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 127 | Status findSPByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 128 | Status findPSByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 129 | Status findOPByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 130 | Status findPOByKnowBufferTask(ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 131 | 132 | Status findSOByKnowBufferTask1(ID preID,ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 133 | Status findOSByKnowBufferTask1(ID preID,ID* p, size_t length , EntityType knowElement,EntityIDBuffer* resultBuffer); 134 | Status findSByKnowBufferTask1(ID preID,ID* p, size_t length ,EntityIDBuffer* resultBuffer); 135 | #ifdef TEST_TIME 
136 | void printTime() { 137 | indexTimer.printTime("index timer"); 138 | readTimer.printTime("read time"); 139 | } 140 | 141 | #endif 142 | }; 143 | #endif /* FINDENTITYID_H_ */ 144 | -------------------------------------------------------------------------------- /TripleBit/TripleBitQueryGraph.h: -------------------------------------------------------------------------------- 1 | #ifndef TRIPLEBITQUERYGRAPH_H_ 2 | #define TRIPLEBITQUERYGRAPH_H_ 3 | 4 | //--------------------------------------------------------------------------- 5 | // TripleBit 6 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 7 | // Web site: http://grid.hust.edu.cn/triplebit 8 | // 9 | // This work is licensed under the Creative Commons 10 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 11 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 12 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 13 | // San Francisco, California, 94105, USA. 14 | //--------------------------------------------------------------------------- 15 | 16 | #include "IRepository.h" 17 | #include "SPARQLLexer.h" 18 | #include "SPARQLParser.h" 19 | #include 20 | #include 21 | 22 | class TripleBitQueryGraph { 23 | 24 | public: 25 | /// Possible duplicate handling modes 26 | enum DuplicateHandling { AllDuplicates, CountDuplicates, ReducedDuplicates, NoDuplicates, ShowDuplicates }; 27 | //join variable's style 28 | enum JoinGraph{ CYCLIC, ACYCLIC }; 29 | 30 | typedef unsigned int ID; 31 | typedef unsigned int TripleNodeID; 32 | typedef unsigned int JoinVariableNodeID; 33 | typedef unsigned int ConstElementNodeID; 34 | 35 | ///A triple node in the graph 36 | //TODO the binding to bitmat ,may be use a enum 37 | typedef struct TripleNode { 38 | ID subject, predicate, object; 39 | // Which of the three values are constants? 
40 | bool constSubject,constPredicate,constObject; 41 | enum Op{FINDSBYPO, FINDOBYSP, FINDPBYSO, FINDSBYP, FINDOBYP, FINDPBYS,FINDPBYO, FINDSBYO, FINDOBYS, FINDS, FINDP, FINDO, 42 | FINDSPBYO, FINDSOBYP, FINDPOBYS, FINDPSBYO, FINDOSBYP, FINDOPBYS, 43 | FINDSOBYNONE, FINDOSBYNONE, FINDSPBYNONE, FINDPSBYNONE, FINDOPBYNONE, FINDPOBYNONE, 44 | FINDSPO, NOOP 45 | }; 46 | 47 | TripleNodeID tripleNodeID; 48 | /// define the first scan operator 49 | Op scanOperation; 50 | int selectivity; 51 | // Is there an implicit join edge to another TripleNode? 52 | TripleNode() { selectivity = -1; } 53 | bool canJoin(const TripleNode& other) const; 54 | }tp_node; 55 | 56 | 57 | /// The variable node of triple node. 58 | typedef struct JoinVariableNode { 59 | //used to identify the join operation type 60 | enum JoinType{ 61 | SS, 62 | OO, 63 | SO, 64 | PP, 65 | OP, 66 | SP, 67 | PS, 68 | PO, 69 | OS, 70 | UNKNOWN 71 | }; 72 | //not use! 73 | std::string text; 74 | //the value in the triple node 75 | ID value; 76 | //the variable appear in which triple node and the triples dimension. 77 | enum DimType{SUBJECT = 1,PREDICATE = 2,OBJCET = 4}; 78 | std::vector > appear_tpnodes; 79 | // Is there an variable edge to another JoinVariableNode 80 | bool hasEdge(const JoinVariableNode& other)const; 81 | }jvar_node; 82 | 83 | /// A value filter 84 | struct Filter { 85 | /// The id 86 | unsigned id; 87 | /// The valid values. Sorted by id. 88 | std::vector values; 89 | /// Negative filter? 90 | bool exclude; 91 | }; 92 | 93 | /// A (potentially) complex filter. Currently very limited. 94 | struct ComplexFilter { 95 | /// The ids 96 | unsigned id1,id2; 97 | /// Test for equal? 98 | bool equal; 99 | }; 100 | 101 | /// The same Element(s,p,o) between TripleNodes. 102 | //TODO:Not use now 103 | typedef struct ConstElementNode { 104 | enum Type { SUBJECT, PREDICATE, OBJECT }; 105 | ID value; 106 | Type type; 107 | }const_e_node; 108 | 109 | 110 | /// Join Edge between two TripleNodes. 
111 | typedef struct TripleNodesEdge { 112 | /// The endpoints 113 | TripleNodeID from,to; 114 | /// Common variables 115 | std::vector common; 116 | /// Constructor 117 | TripleNodesEdge(TripleNodeID from, TripleNodeID to, const std::vector& common); 118 | /// Destructor 119 | ~TripleNodesEdge(); 120 | }tpn_edge; 121 | 122 | 123 | /// an edge between two variable nodes 124 | typedef struct JoinVariableNodesEdge { 125 | JoinVariableNodeID from; 126 | JoinVariableNodeID to; 127 | 128 | //JoinVariableNodesEdge(JoinVariableNodeID& from ,JoinVariableNodeID& to); 129 | //~JoinVariableNodesEdge(); 130 | }j_var_edge; 131 | 132 | 133 | /// an edge between a join variable node and a triple node. 134 | typedef struct JoinVariableNodeTripleNodeEdge { 135 | JoinVariableNode from; 136 | TripleNode to; 137 | enum DimType{SUBJECT= 0,PREDICATE = 1,OBJCET = 2}; 138 | DimType dimType; 139 | //TODO 140 | JoinVariableNodeTripleNodeEdge(JoinVariableNode& from, TripleNode& to, DimType dimType); 141 | ~JoinVariableNodeTripleNodeEdge(); 142 | }jvarn_tpn_edge; 143 | 144 | /// Description of a subquery 145 | struct SubQuery { 146 | // The TripleNodes 147 | std::vector tripleNodes; 148 | // The triple node's edges 149 | std::vector tripleEdges; 150 | 151 | //the join Variable Node 152 | std::vector joinVariables; 153 | std::vector joinVariableNodes; 154 | //the join Variable Edge. 155 | std::vector joinVariableEdges; 156 | 157 | //not use!! the join variable and the triple node edge. 158 | std::vector joinVriableNodeTripleNodeEdge; 159 | 160 | //TODO not implement!!! 
161 | /// The filter conditions 162 | std::vector filters; 163 | /// The complex filter conditions 164 | std::vector complexFilters; 165 | /// Optional subqueries 166 | std::vector optional; 167 | /// Union subqueries 168 | std::vector > unions; 169 | /// tree root node 170 | JoinVariableNodeID rootID; 171 | /// leaf nodes 172 | std::vector leafNodes; 173 | /// is cyclic or acyclic 174 | JoinGraph joinGraph; 175 | /// selectivity; 176 | std::map selectivityMap; 177 | }; 178 | 179 | private: 180 | /// The query itself,also is the Graph triple 181 | SubQuery query; 182 | /// The projection 183 | std::vector projection; 184 | std::vector predicateFlag; 185 | bool hasPredicate; 186 | /// The duplicate handling 187 | DuplicateHandling duplicateHandling; 188 | /// Maximum result size 189 | unsigned int limit; 190 | /// Is the query known to produce an empty result? 191 | bool knownEmptyResult; 192 | 193 | 194 | public: 195 | ///constructor 196 | TripleBitQueryGraph(); 197 | virtual ~TripleBitQueryGraph(); 198 | 199 | /// Clear the graph 200 | void clear(); 201 | /// Construct the edges for a specific subquery(always the graph pattern ,option pattern,filter pattern join edges) 202 | void constructSubqueryEdges(); 203 | 204 | /// Set the duplicate handling mode 205 | void setDuplicateHandling(DuplicateHandling d) { duplicateHandling=d; } 206 | /// Get the duplicate handling mode 207 | DuplicateHandling getDuplicateHandling() const { return duplicateHandling; } 208 | /// Set the result limit 209 | void setLimit(unsigned int l) { limit=l; } 210 | /// Get the result limit 211 | unsigned getLimit() const { return limit; } 212 | /// Known empty result 213 | void markAsKnownEmpty() { knownEmptyResult=true; } 214 | /// Known empty result? 
215 | bool knownEmpty() const { return knownEmptyResult; } 216 | 217 | void Clear(); 218 | /// Get the query 219 | SubQuery& getQuery() { return query; } 220 | /// Get the query 221 | const SubQuery& getQuery() const { return query; } 222 | 223 | /// Add an entry to the output projection 224 | void addProjection(unsigned int id) { projection.push_back(id); } 225 | void addPredicateFlag(unsigned flag) { predicateFlag.push_back(flag); } 226 | void setHasPredicate(bool flag) { hasPredicate = flag; } 227 | /// Iterator over the projection 228 | typedef std::vector::const_iterator projection_iterator; 229 | /// Iterator over the projection 230 | projection_iterator projectionBegin() const { return projection.begin(); } 231 | /// Iterator over the projection 232 | projection_iterator projectionEnd() const { return projection.end(); } 233 | /// project IDs 234 | vector& getProjection() { return projection; } 235 | vector& getPredicateFlag() { return predicateFlag; } 236 | bool getHasPredicate() { return hasPredicate; } 237 | 238 | bool isPredicateConst(); 239 | }; 240 | 241 | #endif /* TRIPLEBITQUERYGRAPH_H_ */ 242 | -------------------------------------------------------------------------------- /TripleBit/util/SortMergeJoin.cpp: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------------------------------- 2 | // TripleBit 3 | // (c) 2011 Massive Data Management Group @ SCTS & CGCL. 4 | // Web site: http://grid.hust.edu.cn/triplebit 5 | // 6 | // This work is licensed under the Creative Commons 7 | // Attribution-Noncommercial-Share Alike 3.0 Unported License. To view a copy 8 | // of this license, visit http://creativecommons.org/licenses/by-nc-sa/3.0/ 9 | // or send a letter to Creative Commons, 171 Second Street, Suite 300, 10 | // San Francisco, California, 94105, USA. 
11 | //--------------------------------------------------------------------------- 12 | 13 | #include "SortMergeJoin.h" 14 | #include "../EntityIDBuffer.h" 15 | 16 | SortMergeJoin::SortMergeJoin() { 17 | // TODO Auto-generated constructor stub 18 | //pool = new CThreadPool(THREAD_NUMBER); 19 | temp1 = (ID*)malloc(4096 * sizeof(ID)); 20 | temp2 = (ID*)malloc(4096 * sizeof(ID)); 21 | } 22 | 23 | SortMergeJoin::~SortMergeJoin() { 24 | // TODO Auto-generated destructor stub 25 | if(temp1 != NULL) 26 | free(temp1); 27 | temp1 = NULL; 28 | 29 | if(temp2 != NULL) 30 | free(temp2); 31 | temp2 = NULL; 32 | } 33 | 34 | /* 35 | * @param 36 | * secondModify: 是否要修改entBuffer2; 37 | */ 38 | void SortMergeJoin::Join(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2, bool secondModify /* = true */) 39 | { 40 | entBuffer1->sort(joinKey1); 41 | entBuffer2->sort(joinKey2); 42 | 43 | joinKey1--; 44 | joinKey2--; 45 | 46 | // cout<<"============="<print(); 48 | // cout<<"-------------"<print(); 50 | 51 | // int size1 = entBuffer1->getSize(); 52 | // int size2 = entBuffer2->getSize(); 53 | 54 | /*char* flag1 = (char*)malloc(size1); 55 | memset(flag1,0,size1); 56 | char* flag2; 57 | if( secondModify == true) { 58 | flag2 = (char*)malloc(size2); 59 | memset(flag2,0,size2); 60 | } else { 61 | flag2 = NULL; 62 | }*/ 63 | 64 | // cout<<"iSize: "<getBuffer(); 67 | //ID* buffer2 = entBuffer2->getBuffer(); 68 | 69 | //SortMergeJoinArg* arg; 70 | //arg = new SortMergeJoinArg(buffer1, buffer2, size1, size2, flag1, flag2, entBuffer1->IDCount, entBuffer2->IDCount, joinKey1, joinKey2); 71 | //Merge(arg); 72 | if(secondModify) { 73 | Merge1(entBuffer1, entBuffer2, joinKey1, joinKey2); 74 | } else { 75 | Merge2(entBuffer1, entBuffer2, joinKey1, joinKey2); 76 | } 77 | 78 | /*entBuffer1->usedSize = arg->length1; 79 | entBuffer2->usedSize*/ 80 | /* 81 | SortMergeJoinArg* arg; 82 | 83 | int length1 = size1 / THREAD_NUMBER; 84 | int length2 = size2 / THREAD_NUMBER; 85 | 86 | 
int startPos1 = 0; 87 | int startPos2 = 0; 88 | 89 | for( int i = 0; i < THREAD_NUMBER; i++ ) { 90 | if ( i < THREAD_NUMBER - 1 ) { 91 | if ( secondModify == true ) 92 | arg = new SortMergeJoinArg(buffer1 + startPos1 * entBuffer1->IDCount, buffer2 + startPos2 * entBuffer2->IDCount, length1, length2, 93 | flag1 + startPos1, flag2 + startPos2, entBuffer1->IDCount, entBuffer2->IDCount, joinKey1, joinKey2); 94 | else 95 | arg = new SortMergeJoinArg(buffer1 + startPos1 * entBuffer1->IDCount, buffer2 + startPos2 * entBuffer2->IDCount, length1, length2, 96 | flag1 + startPos1, NULL, entBuffer1->IDCount, entBuffer2->IDCount, joinKey1, joinKey2); 97 | CThreadPool::getInstance().AddTask(boost::bind(&SortMergeJoin::Merge, this, arg)); 98 | startPos1 += length1; 99 | startPos2 += length2; 100 | } else { 101 | if ( secondModify == true ) 102 | arg = new SortMergeJoinArg(buffer1 + startPos1 * entBuffer1->IDCount, buffer2 + startPos2 * entBuffer2->IDCount, 103 | size1 - startPos1, size2 - startPos2, flag1 + startPos1, flag2 + startPos2, entBuffer1->IDCount, entBuffer2->IDCount, joinKey1, joinKey2); 104 | else 105 | arg = new SortMergeJoinArg(buffer1 + startPos1 * entBuffer1->IDCount, buffer2 + startPos2 * entBuffer2->IDCount, 106 | size1 - startPos1, size2 - startPos2, flag1 + startPos1, NULL, entBuffer1->IDCount, entBuffer2->IDCount, joinKey1, joinKey2); 107 | CThreadPool::getInstance().AddTask(boost::bind(&SortMergeJoin::Merge, this, arg)); 108 | } 109 | } 110 | 111 | CThreadPool::getInstance().Wait(); 112 | */ 113 | 114 | /*entBuffer1->modifyByFlag(flag1,1); 115 | if ( secondModify ) 116 | { 117 | entBuffer2->modifyByFlag(flag2,1); 118 | } 119 | 120 | free(flag1); 121 | if ( secondModify ) 122 | free(flag2);*/ 123 | 124 | } 125 | 126 | int SortMergeJoin::Merge(SortMergeJoinArg* arg) 127 | { 128 | /*if ( arg->flag2 != NULL ) 129 | Merge1(arg); 130 | else 131 | Merge2(arg);*/ 132 | 133 | //delete arg; 134 | return 0; 135 | } 136 | 137 | void SortMergeJoin::Merge1(EntityIDBuffer* 
entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2) 138 | { 139 | register size_t i = 0; 140 | register size_t j = 0; 141 | register int k; 142 | register size_t pos1 = 0, pos2 = 0; 143 | size_t size1 = 0, size2 = 0; 144 | register ID keyValue; 145 | 146 | ID* buffer1 = entBuffer1->getBuffer(); 147 | ID* buffer2 = entBuffer2->getBuffer(); 148 | size_t length1 = entBuffer1->getSize(); 149 | size_t length2 = entBuffer2->getSize(); 150 | int IDCount1 = entBuffer1->getIDCount(); 151 | int IDCount2 = entBuffer2->getIDCount(); 152 | 153 | while ( i < length1 && j < length2 ) { 154 | keyValue = buffer1[i * IDCount1 + joinKey1]; 155 | 156 | while (buffer2[j * IDCount2 + joinKey2] < keyValue && j < length2) { 157 | j++; 158 | } 159 | 160 | if (buffer2[j * IDCount2 + joinKey2] == keyValue) { 161 | while (buffer1[i * IDCount1 + joinKey1] == keyValue && i < length1) { 162 | if(pos1 == 4096) { 163 | memcpy(buffer1 + size1, temp1, 4096 * sizeof(ID)); 164 | size1 = size1 + pos1; 165 | pos1 = 0; 166 | } 167 | 168 | for (k = 0; k < IDCount1; k++) { 169 | temp1[pos1] = buffer1[i * IDCount1 + k]; 170 | pos1++; 171 | } 172 | 173 | // memcpy(temp1 + pos1, buffer1 + i * IDCount1, IDCount1 * sizeof(ID)); 174 | // pos1 += IDCount1; 175 | i++; 176 | } 177 | 178 | while (buffer2[j * IDCount2 + joinKey2] == keyValue && j < length2) { 179 | if(pos2 == 4096) { 180 | memcpy(buffer2 + size2, temp2, 4096 * sizeof(ID)); 181 | size2 = size2 + pos2; 182 | pos2 = 0; 183 | } 184 | 185 | for (k = 0; k < IDCount2; k++) { 186 | temp2[pos2] = buffer2[j * IDCount2 + k]; 187 | pos2++; 188 | } 189 | 190 | //memcpy(temp2 + pos2, buffer2 + j * IDCount2, IDCount2 * sizeof(ID)); 191 | // pos2 += IDCount2; 192 | j++; 193 | } 194 | } else { 195 | while (buffer1[i * IDCount1 + joinKey1] == keyValue && i < length1) { 196 | i++; 197 | } 198 | } 199 | } 200 | 201 | memcpy(buffer1 + size1, temp1, pos1 * sizeof(ID)); 202 | size1 = size1 + pos1; 203 | entBuffer1->usedSize = size1; 204 | 205 | 
memcpy(buffer2 + size2, temp2, pos2 * sizeof(ID)); 206 | size2 = size2 + pos2; 207 | entBuffer2->usedSize = size2; 208 | } 209 | 210 | void SortMergeJoin::Merge2(EntityIDBuffer* entBuffer1, EntityIDBuffer* entBuffer2, int joinKey1, int joinKey2) 211 | { 212 | register int i = 0; 213 | register int j = 0; 214 | register int k; 215 | register size_t pos1 = 0; 216 | size_t size1 = 0; 217 | register ID keyValue; 218 | 219 | ID* buffer1 = entBuffer1->getBuffer(); 220 | ID* buffer2 = entBuffer2->getBuffer(); 221 | int length1 = entBuffer1->getSize(); 222 | int length2 = entBuffer2->getSize(); 223 | int IDCount1 = entBuffer1->getIDCount(); 224 | int IDCount2 = entBuffer2->getIDCount(); 225 | 226 | while ( i < length1 && j < length2 ) { 227 | keyValue = buffer1[i * IDCount1 + joinKey1]; 228 | 229 | while (buffer2[j * IDCount2 + joinKey2] < keyValue && j < length2) { 230 | j++; 231 | } 232 | 233 | if (buffer2[j * IDCount2 + joinKey2] == keyValue) { 234 | while (buffer1[i * IDCount1 + joinKey1] == keyValue && i < length1) { 235 | if(pos1 == 4096) { 236 | memcpy(buffer1 + size1, temp1, 4096 * sizeof(ID)); 237 | size1 = size1 + pos1; 238 | pos1 = 0; 239 | } 240 | 241 | for (k = 0; k < IDCount1; k++) { 242 | temp1[pos1] = buffer1[i * IDCount1 + k]; 243 | pos1++; 244 | } 245 | i++; 246 | } 247 | 248 | while (buffer2[j * IDCount2 + joinKey2] == keyValue && j < length2) { 249 | j++; 250 | } 251 | } else { 252 | while (buffer1[i * IDCount1 + joinKey1] == keyValue && i < length1) { 253 | i++; 254 | } 255 | } 256 | } 257 | 258 | memcpy(buffer1 + size1, temp1, pos1 * sizeof(ID)); 259 | size1 = size1 + pos1; 260 | entBuffer1->usedSize = size1; 261 | } 262 | --------------------------------------------------------------------------------