├── docs ├── gAnswer_help.pdf ├── TKDE18_gAnswer.pdf ├── gAnswer_help_en.pdf └── TKDE18_gAnswer_supplementary.pdf ├── src ├── qa │ ├── Matches.java │ ├── mapping │ │ ├── EntityFragmentDict.java │ │ └── DBpediaLookup.java │ ├── Answer.java │ ├── Query.java │ ├── Globals.java │ ├── extract │ │ └── CorefResolution.java │ ├── parsing │ │ └── QuestionParsing.java │ └── GAnswer.java ├── fgmt │ ├── Fragment.java │ ├── VariableFragment.java │ ├── RelationFragment.java │ ├── TypeFragment.java │ └── EntityFragment.java ├── rdf │ ├── NodeSelectedWithScore.java │ ├── PredicateMapping.java │ ├── MergedWord.java │ ├── EntityMapping.java │ ├── TypeMapping.java │ ├── SemanticUnit.java │ ├── ImplicitRelation.java │ ├── SimpleRelation.java │ ├── SemanticQueryGraph.java │ ├── SemanticRelation.java │ ├── Sparql.java │ └── Triple.java ├── lcn │ ├── EntityNameAndScore.java │ ├── Main.java │ ├── EntityFragmentFields.java │ ├── SearchInEntityFragments.java │ ├── BuildIndexForTypeShortName.java │ ├── BuildIndexForEntityFragments.java │ └── SearchInTypeShortName.java ├── paradict │ └── PredicateIDAndSupport.java ├── nlp │ ├── tool │ │ ├── Main.java │ │ ├── NERecognizer.java │ │ ├── StanfordParser.java │ │ ├── MaltParser.java │ │ ├── MaltParserCon.java │ │ └── CoreNLP.java │ └── ds │ │ ├── Sentence.java │ │ ├── Word.java │ │ └── DependencyTreeNode.java ├── application │ ├── GinfoHandler.java │ ├── GanswerHttp.java │ ├── GanswerHttpConnector.java │ └── GanswerHandler.java ├── utils │ └── FileUtil.java ├── log │ └── QueryLogger.java └── addition │ ├── AggregationRecognition.java │ └── AddtionalFix.java ├── .gitignore ├── genrate_fragments ├── extra_get_basic_and_yago.py ├── step2_dedubplicate.py ├── step7_get_predicate_fragment.py ├── step6_get_type_fragment.py ├── step3_split.py ├── step4_triple_to_number.py ├── step1_clean_triple.py ├── step5_get_entity_fragment.py └── How_to_generate_fragments.md ├── LICENSE ├── README_CH.md └── README.md /docs/gAnswer_help.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/gAnswer_help.pdf -------------------------------------------------------------------------------- /docs/TKDE18_gAnswer.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/TKDE18_gAnswer.pdf -------------------------------------------------------------------------------- /docs/gAnswer_help_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/gAnswer_help_en.pdf -------------------------------------------------------------------------------- /docs/TKDE18_gAnswer_supplementary.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pkumod/gAnswer/HEAD/docs/TKDE18_gAnswer_supplementary.pdf -------------------------------------------------------------------------------- /src/qa/Matches.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | public class Matches { 4 | public String[][] answers = null; 5 | public int answersNum = 0; 6 | public long time = 0; 7 | 8 | public static final int pageNum = 3000; 9 | } 10 | -------------------------------------------------------------------------------- /src/fgmt/Fragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | 
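// Base class of the offline-built graph summaries ("fragments"): each entity, relation
// (predicate), type, and query variable gets one, tagged with its kind and a numeric id.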
public abstract class Fragment {
 4 | 	public enum typeEnum {ENTITY_FRAGMENT, RELATION_FRAGMENT, TYPE_FRAGMENT, VAR_FRAGMENT};
 5 | 
 6 | 	public typeEnum fragmentType;
 7 | 	public int fragmentId;
 8 | };
 9 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled class file
 2 | *.class
 3 | 
 4 | # Log file
 5 | *.log
 6 | 
 7 | # BlueJ files
 8 | *.ctxt
 9 | 
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 | 
13 | # Package Files #
14 | *.jar
15 | *.war
16 | *.nar
17 | *.ear
18 | *.zip
19 | *.tar.gz
20 | *.rar
21 | 
22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
23 | hs_err_pid*
--------------------------------------------------------------------------------
/src/rdf/NodeSelectedWithScore.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | public class NodeSelectedWithScore implements Comparable<NodeSelectedWithScore>
 6 | {
 7 | 	public ArrayList<Integer> selected;
 8 | 	int size; //split key to st and ed
 9 | 	public double score = 0;
10 | 
11 | 	public NodeSelectedWithScore(ArrayList<Integer> a, double b)
12 | 	{
13 | 		selected = a;
14 | 		score = b;
15 | 	}
16 | 
17 | 	// In descending order: big --> small
18 | 	public int compareTo(NodeSelectedWithScore o) {
19 | 		double diff = this.score - o.score;
20 | 		if (diff > 0) return -1;
21 | 		else if (diff < 0) return 1;
22 | 		else return 0;
23 | 	}
24 | }
--------------------------------------------------------------------------------
/src/lcn/EntityNameAndScore.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | public class EntityNameAndScore implements Comparable<EntityNameAndScore> {
 4 | 	public int entityID;
 5 | 	public String entityName;
 6 | 	public double score;
 7 | 
 8 | 	public EntityNameAndScore(int id, String n, double s) {
 9 | 		entityID = id;
10 | 		entityName = n;
11 | 		score = s;
12 | 	}
13 | 
14 | 	@Override
15 | 	public String toString() {
16 | 		return entityID + ":<" + entityName + ">\t" + score;
17 | 	}
18 | 
19 | 	public int compareTo(EntityNameAndScore o) {
20 | 		if(this.score < o.score) {
21 | 			return 1;
22 | 		}
23 | 		else if (this.score > o.score) {
24 | 			return -1;
25 | 		}
26 | 		else {
27 | 			return 0;
28 | 		}
29 | 	}
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/src/rdf/PredicateMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | public class PredicateMapping implements Comparable<PredicateMapping> {
 4 | 	public int pid = -1;
 5 | 	public double score = 0;
 6 | 	public String parapharase = null;
 7 | 
 8 | 	public PredicateMapping (int pid, double sco, String para) {
 9 | 		this.pid = pid;
10 | 		score = sco;
11 | 		parapharase = para;
12 | 	}
13 | 
14 | 	// In descending order: big --> small
15 | 	public int compareTo(PredicateMapping o) {
16 | 		double diff = this.score - o.score;
17 | 		if (diff > 0) return -1;
18 | 		else if (diff < 0) return 1;
19 | 		else return 0;
20 | 	}
21 | 
22 | 	@Override
23 | 	public String toString() {
24 | 		String ret = "";
25 | 		ret = "<"+pid+" : "+parapharase+" : "+score+">";
26 | 		return ret;
27 | 	}
28 | }
29 | 
--------------------------------------------------------------------------------
/src/paradict/PredicateIDAndSupport.java:
--------------------------------------------------------------------------------
 1 | package paradict;
 2 | 
 3 | public class PredicateIDAndSupport implements Comparable<PredicateIDAndSupport> {
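// Pairs a predicate id with its support count in the paraphrase dictionary;
// compareTo sorts by support in descending order.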
 4 | 	public int predicateID;
 5 | 	public int support;
 6 | 	public double[] wordSelectivity = null; // wordSelectivity makes the ranking of PATTY patterns more accurate.
 7 | 
 8 | 	public PredicateIDAndSupport(int _pid, int _support, double[] _slct) {
 9 | 		predicateID = _pid;
10 | 		support = _support;
11 | 		wordSelectivity = _slct;
12 | 	}
13 | 
14 | 	public int compareTo(PredicateIDAndSupport o) {
15 | 		return o.support - this.support;
16 | 	}
17 | 
18 | 	// only used for the predicate itself and handwritten paraphrases
19 | 	public static double[] genSlct(int size) {
20 | 		double[] ret = new double[size];
21 | 		for (int i=0;i<size;i++)
22 | 			ret[i] = 1;
23 | 		return ret;
24 | 	}
25 | }
--------------------------------------------------------------------------------
/src/rdf/MergedWord.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | import rdf.EntityMapping;
 6 | import rdf.TypeMapping;
 7 | 
 8 | public class MergedWord implements Comparable<MergedWord>
 9 | {
10 | 	//original position
11 | 	public int st,ed;
12 | 	//position after merge (unselected is -1)
13 | 	public int mergedPos = -1;
14 | 	public String name;
15 | 	public boolean mayCategory = false;
16 | 	public boolean mayLiteral = false;
17 | 	public boolean mayEnt = false;
18 | 	public boolean mayType = false;
19 | 	public ArrayList<EntityMapping> emList = null;
20 | 	public ArrayList<TypeMapping> tmList = null;
21 | 	public String category = null;
22 | 
23 | 	public MergedWord(int s,int e,String n)
24 | 	{
25 | 		st = s;
26 | 		ed = e;
27 | 		name = n;
28 | 	}
29 | 
30 | 	@Override
31 | 	//long to short
32 | 	public int compareTo(MergedWord o)
33 | 	{
34 | 		int lenDiff = (this.ed-this.st) - (o.ed-o.st);
35 | 
36 | 		if (lenDiff > 0) return -1;
37 | 		else if (lenDiff < 0) return 1;
38 | 		return 0;
39 | 	}
40 | 
41 | }
--------------------------------------------------------------------------------
/src/rdf/EntityMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import fgmt.EntityFragment;
 4 | 
 5 | public class EntityMapping implements Comparable<EntityMapping> {
 6 | 	public int entityID = -1;
 7 | 	public String entityName = null;
 8 | 	public double score = 0;
 9 | 
10 | 	public EntityFragment entityFragment = null;
11 | 
12 | 	public EntityMapping(int eid, String en, double sco) {
13 | 		entityID = eid;
14 | 		entityName = en;
15 | 		score = sco;
16 | 
17 | 		// penalty if the entity name starts with "?"
18 | 		if (entityName.startsWith("?"))
19 | 			score *=0.5;
20 | 	}
21 | 
22 | 	// In descending order: big --> small
23 | 	public int compareTo(EntityMapping o) {
24 | 		double diff = this.score - o.score;
25 | 		if (diff > 0) return -1;
26 | 		else if (diff < 0) return 1;
27 | 		else return 0;
28 | 	}
29 | 
30 | 	public int hashCode()
31 | 	{
32 | 		return new Integer(entityID).hashCode();
33 | 	}
34 | 
35 | 	public String toString()
36 | 	{
37 | 		StringBuilder res = new StringBuilder(entityName+"("+score+")");
38 | 		return res.toString();
39 | 	}
40 | }
--------------------------------------------------------------------------------
/genrate_fragments/step7_get_predicate_fragment.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | en2t = {}
 3 | with open('input entity fragment','r') as f:
 4 |     for line in f:
 5 |         dou = line[:-1].split('\t')
 6 |         types = dou[1].replace('|','#').split('#')[4]
 7 |         typeset = types.split(',')
 8 |         en2t[dou[0]] = set()
 9 |         for t in typeset:
10 |             if len(t)<6 and t!='-1' and len(t)>0:
11 |                 en2t[dou[0]].add(t)
12 | sen = set()
13 | lisen = {}
14 | for i in range(408261): # iterate over every predicate
15 |     lisen['%d'%i] = set()
16 | 
17 | with open('triple file represented by ids here','r') as f:
18 |     i = 1
19 |     for line in f:
20 |         if i%100000==0:
21 |             print(i)
22 |         i += 1
23 |         tri = line[:-1].split('\t')
24 |         if tri[0]!='-1':
25 |             pre = '['+','.join(en2t[tri[0]])+']'
26 |         else:
27 |             pre = '[]'
28 |         if tri[2]!='-1':
29 |             pos = '['+','.join(en2t[tri[2]])+']\n'
30 |             s = pre + '\t' + tri[1] + '\t' + pos
31 |             sen.add(s)
32 |         else:
33 |             lisen[tri[1]].add(tri[0])
34 | 
35 | for k in lisen.keys():
36 |     s = '['+','.join(lisen[k])+']\t'+k+'\tliteral\n'
37 |     sen.add(s)
38 | 
39 | with open('output predicate fragment file','w') as f:
40 |     for item in sen:
41 |         f.write(item)
42 | print(len(sen))
43 | 
--------------------------------------------------------------------------------
/genrate_fragments/step6_get_type_fragment.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | en2t = {}
 3 | with open('input entity fragment file here','r') as f:
 4 |     for line in f:
 5 |         dou = line[:-1].split('\t')
 6 |         types = dou[1].replace('|','#').split('#')[4]
 7 |         typeset = types.split(',')
 8 |         en2t[dou[0]] = set()
 9 |         for t in typeset:
10 |             if len(t)<6 and t!='-1' and len(t)>0:
11 |                 en2t[dou[0]].add(t)
12 | print("en2t loaded\n")
13 | lisen = {}
14 | for i in range(26043): # iterate over every basic type
15 |     lisen['%d'%i] = [set(),set(),set()]
16 | 
17 | with open('triple file represented by ids here','r') as f:
18 |     i = 1
19 |     for line in f:
20 |         if i%100000 == 0:
21 |             print(i)
22 |         i += 1
23 |         tri = line[:-1].split('\t')
24 |         if tri[1]!='208518':
25 |             for t in en2t[tri[0]]:
26 |                 if len(t)<=5:
27 |                     lisen[t][1].add(tri[1])
28 |                     lisen[t][2].add(tri[0])
29 |             if tri[2]!='-1':
30 |                 for t in en2t[tri[2]]:
31 |                     if len(t)<=5:
32 |                         lisen[t][0].add(tri[1])
33 |                         lisen[t][2].add(tri[2])
34 | 
35 | with open('output type fragment','w') as f:
36 |     for k in lisen.keys():
37 |         f.write(k+'\t'+','.join(lisen[k][0])+'|'+','.join(lisen[k][1])+'|'+','.join(lisen[k][2])+'\n')
38 | print(len(lisen))
39 | 
--------------------------------------------------------------------------------
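step6 above serializes one fragment per basic type as typeId<TAB>inPredicates|outPredicates|entities, each field a comma-separated id list that may be empty. A minimal sketch of reading that layout on the Java side — class, method, and variable names here are illustrative assumptions, not gAnswer's actual TypeFragment API:

import java.util.HashSet;
import java.util.Set;

public class TypeFragmentLineSketch {
    // Illustrative only: split one line of step6's output ("typeId \t in|out|entities").
    static void parse(String line) {
        String[] kv = line.split("\t");
        String[] f = kv[1].split("\\|", -1);          // -1 keeps trailing empty fields
        Set<Integer> inPredicates  = toIdSet(f[0]);   // predicates whose objects carry this type
        Set<Integer> outPredicates = toIdSet(f[1]);   // predicates whose subjects carry this type
        Set<Integer> entities      = toIdSet(f[2]);   // entities labeled with this type
    }

    static Set<Integer> toIdSet(String csv) {
        Set<Integer> ids = new HashSet<Integer>();
        for (String s : csv.split(","))
            if (!s.isEmpty()) ids.add(Integer.parseInt(s));
        return ids;
    }
}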
/src/nlp/tool/Main.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.IOException;
 5 | import java.io.InputStreamReader;
 6 | 
 7 | import nlp.ds.DependencyTree;
 8 | import nlp.ds.Sentence;
 9 | import qa.Globals;
10 | 
11 | public class Main {
12 | 	public static void main (String[] args) {
13 | 		Globals.init();
14 | 		BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
15 | 		try {
16 | 			while (true) {
17 | 				System.out.println("Test maltparser.");
18 | 				System.out.print("Please input the NL question: ");
19 | 				String question = br.readLine();
20 | 				if (question.length() <= 3)
21 | 					break;
22 | 				try {
23 | 					long t1 = System.currentTimeMillis();
24 | 					Sentence s = new Sentence(question);
25 | 					DependencyTree dt = new DependencyTree(s, Globals.stanfordParser);
26 | 					System.out.println("====StanfordDependencies====");
27 | 					System.out.println(dt);
28 | 					DependencyTree dt2 = new DependencyTree(s, Globals.maltParser);
29 | 					System.out.println("====MaltDependencies====");
30 | 					System.out.println(dt2);
31 | 					long t2 = System.currentTimeMillis();
32 | 					System.out.println("time=" + (t2-t1) + "ms");
33 | 				} catch (Exception e) {
34 | 					e.printStackTrace();
35 | 				}
36 | 			}
37 | 		} catch (IOException e) {
38 | 			e.printStackTrace();
39 | 		}
40 | 	}
41 | 
42 | }
--------------------------------------------------------------------------------
/src/qa/mapping/EntityFragmentDict.java:
--------------------------------------------------------------------------------
 1 | package qa.mapping;
 2 | 
 3 | import java.util.HashMap;
 4 | 
 5 | import fgmt.EntityFragment;
 6 | 
 7 | public class EntityFragmentDict {
 8 | 	//public HashMap<String, EntityFragment> entityFragmentDictionary = new HashMap<String, EntityFragment>();
 9 | 	public HashMap<Integer, EntityFragment> entityFragmentDictionary = new HashMap<Integer, EntityFragment>();
10 | 
11 | 	public EntityFragment getEntityFragmentByEid (Integer eid)
12 | 	{
13 | 		if (!entityFragmentDictionary.containsKey(eid))
14 | 		{
15 | 			entityFragmentDictionary.put(eid, EntityFragment.getEntityFragmentByEntityId(eid));
16 | 		}
17 | 		return entityFragmentDictionary.get(eid);
18 | 
19 | 	}
20 | 
21 | 	/*
22 | 	 * Old version, search by name
23 | 	 * */
24 | //	public EntityFragment getEntityFragmentByName (String name) {
25 | //		if (name.startsWith("?")) {
26 | //			return null;
27 | //		}
28 | //		if (!entityFragmentDictionary.containsKey(name)) {
29 | //			String fgmt = EntityFragment.getEntityFgmtStringByName(name);
30 | //			if (fgmt != null)
31 | //			{
32 | //				int eid = EntityFragmentFields.entityName2Id.get(name);
33 | //				entityFragmentDictionary.put(name, new EntityFragment(eid, fgmt));
34 | //			}
35 | //			else {
36 | //				entityFragmentDictionary.put(name, null);
37 | //			}
38 | //		}
39 | //		return entityFragmentDictionary.get(name);
40 | //
41 | //	}
42 | }
--------------------------------------------------------------------------------
/genrate_fragments/step3_split.py:
--------------------------------------------------------------------------------
 1 | # encoding=utf-8
 2 | '''
 3 | Step3: extract entities, types and predicates from the original triple files and allocate ids
 4 | '''
 5 | entities = set()
 6 | types = set()
 7 | predicate = set()
 8 | with open('triple file here','r') as f:
 9 |     i = 1
10 |     k = 0
11 |     for line in f.readlines():
12 |         tri = line[:-2].split('\t')
13 |         entities.add(tri[0])
14 |         predicate.add(tri[1])
15 |         if len(tri)==2:
16 |             print("%s:%d"%(line,i))
17 |             i += 1
18 |             k += 1
19 |             print(tri)
20 |             continue
21 |         if '"' in tri[2][0]:
22 |             continue
23 |         entities.add(tri[2])
24 |         if tri[1]=='<type>':
25 |             types.add(tri[2])
26 |         if i%10000 == 0:
27 |             print(i)
28 |         i += 1
29 | print(i)
30 | print(k)
31 | 
32 | e = open('entity id file','w')
33 | t = open('type id file','w')
34 | p = open('predicate id file','w')
35 | 
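# Each mapping file written below holds one "<name>\tid" pair per line, e.g. "<Berlin>\t0";
# the ids are separate counters per file, so an entity id and a predicate id may collide.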
36 | k = 0
37 | for item in entities:
38 |     if item[-1]!='\n':
39 |         e.write(item+'\t%d'%k+'\n')
40 |     else:
41 |         e.write(item[:-1]+'\t%d'%k+'\n')
42 |     k += 1
43 | 
44 | k = 0
45 | for item in types:
46 |     if item[-1]!='\n':
47 |         t.write(item+'\t%d'%k+'\n')
48 |     else:
49 |         t.write(item[:-1]+'\t%d'%k+'\n')
50 |     k += 1
51 | 
52 | k = 0
53 | for item in predicate:
54 |     if item[-1]!='\n':
55 |         p.write(item+'\t%d'%k+'\n')
56 |     else:
57 |         p.write(item[:-1]+'\t%d'%k+'\n')
58 |     k += 1
59 | 
--------------------------------------------------------------------------------
/genrate_fragments/step4_triple_to_number.py:
--------------------------------------------------------------------------------
 1 | #encoding=utf-8
 2 | '''
 3 | Step4: transform the triples, representing each entity, type and predicate by its id
 4 | '''
 5 | eid = {}
 6 | tid = {}
 7 | pid = {}
 8 | 
 9 | with open('entity id file here','r') as e:
10 |     for line in e:
11 |         dub = line[:-1].split('\t')
12 |         eid[dub[0]] = dub[1]
13 | 
14 | 
15 | with open('type id file here','r') as t:
16 |     for line in t:
17 |         dub = line[:-1].split('\t')
18 |         tid[dub[0]] = dub[1]
19 | 
20 | 
21 | with open('predicate id file here','r') as p:
22 |     for line in p:
23 |         dub = line[:-1].split('\t')
24 |         pid[dub[0]] = dub[1]
25 | 
26 | print("%d %d %d"%(len(eid),len(tid),len(pid)))
27 | 
28 | rt = open("output triple file here",'w')
29 | with open('input triple file here','r') as f:
30 |     i = 1
31 |     for line in f:
32 |         tri = line[:-2].split('\t')
33 |         if tri[1] == '<type>':
34 |             if not tid.has_key(tri[2]):
35 |                 tid[tri[2]] = '-1'
36 |             try:
37 |                 rt.write("%s\t%s\t%s\n"%(eid[tri[0]],pid[tri[1]],tid[tri[2]]))
38 |             except KeyError:
39 |                 print(line)
40 |                 print(i)
41 |         else:
42 |             if tri[2][0]=='"':
43 |                 try:
44 |                     rt.write("%s\t%s\t-1\n"%(eid[tri[0]],pid[tri[1]]))
45 |                 except KeyError:
46 |                     print(line)
47 |                     print(i)
48 |             else:
49 |                 try:
50 |                     rt.write("%s\t%s\t%s\n"%(eid[tri[0]],pid[tri[1]],eid[tri[2]]))
51 |                 except KeyError:
52 |                     print(line)
53 |                     print(i)
54 | 
--------------------------------------------------------------------------------
/src/fgmt/VariableFragment.java:
--------------------------------------------------------------------------------
 1 | package fgmt;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Collections;
 5 | import java.util.HashSet;
 6 | import java.util.Iterator;
 7 | 
 8 | public class VariableFragment extends Fragment {
 9 | 	public static final int magic_number = -265;
10 | 
11 | 	public ArrayList<HashSet<Integer>> candTypes = null;
12 | 	public HashSet<Integer> candEntities = null;
13 | 	public boolean mayLiteral = false;
14 | 
15 | 	public VariableFragment()
16 | 	{
17 | 		fragmentType = typeEnum.VAR_FRAGMENT;
18 | 		candTypes = new ArrayList<HashSet<Integer>>();
19 | 		candEntities = new HashSet<Integer>();
20 | 	}
21 | 
22 | 	@Override
23 | 	public String toString()
24 | 	{
25 | 		return "("+ candEntities.size() +")";
26 | 	}
27 | 
28 | 	public boolean containsAll(HashSet<Integer> s1) {
29 | 		Iterator<HashSet<Integer>> it = candTypes.iterator();
30 | 		while(it.hasNext()) {
31 | 			HashSet<Integer> s2 = it.next();
32 | 			if (s2.contains(magic_number)) {
33 | 				if (!Collections.disjoint(s1, s2)) {
34 | 					return true;
35 | 				}
36 | 			}
37 | 			else {
38 | 				if (s1.containsAll(s2) && s2.containsAll(s1)) {
39 | 					return true;
40 | 				}
41 | 			}
42 | 		}
43 | 		return false;
44 | 	}
45 | 
46 | 	public boolean contains(Integer i) {
47 | 		Iterator<HashSet<Integer>> it = candTypes.iterator();
48 | 		while(it.hasNext()) {
49 | 			HashSet<Integer> s = it.next();
50 | 			if (s.contains(i)) {
51 | 				return true;
52 | 			}
53 | 		}
54 | 		return false;
55 | 	}
56 | }
--------------------------------------------------------------------------------
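VariableFragment.containsAll above applies two rules: a candidate type-set holding magic_number acts as a wildcard that matches on any overlap, while an ordinary set must equal the query set exactly. A toy illustration with made-up type ids:

import java.util.Arrays;
import java.util.HashSet;
import fgmt.VariableFragment;

public class VariableFragmentSketch {
    public static void main(String[] args) {
        VariableFragment v = new VariableFragment();
        v.candTypes.add(new HashSet<Integer>(Arrays.asList(VariableFragment.magic_number, 7)));
        v.candTypes.add(new HashSet<Integer>(Arrays.asList(3, 5)));

        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(7, 9)))); // true: overlaps the wildcard set
        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(3, 5)))); // true: equals {3, 5}
        System.out.println(v.containsAll(new HashSet<Integer>(Arrays.asList(3))));    // false: a proper subset is not enough
    }
}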
/src/rdf/TypeMapping.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import qa.Globals;
 4 | 
 5 | public class TypeMapping implements Comparable<TypeMapping>
 6 | {
 7 | 	public Integer typeID = null;
 8 | 	public String typeName = null;
 9 | 	public double score = 0;
10 | 
11 | 	/*
12 | 	 * 1, For a standard type (DBO type in DBpedia), relation = typePredicateID (rdf:type)
13 | 	 * 2, For a nonstandard type, typeID = -1
14 | 	 * 3, If the type should be added into the triples, a relation is needed | e.g., Which professional surfers were born in Australia? (?uri dbo:occupation res:Surfing) relation = dbo:occupation
15 | 	 * 4, If no type triple is needed, relation = -1 | e.g., Who was the father of [Queen] Elizabeth II
16 | 	 * */
17 | 	public int prefferdRelation = Globals.pd.typePredicateID;
18 | 
19 | 	public TypeMapping(Integer tid, String type, double sco)
20 | 	{
21 | 		typeID = tid;
22 | 		typeName = type;
23 | 		score = sco;
24 | 	}
25 | 
26 | 	public TypeMapping(Integer tid, String type, Integer relation, double sco)
27 | 	{
28 | 		typeID = tid;
29 | 		typeName = type.replace("_", "");
30 | 		score = sco;
31 | 		prefferdRelation = relation;
32 | 	}
33 | 
34 | 	// In descending order: big --> small
35 | 	public int compareTo(TypeMapping o)
36 | 	{
37 | 		double diff = this.score - o.score;
38 | 		if (diff > 0) return -1;
39 | 		else if (diff < 0) return 1;
40 | 		else return 0;
41 | 	}
42 | 
43 | 	public int hashCode()
44 | 	{
45 | 		return typeID.hashCode();
46 | 	}
47 | 
48 | 	public String toString()
49 | 	{
50 | 		StringBuilder res = new StringBuilder(typeName+"("+score+")");
51 | 		return res.toString();
52 | 	}
53 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2018,
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/src/lcn/Main.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Scanner;
 5 | 
 6 | import qa.Globals;
 7 | import qa.mapping.EntityFragmentDict;
 8 | 
 9 | 
10 | public class Main {
11 | 	//Test: searching Entities and Types through Lucene Index.
12 | 	public static void main(String[] aStrings) throws Exception{
13 | 
14 | 		//SearchInLiteralSubset se = new SearchInLiteralSubset();
15 | 		SearchInTypeShortName st = new SearchInTypeShortName();
16 | 		SearchInEntityFragments sf = new SearchInEntityFragments();
17 | 		EntityFragmentDict efd = new EntityFragmentDict();
18 | 		EntityFragmentFields eff = null;
19 | 		Globals.localPath = "D:/husen/gAnswer/";
20 | 		Scanner sc = new Scanner(System.in);
21 | 		System.out.print("input name: ");
22 | 
23 | 		while(sc.hasNextLine())
24 | 		{
25 | 			String literal = sc.nextLine();
26 | 			System.out.println(literal);
27 | 
28 | 			//literal = cnlp.getBaseFormOfPattern(literal);
29 | 
30 | 			//search Type
31 | 			ArrayList<String> result = st.searchType(literal, 0.4, 0.8, 10);
32 | 			System.out.println("TypeShortName-->RESULT:");
33 | 			for (String s : result) {
34 | 				System.out.println("<"+s + ">");
35 | 			}
36 | 
37 | 			//search Ent Fragment
38 | //			int eId = EntityFragmentFields.entityName2Id.get(literal);
39 | //			EntityFragment ef = EntityFragment.getEntityFragmentByEntityId(eId);
40 | //			System.out.println(ef);
41 | 
42 | 			//search Ent Name
43 | //			ArrayList<EntityNameAndScore> result = sf.searchName(literal, 0.4, 0.8, 50);
44 | //			System.out.println("EntityName-->RESULT:");
45 | //			for(EntityNameAndScore enas: result)
46 | //			{
47 | //				System.out.println(enas);
48 | //			}
49 | 
50 | 			System.out.print("input name: ");
51 | 		}
52 | 		sc.close();
53 | 	}
54 | 
55 | }
--------------------------------------------------------------------------------
/src/rdf/SemanticUnit.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import rdf.SemanticRelation;
 7 | import nlp.ds.DependencyTreeNode;
 8 | import nlp.ds.Word;
 9 | 
10 | public class SemanticUnit
11 | {
12 | 	public Word centerWord = null;
13 | 	public ArrayList<DependencyTreeNode> describeNodeList = new ArrayList<DependencyTreeNode>();
14 | 	public ArrayList<SemanticUnit> neighborUnitList = new ArrayList<SemanticUnit>();
15 | 	public HashMap RelationList = new HashMap();
16 | 
17 | 	public boolean isSubj = true;
18 | 	public Integer prefferdType = null;
19 | 
20 | 	public SemanticUnit(Word center, boolean isSubJ)
21 | 	{
22 | 		centerWord = center;
23 | 		isSubj = isSubJ;
24 | 	}
25 | 
26 | 	public SemanticUnit copy()
27 | 	{
28 | 		SemanticUnit su = new SemanticUnit(this.centerWord, this.isSubj);
29 | 		su.describeNodeList = (ArrayList<DependencyTreeNode>) this.describeNodeList.clone();
30 | 		su.neighborUnitList = (ArrayList<SemanticUnit>) this.neighborUnitList.clone();
31 | 		su.RelationList = (HashMap) this.RelationList.clone();
32 | 		return su;
33 | 	}
34 | 
35 | 	@Override
36 | 	public int hashCode() {
37 | 		return centerWord.hashCode();
38 | 	}
39 | 
40 | 	@Override
41 | 	public boolean equals(Object o) {
42 | 		if (o instanceof SemanticUnit) {
43 | 			SemanticUnit su2 = (SemanticUnit) o;
44 | 			if(this.centerWord.equals(su2.centerWord))
45 | 				return true;
46 | 		}
47 | 		return false;
48 | 	}
49 | 
50 | 	@Override
51 | 	public String toString()
52 | 	{
53 | 		String ret = "<" + centerWord + ", {";
54 | 		for(SemanticUnit su: neighborUnitList)
55 | 			ret += su.centerWord + ", ";
56 | 		ret += "}>";
57 | 
58 | 		return ret;
59 | 
	}
60 | 
61 | }
--------------------------------------------------------------------------------
/src/rdf/ImplicitRelation.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import fgmt.TypeFragment;
 4 | import qa.Globals;
 5 | import lcn.EntityFragmentFields;
 6 | 
 7 | public class ImplicitRelation {
 8 | 
 9 | 	public String subj = null;
10 | 	public String obj = null;
11 | 
12 | 	public int pId = -1;
13 | 	public double score = 0;
14 | 
15 | 	//Role : 1|ent , 2|type_ , 3|var
16 | 	public enum roleEnum {ENTITY, TYPE_CONSTANT, TYPE_VARIABLE, VARIABLE};
17 | 	public int subjRole = -1;
18 | 	public int objRole = -1;
19 | 	public int subjId = -1;
20 | 	public int objId = -1;
21 | 
22 | 	public ImplicitRelation(String s, String o, int pid, double sc)
23 | 	{
24 | 		pId = pid;
25 | 		subj = s;
26 | 		obj = o;
27 | 		score = sc;
28 | 		subjId = EntityFragmentFields.entityName2Id.get(s);
29 | 		if(pId != Globals.pd.typePredicateID)
30 | 			objId = EntityFragmentFields.entityName2Id.get(o);
31 | 		else
32 | 			objId = TypeFragment.typeShortName2IdList.get(o).get(0);
33 | 	}
34 | 
35 | 	public ImplicitRelation(Integer sId, Integer oId, int pid, double sc)
36 | 	{
37 | 		pId = pid;
38 | 		subjId = sId;
39 | 		objId = oId;
40 | 		score = sc;
41 | 	}
42 | 
43 | 	public void setSubjectId(Integer s)
44 | 	{
45 | 		subjId = s;
46 | 	}
47 | 
48 | 	public void setObjectId(Integer o)
49 | 	{
50 | 		objId = o;
51 | 	}
52 | 
53 | 	public void setSubject(String s)
54 | 	{
55 | 		subj = s;
56 | 	}
57 | 
58 | 	public void setObject(String o)
59 | 	{
60 | 		obj = o;
61 | 	}
62 | 
63 | 	public int hashCode()
64 | 	{
65 | 		return new Integer(pId).hashCode() ^ new Integer(subjId).hashCode() ^ new Integer(objId).hashCode();
66 | 	}
67 | 
68 | 	@Override
69 | 	public boolean equals(Object ir)
70 | 	{
71 | 		ImplicitRelation tmpIr = (ImplicitRelation) ir;
72 | 		if (pId == tmpIr.pId && subjId == tmpIr.subjId && objId == tmpIr.objId)
73 | 			return true;
74 | 		else return false;
75 | 	}
76 | 
77 | }
--------------------------------------------------------------------------------
/src/nlp/tool/NERecognizer.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.util.List;
 4 | 
 5 | import qa.Globals;
 6 | 
 7 | import nlp.ds.Sentence;
 8 | import nlp.ds.Word;
 9 | 
10 | import edu.stanford.nlp.ie.AbstractSequenceClassifier;
11 | import edu.stanford.nlp.ie.crf.CRFClassifier;
12 | import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
13 | import edu.stanford.nlp.ling.CoreAnnotations.PositionAnnotation;
14 | import edu.stanford.nlp.ling.CoreLabel;
15 | 
16 | public class NERecognizer {
17 | 
18 | 	static String serializedClassifier;
19 | 	static AbstractSequenceClassifier<CoreLabel> classifier;
20 | 	//public static String localPath="E:\\Hanshuo\\gAnswer\\";
21 | 
22 | 	public NERecognizer() {
23 | 		serializedClassifier = Globals.localPath+"lib/stanford-ner-2012-11-11/classifiers/english.all.3class.distsim.crf.ser.gz";
24 | 		classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
25 | 	}
26 | 
27 | 	/*public NERecognizer(String basePath, boolean flag) {
28 | 		serializedClassifier = "WEB-INF\\lib\\stanford-ner-2012-11-11\\stanford-ner-2012-11-11\\classifiers\\english.all.3class.distsim.crf.ser.gz";
29 | 	}*/
30 | 
31 | 	public void recognize(Sentence sentence) {
32 | 		List<CoreLabel> lcl = classifier.classify(sentence.plainText).get(0);
33 | 		for (CoreLabel cl : lcl) {
34 | 			int position = Integer.parseInt(cl.get(PositionAnnotation.class))+1;
35 | 			Word w = sentence.getWordByIndex(position);
36 | 
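			// PositionAnnotation counts tokens from 0 but Word positions are 1-based
			// (Sentence.getWordByIndex reads words[idx-1]), hence the +1 above.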
			String ner = cl.get(AnswerAnnotation.class);
37 | 			if (ner.equals("O")) w.ner = null;
38 | 			else w.ner = ner;
39 | 		}
40 | 	}
41 | 
42 | 	public static void main(String[] args) {
43 | 		System.out.println("Test NER");
44 | 		Globals.init();
45 | 
46 | 		Sentence s = new Sentence("I go to school at Stanford University, which is located in California.");//"Which states of Germany are governed by the Social Democratic Party?"
47 | 		Globals.nerRecognizer.recognize(s);
48 | 		for (Word word : s.words) {
49 | 			System.out.print(word + " ");
50 | 			System.out.println("ner=" + word.ner);
51 | 		}
52 | 	}
53 | }
--------------------------------------------------------------------------------
/genrate_fragments/step1_clean_triple.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | '''
 3 | Step 1: Clean the triple file. In the DBpedia case, we just need the part of each resource URI that indicates the entity/type/predicate name.
 4 | '''
 5 | fileName = [] # list of triple files to be processed
 6 | notRdf = open('./notRdf.txt','w') # records lines that refer to a type, but not via rdf:type
 7 | for index2,fname in enumerate(fileName):
 8 |     f = open('./'+fname)
 9 |     triple = open('output triple files here','w')
10 |     prefix_f = open('output prefix files here','w') # save the prefixes in files, in case they are useful in the future.
11 |     i = 0
12 |     count = 0
13 |     prefix_set = set()
14 |     for line in f:
15 |         if line[0] != '<':
16 |             print(i)
17 |             i = i + 1
18 |             count += 1
19 |             continue
20 |         line = line[:-3].replace('> <','>$-$-$<').replace('> "','>$-$-$"')
21 |         line = line.split('$-$-$')
22 |         if i==0:
23 |             i += 1
24 |             continue
25 |         new_line=[]
26 |         if "type>" in line[1]:
27 |             if "rdf" not in line[1]:
28 |                 notRdf.write(str(line)+'\n')
29 |                 continue
30 |         for index,item in enumerate(line):
31 |             if not item:
32 |                 count +=1
33 |                 break
34 |             if item[0]=='<':
35 |                 pos = item.rfind('/')
36 |                 word = item[pos+1:-1].split("#")
37 |                 if len(word)<2:
38 |                     new_line.append('<'+word[0]+'>')
39 |                 else:
40 |                     new_line.append('<'+word[1]+'>')
41 |                 if index == 1:
42 |                     tmp = new_line[1][1:len(new_line[1])-1]
43 |                     pos2 = line[1].rfind(tmp)
44 |                     prefix = line[1][1:pos2-1]
45 |                     prefix_set.add(tmp + '^^^'+prefix+'\n')
46 |                 continue
47 |             elif item.count('"') >=2:
48 |                 item = item.split('^^')[0].split('@')[0]
49 |                 pattern = re.compile('"(.*)"')
50 |                 word = '"'+''.join(pattern.findall(item))+'"'
51 |                 new_line.append(word)
52 |                 continue
53 |             else:
54 |                 print(i)
55 |         i += 1
56 |         #print('\t'.join(new_line))
57 |         if i%1000000==0:
58 |             print("%d:%d"%(index2,i))
59 |         triple.write('\t'.join(new_line)+'\n')
60 |     for item in prefix_set:
61 |         prefix_f.write(item)
62 |     f.close()
63 |     triple.close()
64 |     prefix_f.close()
65 | 
--------------------------------------------------------------------------------
/src/nlp/tool/StanfordParser.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | import java.io.StringReader;
 4 | import java.util.List;
 5 | 
 6 | import edu.stanford.nlp.ling.CoreLabel;
 7 | import edu.stanford.nlp.objectbank.TokenizerFactory;
 8 | import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
 9 | import edu.stanford.nlp.process.CoreLabelTokenFactory;
10 | import edu.stanford.nlp.process.PTBTokenizer;
11 | import edu.stanford.nlp.trees.GrammaticalStructure;
12 | import edu.stanford.nlp.trees.GrammaticalStructureFactory;
13 | import edu.stanford.nlp.trees.PennTreebankLanguagePack;
14 | import edu.stanford.nlp.trees.Tree;
15 | import 
edu.stanford.nlp.trees.TreebankLanguagePack;
16 | 
17 | public class StanfordParser {
18 | 	private LexicalizedParser lp;
19 | 	private TokenizerFactory<CoreLabel> tokenizerFactory;
20 | 	private TreebankLanguagePack tlp;
21 | 	private GrammaticalStructureFactory gsf;
22 | 
23 | 	public StanfordParser() {
24 | 		lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
25 | 		tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
26 | 		tlp = new PennTreebankLanguagePack();
27 | 		gsf = tlp.grammaticalStructureFactory();
28 | 	}
29 | 
30 | 	public GrammaticalStructure getGrammaticalStructure (String sentence) {
31 | 		List<CoreLabel> rawWords2 =
32 | 			tokenizerFactory.getTokenizer(new StringReader(sentence)).tokenize();
33 | 		// Converts a Sentence/List/String into a Tree.
34 | 		// In all circumstances, the input will be treated as a single sentence to be parsed.
35 | 		Tree parse = lp.apply(rawWords2);
36 | 
37 | 		return gsf.newGrammaticalStructure(parse);
38 | 		/*List tdl = gs.typedDependencies(false);
39 | 		for (TypedDependency td : tdl) {
40 | 			System.out.println(td.reln().getShortName()+"("+td.gov()+","+td.dep()+")");
41 | 			System.out.println("gov="+td.gov()
42 | 					+"\tgov.index="
43 | 					+td.gov().index()
44 | 					+"\tgov.value="
45 | 					+td.gov().value()
46 | 					+"\tgov.pos="
47 | 					+((TreeGraphNode)td.gov().parent()).value());
48 | 		}*/
49 | 		//System.out.println(tdl);
50 | 	}
51 | }
--------------------------------------------------------------------------------
/src/nlp/ds/Sentence.java:
--------------------------------------------------------------------------------
 1 | package nlp.ds;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import qa.Globals;
 7 | import qa.Query;
 8 | import rdf.MergedWord;
 9 | 
10 | public class Sentence {
11 | 	public String plainText = null;
12 | 	public Word[] words = null;
13 | 	public HashMap<String, Word> map = null;
14 | 
15 | 	public DependencyTree dependencyTreeStanford = null;
16 | 	public DependencyTree dependencyTreeMalt = null;
17 | 
18 | 	public enum SentenceType {SpecialQuestion,GeneralQuestion,ImperativeSentence}
19 | 	public SentenceType sentenceType = SentenceType.SpecialQuestion;
20 | 
21 | 	public Sentence (String s)
22 | 	{
23 | 		plainText = s;
24 | 		words = Globals.coreNLP.getTaggedWords(plainText);
25 | 		map = new HashMap<String, Word>();
26 | 		for (Word w : words)
27 | 			map.put(w.key, w);
28 | 	}
29 | 
30 | 	public Sentence (Query query, String s)
31 | 	{
32 | 		plainText = s;
33 | 		words = Globals.coreNLP.getTaggedWords(plainText);
34 | 		// inherit NodeRecognition's information
35 | 		for(Word word: words)
36 | 		{
37 | 			for(MergedWord mWord: query.mWordList)
38 | 			{
39 | 				if(word.originalForm.equals(mWord.name))
40 | 				{
41 | 					word.mayLiteral = mWord.mayLiteral;
42 | 					word.mayEnt = mWord.mayEnt;
43 | 					word.mayType = mWord.mayType;
44 | 					word.mayCategory = mWord.mayCategory;
45 | 					word.tmList = mWord.tmList;
46 | 					word.emList = mWord.emList;
47 | 					word.category = mWord.category;
48 | 				}
49 | 			}
50 | 		}
51 | 		map = new HashMap<String, Word>();
52 | 		for (Word w : words)
53 | 			map.put(w.key, w);
54 | 	}
55 | 	public ArrayList<Word> getWordsByString (String w) {
56 | 		ArrayList<Word> ret = new ArrayList<Word>();
57 | 		for (Word wo: words) {
58 | 			if (wo.originalForm.equals(w)) ret.add(wo);
59 | 		}
60 | 		return ret;
61 | 	}
62 | 
63 | 	public Word getWordByIndex (int idx) {
64 | 		return words[idx-1];
65 | 	}
66 | 
67 | 	public Word getWordByKey (String k) {
68 | 		return map.get(k);
69 | 	}
70 | 
71 | 	public boolean hasModifier(Word w)
72 | 	{
73 | 		for(Word word: words)
74 | 			if(word!=w && word.modifiedWord==w)
75 | 				return true;
76 | 		return false;
77 | 	}
78 | 
79 | 	public void printNERResult () {
80 | 		for (Word word : words) {
81 | 			System.out.print(word + " ");
82 | 			System.out.println("ner=" + word.ner);
83 | 		}
84 | 	}
85 | }
86 | 
87 | 
88 | 
--------------------------------------------------------------------------------
/src/lcn/EntityFragmentFields.java:
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.File;
 5 | import java.io.FileInputStream;
 6 | import java.io.IOException;
 7 | import java.io.InputStreamReader;
 8 | import java.util.HashMap;
 9 | 
10 | import qa.Globals;
11 | 
12 | public class EntityFragmentFields {
13 | 
14 | 	// entity dictionary
15 | 	public static HashMap<String, Integer> entityName2Id = null;
16 | 	public static HashMap<Integer, String> entityId2Name = null;
17 | 	public static HashMap<Integer, String> entityFragmentString = null;
18 | 
19 | 	public static void load() throws IOException
20 | 	{
21 | 		String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16entity_id.txt";
22 | 		String fragmentFileName = Globals.localPath+"data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt";
23 | 		File file = new File(filename);
24 | 		BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file),"utf-8"));
25 | 
26 | 		entityName2Id = new HashMap<String, Integer>();
27 | 		entityId2Name = new HashMap<Integer, String>();
28 | 
29 | 		long t1, t2, t3;
30 | 
31 | 		t1 = System.currentTimeMillis();
32 | 		// load entity id
33 | 		System.out.println("Loading entity id ...");
34 | 		String line;
35 | 		while((line = br.readLine()) != null)
36 | 		{
37 | 			String[] lines = line.split("\t");
38 | 			String entName = lines[0].substring(1, lines[0].length()-1);
39 | 
40 | 			entityName2Id.put(entName, Integer.parseInt(lines[1]));
41 | 			entityId2Name.put(Integer.parseInt(lines[1]), entName);
42 | 		}
43 | 		br.close();
44 | 		t2 = System.currentTimeMillis();
45 | 		System.out.println("Load "+entityId2Name.size()+" entity ids in "+ (t2-t1) + "ms.");
46 | 
47 | 		// load entity fragment
48 | 		System.out.println("Loading entity fragments ...");
49 | 		br = new BufferedReader(new InputStreamReader(new FileInputStream(fragmentFileName),"utf-8"));
50 | 		entityFragmentString = new HashMap<Integer, String>();
51 | 		while((line = br.readLine()) != null)
52 | 		{
53 | 			String[] lines = line.split("\t");
54 | 			if(lines.length != 2)
55 | 				continue;
56 | 			int eId = Integer.parseInt(lines[0]);
57 | 			entityFragmentString.put(eId, lines[1]);
58 | 		}
59 | 		t3 = System.currentTimeMillis();
60 | 		System.out.println("Load "+entityFragmentString.size()+" entity fragments in "+ (t3-t2) + "ms.");
61 | 
62 | 		br.close();
63 | 	}
64 | }
--------------------------------------------------------------------------------
/src/nlp/tool/MaltParser.java:
--------------------------------------------------------------------------------
 1 | package nlp.tool;
 2 | 
 3 | 
 4 | import nlp.ds.Sentence;
 5 | import nlp.ds.Word;
 6 | 
 7 | import org.maltparser.MaltParserService;
 8 | import org.maltparser.core.exception.MaltChainedException;
 9 | import org.maltparser.core.syntaxgraph.DependencyStructure;
10 | 
11 | import qa.Globals;
12 | 
13 | public class MaltParser {
14 | 	private MaltParserService service = null;
15 | 	public MaltParser() {
16 | 		try
17 | 		{
18 | 			System.out.print("Loading MaltParser ...");
19 | 			service = new MaltParserService();
20 | 			// Initialize the parser model 'model0', set the working directory to '.' and set the logging file to 'parser.log'
21 | 			//service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w . 
-lfi parser.log"); 22 | service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w "+Globals.localPath+"lib/maltparser-1.9.1 -lfi parser.log"); 23 | firstParse(); 24 | System.out.println("ok!"); 25 | } catch (MaltChainedException e) { 26 | e.printStackTrace(); 27 | System.err.println("MaltParser exception: " + e.getMessage()); 28 | } 29 | } 30 | 31 | private void firstParse() { 32 | String[] tokens = new String[12]; 33 | tokens[0] = "1\tIn\t_\tIN\tIN\t_"; 34 | tokens[1] = "2\twhich\t_\tWDT\tWDT\t_"; 35 | tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_"; 36 | tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_"; 37 | tokens[4] = "5\tby\t_\tIN\tIN\t_"; 38 | tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_"; 39 | tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_"; 40 | tokens[7] = "8\twas\t_\tVBD\tVBD\t_"; 41 | tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_"; 42 | tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_"; 43 | tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_"; 44 | tokens[11] = "12\t?\t_\t.\t.\t_"; 45 | try { 46 | service.parse(tokens); 47 | } catch (MaltChainedException e) { 48 | e.printStackTrace(); 49 | } 50 | } 51 | 52 | public DependencyStructure getDependencyStructure (Sentence sentence) { 53 | try { 54 | return service.parse(getTaggedTokens(sentence)); 55 | } catch (MaltChainedException e) { 56 | e.printStackTrace(); 57 | } 58 | return null; 59 | } 60 | 61 | private String[] getTaggedTokens (Sentence sentence) { 62 | String[] ret = new String[sentence.words.length]; 63 | int count = 0; 64 | for (Word w : sentence.words) { 65 | ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_"); 66 | count ++; 67 | } 68 | return ret; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/nlp/tool/MaltParserCon.java: -------------------------------------------------------------------------------- 1 | package nlp.tool; 2 | 3 | import java.io.File; 4 | import java.net.URL; 5 | 6 | import nlp.ds.Sentence; 7 | import nlp.ds.Word; 8 | 9 | import org.maltparser.concurrent.ConcurrentMaltParserModel; 10 | import org.maltparser.concurrent.ConcurrentMaltParserService; 11 | import org.maltparser.concurrent.graph.ConcurrentDependencyGraph; 12 | import org.maltparser.core.exception.MaltChainedException; 13 | //import org.maltparser.core.syntaxgraph.DependencyStructure; 14 | 15 | 16 | public class MaltParserCon { 17 | private ConcurrentMaltParserModel model = null; 18 | public ConcurrentDependencyGraph outputGraph = null; 19 | 20 | public MaltParserCon(){ 21 | try{ 22 | System.out.println("Loading Maltparser...\n"); 23 | URL ModelURL = new File("output/engmalt.linear-1.7.mco").toURI().toURL(); 24 | model = ConcurrentMaltParserService.initializeParserModel(ModelURL); 25 | firstTest(); 26 | System.out.println("ok!\n"); 27 | }catch(Exception e){ 28 | e.printStackTrace(); 29 | System.err.println("MaltParser exception: " + e.getMessage()); 30 | } 31 | } 32 | 33 | private void firstTest(){ 34 | String[] tokens = new String[12]; 35 | tokens[0] = "1\tIn\t_\tIN\tIN\t_"; 36 | tokens[1] = "2\twhich\t_\tWDT\tWDT\t_"; 37 | tokens[2] = "3\tmovies\t_\tNNS\tNNS\t_"; 38 | tokens[3] = "4\tdirected\t_\tVBN\tVBN\t_"; 39 | tokens[4] = "5\tby\t_\tIN\tIN\t_"; 40 | tokens[5] = "6\tGarry\t_\tNNP\tNNP\t_"; 41 | tokens[6] = "7\tMarshall\t_\tNNP\tNNP\t_"; 42 | tokens[7] = "8\twas\t_\tVBD\tVBD\t_"; 43 | tokens[8] = "9\tJulia\t_\tNNP\tNNP\t_"; 44 | tokens[9] = "10\tRoberts\t_\tNNP\tNNP\t_"; 45 | tokens[10] = "11\tstarring\t_\tVBG\tVBG\t_"; 46 | tokens[11] = "12\t?\t_\t.\t.\t_"; 47 | try { 48 | 
			outputGraph = model.parse(tokens);
49 | 		} catch (Exception e) {
50 | 			e.printStackTrace();
51 | 		}
52 | 		System.out.println(outputGraph);
53 | 	}
54 | 
55 | 	public ConcurrentDependencyGraph getDependencyStructure (Sentence sentence) {
56 | 		try {
57 | 			return model.parse(getTaggedTokens(sentence));
58 | 		} catch (MaltChainedException e) {
59 | 			e.printStackTrace();
60 | 		}
61 | 		return null;
62 | 	}
63 | 
64 | 	private String[] getTaggedTokens (Sentence sentence) {
65 | 		String[] ret = new String[sentence.words.length];
66 | 		int count = 0;
67 | 		for (Word w : sentence.words) {
68 | 			ret[count] = new String(""+w.position+"\t"+w.originalForm+"\t_\t"+w.posTag+"\t"+w.posTag+"\t_");
69 | 			count ++;
70 | 		}
71 | 		return ret;
72 | 	}
73 | }
--------------------------------------------------------------------------------
/src/application/GinfoHandler.java:
--------------------------------------------------------------------------------
 1 | package application;
 2 | import java.io.IOException;
 3 | 
 4 | import javax.servlet.ServletException;
 5 | import javax.servlet.http.HttpServletRequest;
 6 | import javax.servlet.http.HttpServletResponse;
 7 | 
 8 | import log.QueryLogger;
 9 | 
10 | import org.json.*;
11 | import org.eclipse.jetty.server.Request;
12 | import org.eclipse.jetty.server.handler.AbstractHandler;
13 | 
14 | import qa.Globals;
15 | 
16 | public class GinfoHandler extends AbstractHandler{
17 | 
18 | 	public static String errorHandle(String status,String message,String question,QueryLogger qlog){
19 | 		JSONObject exobj = new JSONObject();
20 | 		try {
21 | 			exobj.put("status", status);
22 | 			exobj.put("message", message);
23 | 			exobj.put("query", question);
24 | 			if(qlog!=null&&qlog.rankedSparqls!=null&&qlog.rankedSparqls.size()>0){
25 | 				exobj.put("sparql", qlog.rankedSparqls.get(0).toStringForGStore2());
26 | 			}
27 | 		} catch (Exception e1) {
28 | 		}
29 | 		return exobj.toString();
30 | 	}
31 | 
32 | 	public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response)
33 | 			throws IOException, ServletException {
34 | 		try{
35 | 			response.setContentType("text/html;charset=utf-8");
36 | 			response.setStatus(HttpServletResponse.SC_OK);
37 | 			JSONObject infoobj = new JSONObject();
38 | 
39 | 			infoobj.put("version", Globals.Version);
40 | 			infoobj.put("dataset", Globals.Dataset);
41 | 			infoobj.put("GDB system", Globals.GDBsystem);
42 | 
43 | 			//TODO add more info
44 | 			baseRequest.setHandled(true);
45 | 			response.getWriter().println(infoobj.toString());
46 | 		}
47 | 		catch(Exception e){
48 | 			if(e instanceof IOException){
49 | 				try {
50 | 					baseRequest.setHandled(true);
51 | 					response.getWriter().println(errorHandle("500","IOException","",null));
52 | 				} catch (Exception e1) {
53 | 				}
54 | 			}
55 | 			else if(e instanceof JSONException){
56 | 				try {
57 | 					baseRequest.setHandled(true);
58 | 					response.getWriter().println(errorHandle("500","JSONException","",null));
59 | 				} catch (Exception e1) {
60 | 				}
61 | 			}
62 | 			else if(e instanceof ServletException){
63 | 				try {
64 | 					baseRequest.setHandled(true);
65 | 					response.getWriter().println(errorHandle("500","ServletException","",null));
66 | 				} catch (Exception e1) {
67 | 				}
68 | 			}
69 | 			else {
70 | 				try {
71 | 					baseRequest.setHandled(true);
72 | 					response.getWriter().println(errorHandle("500","Unknown Exception","",null));
73 | 				} catch (Exception e1) {
74 | 				}
75 | 			}
76 | 		}
77 | 	}
78 | 
79 | }
--------------------------------------------------------------------------------
/src/lcn/SearchInEntityFragments.java: 
--------------------------------------------------------------------------------
 1 | package lcn;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.ArrayList;
 5 | 
 6 | import org.apache.lucene.analysis.Analyzer;
 7 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
 8 | import org.apache.lucene.queryParser.ParseException;
 9 | import org.apache.lucene.queryParser.QueryParser;
10 | import org.apache.lucene.search.Hits;
11 | import org.apache.lucene.search.IndexSearcher;
12 | import org.apache.lucene.search.Query;
13 | 
14 | import qa.Globals;
15 | 
16 | 
17 | public class SearchInEntityFragments {
18 | 
19 | 	/*
20 | 	 * Search entity in Lucene
21 | 	 * */
22 | 	public ArrayList<EntityNameAndScore> searchName(String literal, double thres1, double thres2, int k) throws IOException {
23 | 		Hits hits = null;
24 | 		String queryString = null;
25 | 		Query query = null;
26 | 
27 | 		IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/entity_fragment_index");
28 | 
29 | 		ArrayList<EntityNameAndScore> result = new ArrayList<EntityNameAndScore>();
30 | 
31 | 		queryString = literal;
32 | 
33 | 		Analyzer analyzer = new StandardAnalyzer();
34 | 		try
35 | 		{
36 | 			QueryParser qp = new QueryParser("EntityName", analyzer);
37 | 			query = qp.parse(queryString);
38 | 		} catch (ParseException e)
39 | 		{
40 | 			e.printStackTrace();
41 | 		}
42 | 
43 | 		if (searcher != null)
44 | 		{
45 | 			hits = searcher.search(query);
46 | 			//System.out.println("search for entity fragment hits.length=" + hits.length());
47 | 			if (hits.length() > 0)
48 | 			{
49 | 				//System.out.println("find " + hits.length() + " result!");
50 | 				for (int i=0; i<hits.length(); i++) {
51 | 					//System.out.println("No." + i + ": <" + hits.doc(i).get("EntityName") + ">;"
52 | 					//		+hits.doc(i).get("EntityFragment")
53 | 					//		+ "; Score: " + hits.score(i)
54 | 					//		+ "; Score2: " + hits.score(i)*(literalLength/hits.doc(i).get("EntityName").length()));
55 | 					if(i<k) {
56 | 						if (hits.score(i) >= thres1) {
57 | 							String en = hits.doc(i).get("EntityName");
58 | 							int id = Integer.parseInt(hits.doc(i).get("EntityId"));
59 | 							result.add(new EntityNameAndScore(id, en, hits.score(i)));
60 | 						}
61 | 						else {
62 | 							break;
63 | 						}
64 | 					}
65 | 					else {
66 | 						if (hits.score(i) >= thres2) {
67 | 							String en = hits.doc(i).get("EntityName");
68 | 							int id = Integer.parseInt(hits.doc(i).get("EntityId"));
69 | 							result.add(new EntityNameAndScore(id, en, hits.score(i)));
70 | 						}
71 | 						else {
72 | 							break;
73 | 						}
74 | 					}
75 | 				}
76 | 			}
77 | 		}
78 | 
79 | 		//Collections.sort(result);
80 | 		return result;
81 | 
82 | 	}
83 | 
84 | }
--------------------------------------------------------------------------------
/src/application/GanswerHttp.java:
--------------------------------------------------------------------------------
 1 | package application;
 2 | import org.eclipse.jetty.server.Server;
 3 | import org.eclipse.jetty.server.handler.ContextHandler;
 4 | import org.eclipse.jetty.server.handler.ContextHandlerCollection;
 5 | import org.eclipse.jetty.server.handler.ErrorHandler;
 6 | import org.eclipse.jetty.server.Handler;
 7 | 
 8 | import qa.Globals;
 9 | 
10 | public class GanswerHttp {
11 | 	static int maxAnswerNum = 100;
12 | 	static int maxSparqlNum = 3;
13 | 	static int defaultPort = 9999;
14 | 	public static void main(String[] args) throws Exception {
15 | 		//step 1: initialize the server with a given port
16 | 		if(args.length>0){
17 | 			for(int k=0;k
--------------------------------------------------------------------------------
/src/utils/FileUtil.java:
--------------------------------------------------------------------------------
 1 | package utils;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.BufferedWriter;
 5 | import java.io.FileReader;
 6 | import java.io.FileWriter;
 7 | import java.io.InputStream;
 8 | import java.io.InputStreamReader;
 9 | import java.util.ArrayList;
10 | import java.util.HashSet;
11 | import java.util.List;
12 | import java.util.Set;
13 | 
14 | public class FileUtil {
15 | 
16 | 	public static List<String> readFile(String filePath){
17 | 		List<String> lines = new ArrayList<String>();
18 | 		try {
19 | 			BufferedReader br = new BufferedReader(new FileReader(filePath));
20 | 			String line = null;
21 | 			while( (line = br.readLine()) != null ){
22 | 				lines.add(line);
23 | 			}
24 | 			br.close();
25 | 		}catch(Exception e){
26 | 			e.printStackTrace();
27 | 		}finally {
28 | 			return lines;
29 | 
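			// note: returning from this finally block always wins, so lines is handed back
			// even after a partial failure, and anything the catch above missed (e.g. an Error)
			// is silently discarded.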
		}
30 | 	}
31 | 
32 | 	public static Set<String> readFileAsSet(String filePath){
33 | 		Set<String> lines = new HashSet<String>();
34 | 		try {
35 | 			BufferedReader br = new BufferedReader(new FileReader(filePath));
36 | 			String line = null;
37 | 			while( (line = br.readLine()) != null ){
38 | 				lines.add(line);
39 | 			}
40 | 			br.close();
41 | 		}catch(Exception e){
42 | 			e.printStackTrace();
43 | 		}finally {
44 | 			return lines;
45 | 		}
46 | 	}
47 | 
48 | 	public static List<String> readFile(InputStream is){
49 | 		List<String> lines = new ArrayList<String>();
50 | 		try {
51 | 			BufferedReader br = new BufferedReader(new InputStreamReader(is));
52 | 			String line = null;
53 | 			while( (line = br.readLine()) != null ){
54 | 				lines.add(line);
55 | 			}
56 | 			br.close();
57 | 		}catch(Exception e){
58 | 			e.printStackTrace();
59 | 		}finally {
60 | 			return lines;
61 | 		}
62 | 	}
63 | 
64 | 	public static String readFileAsALine(InputStream is){
65 | 		List<String> lines = readFile(is);
66 | 		StringBuffer buffer = new StringBuffer();
67 | 		for(String line : lines){
68 | 			buffer.append(line);
69 | 		}
70 | 		return buffer.toString();
71 | 	}
72 | 
73 | 	public static void writeFile(List<String> lines, String filePath){
74 | 		try{
75 | 			BufferedWriter bw = new BufferedWriter(new FileWriter(filePath));
76 | 			for(String line : lines){
77 | 				bw.write(line+"\n");
78 | 			}
79 | 			bw.close();
80 | 		}catch(Exception e){
81 | 			e.printStackTrace();
82 | 		}
83 | 	}
84 | 
85 | 	public static void writeFile(List<String> lines, String filePath, boolean ifContinueWrite){
86 | 		try{
87 | 			BufferedWriter bw = new BufferedWriter(new FileWriter(filePath, ifContinueWrite));
88 | 			for(String line : lines){
89 | 				bw.write(line+"\n");
90 | 			}
91 | 			bw.close();
92 | 		}catch(Exception e){
93 | 			e.printStackTrace();
94 | 		}
95 | 	}
96 | }
--------------------------------------------------------------------------------
/src/qa/Answer.java:
--------------------------------------------------------------------------------
 1 | package qa;
 2 | 
 3 | import java.util.ArrayList;
 4 | 
 5 | 
 6 | public class Answer implements Comparable<Answer>{
 7 | 	public String questionFocusKey=null;
 8 | 	public String questionFocusValue=null;
 9 | 	public ArrayList<String> otherInformationKey = null;
10 | 	public ArrayList<String> otherInformationValue = null;
11 | 
12 | 	public Answer(String qf, String[] ans) {
13 | 		otherInformationKey = new ArrayList<String>();
14 | 		otherInformationValue = new ArrayList<String>();
15 | 		int p1, p2;
16 | 		for (String line : ans) {
17 | 			System.out.println("line=" + line);
18 | 			if (line.startsWith(qf)) {
19 | 				questionFocusKey = qf;
20 | 				p1 = line.indexOf('<');
21 | 				p2 = line.lastIndexOf('>');
22 | 				String value = null;
23 | 				if (p1 != -1 && p2 != -1) {
24 | 					value = line.substring(p1+1, p2);
25 | 				}
26 | 				else {
27 | 					p1 = line.indexOf('\"');
28 | 					p2 = line.lastIndexOf('\"');
29 | 					if(p1 != -1 && p2 != -1)
30 | 						value = line.substring(p1+1, p2);
31 | 					else
32 | 					{
33 | 						p1 = line.indexOf(':');
34 | 						value = line.substring(p1+1);
35 | 					}
36 | 				}
37 | 				questionFocusValue = value;
38 | 			}
39 | 			else {
40 | 
41 | 				p1 = line.indexOf(':');
42 | 				String key = line.substring(0, p1);
43 | 
44 | 				p1 = line.indexOf('<');
45 | 				p2 = line.lastIndexOf('>');
46 | 				String value = null;
47 | 				if (p1 != -1 && p2 != -1) {
48 | 					value = line.substring(p1+1, p2);
49 | 				}
50 | 				else {
51 | 					p1 = line.indexOf('\"');
52 | 					p2 = line.lastIndexOf('\"');
53 | 					if(p1 != -1 && p2 != -1)
54 | 						value = line.substring(p1+1, p2);
55 | 					else
56 | 					{
57 | 						p1 = line.indexOf(':');
58 | 						value = line.substring(p1+1);
59 | 					}
60 | 				}
61 | 
62 | 				otherInformationKey.add(key);
63 | 				otherInformationValue.add(value);
64 | 			}
65 | 		}
66 | 
67 | 		// Solve BUG: GStore returns messy code in 
questionFocusKey
68 | 		if (questionFocusKey==null || questionFocusValue==null)
69 | 		{
70 | 			questionFocusKey = qf;
71 | 			String line = ans[0];
72 | 			p1 = line.indexOf('<');
73 | 			p2 = line.lastIndexOf('>');
74 | 			String value = null;
75 | 			if (p1 != -1 && p2 != -1) {
76 | 				value = line.substring(p1+1, p2);
77 | 			}
78 | 			else {
79 | 				p1 = line.indexOf('\"');
80 | 				p2 = line.lastIndexOf('\"');
81 | 				if(p1 != -1 && p2 != -1)
82 | 					value = line.substring(p1+1, p2);
83 | 				else
84 | 				{
85 | 					p1 = line.indexOf(':');
86 | 					value = line.substring(p1+1);
87 | 				}
88 | 			}
89 | 			questionFocusValue = value;
90 | 			otherInformationKey.clear();
91 | 			otherInformationValue.clear();
92 | 		}
93 | 
94 | 		/*System.out.println("otherInformationKey.size=" + otherInformationKey.size());
95 | 		for (String k : otherInformationKey) {
96 | 			System.out.println("otherInfoKey = " + k);
97 | 		}*/
98 | 	}
99 | 
100 | 	public int compareTo (Answer p)
101 | 	{
102 | 		return questionFocusValue.compareTo(p.questionFocusValue);
103 | 	}
104 | 
105 | }
--------------------------------------------------------------------------------
/src/rdf/SimpleRelation.java:
--------------------------------------------------------------------------------
 1 | package rdf;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.HashMap;
 5 | 
 6 | import paradict.PredicateIDAndSupport;
 7 | import qa.Globals;
 8 | 
 9 | import nlp.ds.DependencyTree;
10 | import nlp.ds.DependencyTreeNode;
11 | import nlp.ds.Word;
12 | 
13 | // allow repetition
14 | public class SimpleRelation {
15 | 	public Word arg1Word = null;
16 | 	public Word arg2Word = null;
17 | 	public String relationParaphrase = null;
18 | 	public double matchingScore = 0;
19 | 
20 | 	public Word arg1Word_beforeCRR = null;
21 | 	public Word arg2Word_beforeCRR = null;
22 | 
23 | 	public HashMap<Integer, Double> pasList = new HashMap<Integer, Double>();
24 | 
25 | 	public Word preferredSubj = null;
26 | 
27 | 	public char extractingMethod = ' '; // S: StanfordParser; M: MaltParser; N: N-gram; R: rules
28 | 
29 | 	public SimpleRelation()
30 | 	{
31 | 
32 | 	}
33 | 
34 | 	public SimpleRelation(SimpleRelation sr)
35 | 	{
36 | 		arg1Word = sr.arg1Word;
37 | 		arg2Word = sr.arg2Word;
38 | 		relationParaphrase = sr.relationParaphrase;
39 | 		matchingScore = sr.matchingScore;
40 | 		arg1Word_beforeCRR = sr.arg1Word_beforeCRR;
41 | 		arg2Word_beforeCRR = sr.arg2Word_beforeCRR;
42 | 		pasList = sr.pasList;
43 | 		preferredSubj = sr.preferredSubj;
44 | 		extractingMethod = 'R';
45 | 	}
46 | 
47 | 	@Override
48 | 	public String toString() {
49 | 		return arg1Word.originalForm + "," + arg2Word.originalForm + "," + relationParaphrase + "," + matchingScore + "["+extractingMethod+"]";
50 | 		//return arg1Word.getFullEntityName() + "," + arg2Word.getFullEntityName() + "," + relationParaphrase + "," + matchingScore + "["+extractingMethod+"]";
51 | 	}
52 | 
53 | 	public int getHashCode() {
54 | 		return arg1Word.hashCode() ^ arg2Word.hashCode();
55 | 	}
56 | 
57 | 	public void setPasList (String pattern, double matchingScore, boolean[] matchedFlag) {
58 | 		ArrayList<PredicateIDAndSupport> list = Globals.pd.nlPattern_2_predicateList.get(pattern);
59 | 		for (PredicateIDAndSupport pidsup : list) {
60 | 			double sumSelectivity = 0;
61 | 			for (int i = 0; i < matchedFlag.length; i ++) {
62 | 				if (matchedFlag[i]) {
63 | 					sumSelectivity += pidsup.wordSelectivity[i];
64 | 				}
65 | 			}
66 | 			sumSelectivity = matchingScore*sumSelectivity*pidsup.support;
67 | 			int pid = pidsup.predicateID;
68 | 			if (Globals.pd.dbo_predicate_id.contains(pid)) sumSelectivity *= 1.5;
69 | 
70 | 			if (!pasList.containsKey(pid))
71 | 				pasList.put(pid, sumSelectivity);
72 | 			else if (sumSelectivity > pasList.get(pid))
73 | 				pasList.put(pid, sumSelectivity);
74 | 		}
75 | 	}
76 | 
77 | 	public void setPreferedSubjObjOrder(DependencyTree tree) {
78 | 		DependencyTreeNode n1 = tree.getNodeByIndex(this.arg1Word.position).getNNTopTreeNode(tree);
79 | 		DependencyTreeNode n2 = tree.getNodeByIndex(this.arg2Word.position).getNNTopTreeNode(tree);
80 | 		if (n1.father != null && n1.father.word.baseForm.equals("of") && n1.dep_father2child.equals("pobj")) {
81 | 			this.preferredSubj = this.arg1Word;
82 | 		}
83 | 		else if (n2.father != null && n2.father.word.baseForm.equals("of") && n2.dep_father2child.equals("pobj")) {
84 | 			this.preferredSubj = this.arg2Word;
85 | 		}
86 | 	}
87 | 
88 | }
--------------------------------------------------------------------------------
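For concreteness, setPasList in SimpleRelation.java above scores a candidate predicate as matchingScore × (sum of wordSelectivity over the matched pattern words) × support, times 1.5 when it is a dbo predicate: with matchingScore 0.8, matched selectivities 0.5 and 0.3, and support 10, a dbo predicate scores 0.8 × 0.8 × 10 × 1.5 = 9.6, and pasList keeps only the best score per predicate id.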
pasList.get(pid)) 73 | pasList.put(pid, sumSelectivity); 74 | } 75 | } 76 | 77 | public void setPreferedSubjObjOrder(DependencyTree tree) { 78 | DependencyTreeNode n1 = tree.getNodeByIndex(this.arg1Word.position).getNNTopTreeNode(tree); 79 | DependencyTreeNode n2 = tree.getNodeByIndex(this.arg2Word.position).getNNTopTreeNode(tree); 80 | if (n1.father != null && n1.father.word.baseForm.equals("of") && n1.dep_father2child.equals("pobj")) { 81 | this.preferredSubj = this.arg1Word; 82 | } 83 | else if (n2.father != null && n2.father.word.baseForm.equals("of") && n2.dep_father2child.equals("pobj")) { 84 | this.preferredSubj = this.arg2Word; 85 | } 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /genrate_fragments/step5_get_entity_fragment.py: -------------------------------------------------------------------------------- 1 | #encoding=utf-8 2 | inEnEdge = {} 3 | outEnEdge = {} 4 | inEdge={} 5 | outEdge = {} 6 | types = {} 7 | with open('triple file represented by ids here','r') as f: 8 | i = 1 9 | for line in f: 10 | tri = line[:-1].split('\t') 11 | 12 | if tri[1] == 'id of ' and tri[2]!='-1': 13 | if types.has_key(tri[0]): 14 | types[tri[0]].add(tri[2]) 15 | else: 16 | types[tri[0]] = set() 17 | types[tri[0]].add(tri[2]) 18 | else: 19 | if outEdge.has_key(tri[0]): 20 | outEdge[tri[0]].add(tri[1]) 21 | else: 22 | outEdge[tri[0]] = set() 23 | outEdge[tri[0]].add(tri[1]) 24 | 25 | if tri[2]!='-1': 26 | if outEnEdge.has_key(tri[0]): 27 | if outEnEdge[tri[0]].has_key(tri[2]): 28 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 29 | else: 30 | outEnEdge[tri[0]][tri[2]] = set() 31 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 32 | else: 33 | outEnEdge[tri[0]]={} 34 | outEnEdge[tri[0]][tri[2]] = set() 35 | outEnEdge[tri[0]][tri[2]].add(tri[1]) 36 | 37 | if inEdge.has_key(tri[2]): 38 | inEdge[tri[2]].add(tri[1]) 39 | else: 40 | inEdge[tri[2]] = set() 41 | inEdge[tri[2]].add(tri[1]) 42 | if inEnEdge.has_key(tri[2]): 43 | if inEnEdge[tri[2]].has_key(tri[0]): 44 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 45 | else: 46 | inEnEdge[tri[2]][tri[0]] = set() 47 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 48 | else: 49 | inEnEdge[tri[2]] = {} 50 | inEnEdge[tri[2]][tri[0]] = set() 51 | inEnEdge[tri[2]][tri[0]].add(tri[1]) 52 | if i%10000 == 0: 53 | print(i) 54 | i += 1 55 | print(len(inEnEdge)) 56 | print(len(outEnEdge)) 57 | print(len(inEdge)) 58 | print(len(outEdge)) 59 | print(len(types)) 60 | wr = open('output fragment file','w') 61 | for i in range(12301050):#here we should iterate every entitiy 62 | if i%10000 == 0: 63 | print(i) 64 | eid = "%d"%i 65 | ret = "" 66 | tmp = "" 67 | if inEnEdge.has_key(eid): 68 | tmp = "" 69 | for k in inEnEdge[eid].keys(): 70 | tmp += k 71 | tmp += ':' 72 | for item in inEnEdge[eid][k]: 73 | if item == '-1': 74 | continue 75 | tmp += item + ';' 76 | tmp += ',' 77 | ret += tmp 78 | tmp = "" 79 | ret += '|' 80 | 81 | if outEnEdge.has_key(eid): 82 | tmp = "" 83 | for k in outEnEdge[eid].keys(): 84 | tmp += k 85 | tmp += ':' 86 | for item in outEnEdge[eid][k]: 87 | if item == '-1': 88 | continue 89 | tmp += item + ';' 90 | tmp += ',' 91 | ret += tmp 92 | tmp = "" 93 | ret += '|' 94 | 95 | if inEdge.has_key(eid): 96 | tmp = "" 97 | for item in inEdge[eid]: 98 | if item == '-1': 99 | continue 100 | tmp += item + ',' 101 | ret += tmp 102 | tmp="" 103 | ret += '|' 104 | 105 | if outEdge.has_key(eid): 106 | tmp = "" 107 | for item in outEdge[eid]: 108 | if item == '-1': 109 | continue 110 | tmp += item + ',' 111 | ret += tmp 112 | tmp="" 113 | ret += 
'|' 114 | 115 | if types.has_key(eid): 116 | tmp = "" 117 | for item in types[eid]: 118 | if item == '-1': 119 | continue 120 | tmp += item + ',' 121 | ret += tmp 122 | tmp="" 123 | wr.write("%s\t%s\n"%(eid,ret)) 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /src/fgmt/RelationFragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | 9 | import qa.Globals; 10 | import utils.FileUtil; 11 | 12 | public class RelationFragment extends Fragment 13 | { 14 | public static HashMap> relFragments = null; 15 | public static HashMap> relationShortName2IdList = null; 16 | public static HashSet literalRelationSet = null; 17 | 18 | public HashSet inTypes = new HashSet(); 19 | public HashSet outTypes = new HashSet(); 20 | 21 | public static final int literalTypeId = -176; 22 | 23 | public RelationFragment(String inFgmt, String outFgmt, int fid) 24 | { 25 | fragmentId = fid; 26 | fragmentType = typeEnum.RELATION_FRAGMENT; 27 | String[] nums; 28 | 29 | // in 30 | nums = inFgmt.split(","); 31 | for(String s: nums) 32 | if(s.length() > 0) 33 | inTypes.add(Integer.parseInt(s)); 34 | 35 | // out 36 | if(outFgmt.equals("itera")) 37 | outTypes.add(literalTypeId); 38 | else 39 | { 40 | nums = outFgmt.split(","); 41 | for(String s: nums) 42 | if(s.length() > 0) 43 | outTypes.add(Integer.parseInt(s)); 44 | } 45 | } 46 | 47 | public static void load() throws Exception 48 | { 49 | String filename = Globals.localPath + "data/DBpedia2016/fragments/predicate_RDF_fragment/predicate_fragment.txt"; 50 | List inputs = FileUtil.readFile(filename); 51 | relFragments = new HashMap>(); 52 | literalRelationSet = new HashSet(); 53 | 54 | for(String line: inputs) 55 | { 56 | String[] lines = line.split("\t"); 57 | String inString = lines[0].substring(1, lines[0].length()-1); 58 | int pid = Integer.parseInt(lines[1]); 59 | String outString = lines[2].substring(1, lines[2].length()-1); 60 | 61 | // Record which relations can connect LITERAL objects. 
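// Illustrative sketch of one input line (the bracket characters are an
// assumption; only the substring(1, length()-1) stripping above is certain):
//   "[101,102]\t2001\t[305]" -> predicate 2001 links subject types {101,102}
//   to object type {305}, while an object field of "literal" is reduced to
//   "itera" by the same stripping and marks the predicate as literal-valued.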
62 | if(outString.equals("itera")) // "literal".substring(1, length()-1) 63 | literalRelationSet.add(pid); 64 | 65 | if(!relFragments.containsKey(pid)) 66 | relFragments.put(pid, new ArrayList()); 67 | relFragments.get(pid).add(new RelationFragment(inString, outString, pid)); 68 | } 69 | 70 | loadId(); 71 | } 72 | 73 | public static void loadId() throws IOException 74 | { 75 | String filename = Globals.localPath + "data/DBpedia2016/fragments/id_mappings/16predicate_id.txt"; 76 | List inputs = FileUtil.readFile(filename); 77 | relationShortName2IdList = new HashMap>(); 78 | 79 | for(String line: inputs) 80 | { 81 | String[] lines = line.split("\t"); 82 | String rlnShortName = lines[0]; 83 | 84 | if (!relationShortName2IdList.containsKey(rlnShortName)) 85 | relationShortName2IdList.put(rlnShortName, new ArrayList()); 86 | relationShortName2IdList.get(rlnShortName).add(Integer.parseInt(lines[1])); 87 | } 88 | } 89 | 90 | public static boolean isLiteral (String p) 91 | { 92 | for (Integer i : relationShortName2IdList.get(p)) 93 | if (literalRelationSet.contains(i)) 94 | return true; 95 | return false; 96 | } 97 | 98 | public static boolean isLiteral (int pid) 99 | { 100 | if (literalRelationSet.contains(pid)) 101 | return true; 102 | else 103 | return false; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/lcn/BuildIndexForTypeShortName.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.io.File; 4 | import java.util.ArrayList; 5 | import java.util.Date; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | 9 | import org.apache.lucene.analysis.Analyzer; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.document.Document; 12 | import org.apache.lucene.document.Field; 13 | import org.apache.lucene.index.IndexWriter; 14 | 15 | import qa.Globals; 16 | import fgmt.TypeFragment; 17 | 18 | public class BuildIndexForTypeShortName { 19 | public static void buildIndex(HashMap> typeShortName2IdList) throws Exception 20 | { 21 | long startTime = new Date().getTime(); 22 | File indexDir_li = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/type_fragment_index"); 23 | 24 | Analyzer luceneAnalyzer_li = new StandardAnalyzer(); 25 | IndexWriter indexWriter_li = new IndexWriter(indexDir_li, luceneAnalyzer_li,true); 26 | 27 | int mergeFactor = 100000; 28 | int maxBufferedDoc = 1000; 29 | int maxMergeDoc = Integer.MAX_VALUE; 30 | 31 | //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor; 32 | indexWriter_li.setMergeFactor(mergeFactor); 33 | indexWriter_li.setMaxBufferedDocs(maxBufferedDoc); 34 | indexWriter_li.setMaxMergeDocs(maxMergeDoc); 35 | 36 | int count = 0; 37 | Iterator it = typeShortName2IdList.keySet().iterator(); 38 | while (it.hasNext()) 39 | { 40 | String sn = it.next(); 41 | if (sn.length() == 0) { 42 | continue; 43 | } 44 | 45 | count ++; 46 | 47 | StringBuilder splittedSn = new StringBuilder(""); 48 | 49 | if(sn.contains("_")) 50 | { 51 | String nsn = sn.replace("_", " "); 52 | splittedSn.append(nsn.toLowerCase()); 53 | } 54 | else 55 | { 56 | int last = 0, i = 0; 57 | for(i = 0; i < sn.length(); i ++) 58 | { 59 | // if it were not a small letter, then break it. 
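// (i.e. split the short name before every character that is not a lowercase
// letter) Illustrative example with an assumed input: the type name
// "BasketballPlayer" is indexed as "basketball player", which the
// StandardAnalyzer then tokenizes into two searchable Lucene terms.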
60 | if(!(sn.charAt(i)>='a' && sn.charAt(i)<='z')) 61 | { 62 | splittedSn.append(sn.substring(last, i).toLowerCase()); 63 | splittedSn.append(' '); 64 | last = i; 65 | } 66 | } 67 | splittedSn.append(sn.substring(last, i).toLowerCase()); 68 | while(splittedSn.charAt(0) == ' ') { 69 | splittedSn.deleteCharAt(0); 70 | } 71 | } 72 | 73 | System.out.println("SplitttedType: "+splittedSn); 74 | 75 | Document document = new Document(); 76 | 77 | Field SplittedTypeShortName = new Field("SplittedTypeShortName", splittedSn.toString(), 78 | Field.Store.YES, 79 | Field.Index.TOKENIZED, 80 | Field.TermVector.WITH_POSITIONS_OFFSETS); 81 | Field TypeShortName = new Field("TypeShortName", sn, 82 | Field.Store.YES, Field.Index.NO); 83 | 84 | document.add(SplittedTypeShortName); 85 | document.add(TypeShortName); 86 | indexWriter_li.addDocument(document); 87 | } 88 | 89 | indexWriter_li.optimize(); 90 | indexWriter_li.close(); 91 | 92 | // input the time of Build index 93 | long endTime = new Date().getTime(); 94 | System.out.println("TypeShortName index has build ->" + count + " " + "Time:" + (endTime - startTime)); 95 | } 96 | 97 | public static void main (String[] args) { 98 | try { 99 | Globals.localPath="D:/husen/gAnswer/"; 100 | TypeFragment.load(); 101 | BuildIndexForTypeShortName.buildIndex(TypeFragment.typeShortName2IdList); 102 | } catch (Exception e) { 103 | e.printStackTrace(); 104 | } 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/nlp/ds/Word.java: -------------------------------------------------------------------------------- 1 | package nlp.ds; 2 | 3 | import java.util.ArrayList; 4 | 5 | import rdf.EntityMapping; 6 | import rdf.Triple; 7 | import rdf.TypeMapping; 8 | 9 | public class Word implements Comparable 10 | { 11 | public boolean mayCategory = false; 12 | public boolean mayLiteral = false; 13 | public boolean mayEnt = false; 14 | public boolean mayType = false; 15 | public boolean mayExtendVariable = false; 16 | public String category = null; 17 | public ArrayList emList = null; 18 | public ArrayList tmList = null; 19 | public Triple embbededTriple = null; 20 | 21 | public String baseForm = null; 22 | public String originalForm = null; 23 | public String posTag = null; 24 | public int position = -1; // Notice the first word's position = 1 25 | public String key = null; 26 | 27 | public boolean isCovered = false; 28 | public boolean isIgnored = false; 29 | 30 | //Notice: These variables are not used because we merge a phrase to a word if it is a node now. 31 | public String ner = null; // record NER result 32 | public Word nnNext = null; 33 | public Word nnPrev = null; 34 | public Word crr = null; // coreference resolution result 35 | 36 | public Word represent = null; // This word is represented by others, eg, "which book is ..." 
"which" 37 | public boolean omitNode = false; // This word can not be node 38 | public Word modifiedWord = null; // This word modify which word (it modify itself if it is not a modified word) 39 | 40 | public Word (String base, String original, String pos, int posi) { 41 | baseForm = base; 42 | originalForm = original; 43 | posTag = pos; 44 | position = posi; 45 | key = new String(originalForm+"["+position+"]"); 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return key; 51 | } 52 | 53 | public int compareTo(Word another) { 54 | return this.position-another.position; 55 | } 56 | 57 | @Override 58 | public int hashCode() { 59 | return key.hashCode(); 60 | } 61 | 62 | @Override 63 | public boolean equals(Object o) { 64 | return (o instanceof Word) 65 | && originalForm.equals(((Word)o).originalForm) 66 | && position == ((Word)o).position; 67 | } 68 | 69 | // We now discard all NN information and return the word itself. | husen 2016 70 | public Word getNnHead() { 71 | Word w = this; 72 | return w; 73 | 74 | // if(w.mayEnt || w.mayType) 75 | // return w; 76 | // 77 | // while (w.nnPrev != null) { 78 | // w = w.nnPrev; 79 | // } 80 | // return w; 81 | } 82 | 83 | public String getFullEntityName() { 84 | Word w = this.getNnHead(); 85 | return w.originalForm; 86 | 87 | // if(w.mayEnt || w.mayType) 88 | // return w.originalForm; 89 | // 90 | // StringBuilder sb = new StringBuilder(""); 91 | // while (w != null) { 92 | // sb.append(w.originalForm); 93 | // sb.append(' '); 94 | // w = w.nnNext; 95 | // } 96 | // sb.deleteCharAt(sb.length()-1); 97 | // return sb.toString(); 98 | } 99 | 100 | public String getBaseFormEntityName() { 101 | Word w = this.getNnHead(); 102 | if(w.mayEnt || w.mayType) 103 | return w.baseForm; 104 | 105 | StringBuilder sb = new StringBuilder(""); 106 | while (w != null) { 107 | sb.append(w.baseForm); 108 | sb.append(' '); 109 | w = w.nnNext; 110 | } 111 | sb.deleteCharAt(sb.length()-1); 112 | return sb.toString(); 113 | } 114 | 115 | public String isNER () { 116 | return this.getNnHead().ner; 117 | } 118 | 119 | public void setIsCovered () { 120 | Word w = this.getNnHead(); 121 | while (w != null) { 122 | w.isCovered = true; 123 | w = w.nnNext; 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/qa/Query.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | import java.util.ArrayList; 4 | 5 | import nlp.ds.Sentence; 6 | import qa.extract.EntityRecognition; 7 | import rdf.MergedWord; 8 | 9 | /** 10 | * 1. preprocessing of question 11 | * 2. Node Recognition 12 | * @author husen 13 | */ 14 | public class Query 15 | { 16 | public String NLQuestion = null; 17 | public String TransferedQuestion = null; 18 | public ArrayList MergedQuestionList = null; 19 | public ArrayList sList = null; 20 | 21 | public String queryId = null; 22 | public String preLog = ""; 23 | 24 | public ArrayList mWordList = null; 25 | 26 | public Query(){} 27 | public Query(String _question) 28 | { 29 | NLQuestion = _question; 30 | NLQuestion = removeQueryId(NLQuestion); 31 | 32 | TransferedQuestion = getTransferedQuestion(NLQuestion); 33 | 34 | // step1. 
NODE Recognition 35 | MergedQuestionList = getMergedQuestionList(TransferedQuestion); 36 | 37 | // build Sentence 38 | sList = new ArrayList(); 39 | for(String mergedQuestion: MergedQuestionList) 40 | { 41 | Sentence sentence = new Sentence(this, mergedQuestion); 42 | sList.add(sentence); 43 | } 44 | } 45 | 46 | public boolean isDigit(char ch) 47 | { 48 | if(ch>='0' && ch<='9') 49 | return true; 50 | return false; 51 | } 52 | 53 | public boolean isUpperWord(char ch) 54 | { 55 | if(ch>='A' && ch<='Z') 56 | return true; 57 | return false; 58 | } 59 | 60 | /** 61 | * some words -> equivalent words 62 | * 1、stanfordParser often parse incorrect. 63 | * 2、Synonyms unify. eg, movie->film 64 | * @param question 65 | * @return transfered question 66 | */ 67 | public String getTransferedQuestion(String question) 68 | { 69 | //rule1: discard ".", because "." and "_" will be disconnected by parser. Discard word tail's "'", which may pollutes NER 70 | question = question.replace("' ", " "); 71 | String [] words = question.split(" "); 72 | String ret = ""; 73 | for(String word: words) 74 | { 75 | String retWord = word; 76 | //TODO: now just check NUM in head/tail 77 | if(word.length()>=2 && !isDigit(word.charAt(0)) && !isDigit(word.charAt(word.length()-1))) 78 | { 79 | retWord = retWord.replace(".", ""); 80 | } 81 | ret += retWord + " "; 82 | } 83 | if(ret.length()>1) 84 | ret = ret.substring(0,ret.length()-1); 85 | 86 | ret = ret.replace("-", " "); 87 | ret = ret.replace("in america", "in United States"); 88 | 89 | //rule2: as well as -> and 90 | ret = ret.replace("as well as", "and"); 91 | 92 | //rule3: movie -> film 93 | ret = ret.replace(" movie", " film"); 94 | ret = ret.replace(" movies", " films"); 95 | 96 | return ret; 97 | } 98 | 99 | /** 100 | * Recognize entity & type & literal in KB and replace " " in Phrases with "_" 101 | * @param question 102 | * @return merged question list 103 | */ 104 | public ArrayList getMergedQuestionList(String question) 105 | { 106 | ArrayList mergedQuestionList = null; 107 | //entity & type recognize 108 | EntityRecognition er = new EntityRecognition(); 109 | mergedQuestionList = er.process(question); 110 | preLog = er.preLog; 111 | mWordList = er.mWordList; 112 | 113 | return mergedQuestionList; 114 | } 115 | 116 | public String removeQueryId(String question) 117 | { 118 | String ret = question; 119 | int st = question.indexOf("\t"); 120 | if(st!=-1 && question.length()>1 && question.charAt(0)>='0' && question.charAt(0)<='9') 121 | { 122 | queryId = question.substring(0,st); 123 | ret = question.substring(st+1); 124 | System.out.println("Extract QueryId :"+queryId); 125 | } 126 | return ret; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/log/QueryLogger.java: -------------------------------------------------------------------------------- 1 | package log; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | 8 | import javax.servlet.http.HttpServletRequest; 9 | 10 | import qa.Matches; 11 | import qa.Query; 12 | import rdf.EntityMapping; 13 | import rdf.SemanticRelation; 14 | import rdf.Sparql; 15 | import rdf.MergedWord; 16 | import rdf.SemanticUnit; 17 | import qa.Answer; 18 | import nlp.ds.Sentence; 19 | import nlp.ds.Word; 20 | 21 | public class QueryLogger { 22 | public Sentence s = null; 23 | public String ipAdress = null; 24 | 25 | public Word target = null; 26 | public Sparql sparql = null; 27 | public Matches match 
= null; 28 | public ArrayList answers = null; 29 | 30 | public boolean MODE_debug = false; 31 | public boolean MODE_log = true; 32 | public boolean MODE_fragment = true; 33 | public boolean isMaltParserUsed = true; // Notice, we utilize Malt Parser as default parser, which is different from the older version. TODO: some coref rules need changed to fit Malt Parser. 34 | 35 | public HashMap timeTable = null; 36 | public ArrayList mWordList = null; 37 | public ArrayList semanticUnitList = null; 38 | public HashMap semanticRelations = null; 39 | public HashMap potentialSemanticRelations = null; 40 | public HashMap> entityDictionary = null; 41 | public ArrayList rankedSparqls = null; 42 | 43 | public String NRlog = ""; 44 | public String SQGlog = ""; 45 | public int gStoreCallTimes = 0; 46 | 47 | public QueryLogger (Query query) 48 | { 49 | timeTable = new HashMap(); 50 | rankedSparqls = new ArrayList(); 51 | mWordList = query.mWordList; 52 | } 53 | 54 | public void reloadSentence(Sentence sentence) 55 | { 56 | this.s = sentence; 57 | if(this.semanticUnitList != null) 58 | this.semanticUnitList.clear(); 59 | if(this.semanticRelations != null) 60 | this.semanticRelations.clear(); 61 | if(this.rankedSparqls != null) 62 | this.rankedSparqls.clear(); 63 | } 64 | 65 | // Source code: http://edu.21cn.com/java/g_189_755584-1.htm 66 | public static String getIpAddr(HttpServletRequest request) { 67 | String ip = request.getHeader("x-forwarded-for"); 68 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 69 | ip = request.getHeader("Proxy-Client-IP"); 70 | } 71 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 72 | ip = request.getHeader("WL-Proxy-Client-IP"); 73 | } 74 | if(ip == null || ip.length() == 0 || "unknown".equalsIgnoreCase(ip)) { 75 | ip = request.getRemoteAddr(); 76 | } 77 | 78 | int idx; 79 | if((idx = ip.indexOf(',')) != -1) { 80 | ip = ip.substring(0, idx); 81 | } 82 | return ip; 83 | } 84 | 85 | public void reviseAnswers() 86 | { 87 | System.out.println("Revise Answers:"); 88 | answers = new ArrayList(); 89 | if (match == null || sparql == null || match.answers == null || sparql.questionFocus == null) 90 | return; 91 | 92 | HashSet answerSet = new HashSet(); 93 | String questionFocus = sparql.questionFocus; 94 | String sparqlString = sparql.toStringForGStore(); 95 | //System.out.println("mal="+match.answers.length); 96 | for (int i=0;iMain_project_directory 30 | >>Ganswer.jar
31 | >>unzipped files from Ganswer.jar
32 | >>data 33 | >>>unzipped files from dbpedia16.rar
34 | - 在控制台下运行jar包。 35 | ```bash 36 | java -jar Ganswer.jar 37 | ``` 38 | - 等待系统初始化结束，出现Server Ready!字样后，则说明初始化成功，您可以开始通过Http请求访问gAnswer的服务了。 39 | 40 | ### 通过http请求使用GAnswer 41 | 我们为您提供了一个简单的样例，以说明如何通过http请求，获取GAnswer服务。 42 | 您可以通过类似下面的url来访问GAnswer： 43 | http://[ip]:[port]/gSolve/?data={maxAnswerNum:1, maxSparqlNum:2, question:Who is the wife of Donald Trump?} 44 |
其中,[ip]和[port]分别为您启动GAnswer服务的ip地址和端口(端口系统默认为9999),您需要通过在http请求中添加“data”参数,传递一个json字符串给GAnswer。 45 | 在这个样例中,您实际传递的json数据为: 46 | ```json 47 | { 48 | "maxAnswerNum":"1", 49 | "maxSparqlNum":"2", 50 | "question":"Whos is the wife of Donald Trump?" 51 | } 52 | ``` 53 | 其中,maxAnswerNum和maxSparqlNum分别规定了返回的答案和sparql的数量上限,这两个数据项都是可选的。 54 | 一般情况下,这时GAnswer会返回一个json字符串,其中包含了系统生成的sparql和问题答案。 55 | ```json 56 | { 57 | "question":"Who is the wife of Donald Trump?", 58 | "vars":["?wife"], 59 | "sparql":["select DISTINCT ?wife where { \t\t?wife. } LIMIT 1","select DISTINCT ?wife where { ?wife\t\t. } LIMIT 1"], 60 | "results":{"bindings":[{"?wife":{"type":"uri","value":""}}]}, 61 | "status":"200" 62 | } 63 | ``` 64 | 详细信息可以在帮助文档的‘“2.1.1 开始使用”’一章找到。 65 | 66 | 67 | 68 | ### 使用eclipse运行 69 | 当您使用eclipse运行gAnswer系统时,只需要通过clone或者download获取工程源码,然后按正常步骤导入Eclipse工程,同时将lib中的jar包加入Build Path中即可。由于外部jar包过大,无法上传github,您可以从[此处](https://pan.baidu.com/s/18IegmEgj02fF9KQFwaQr0g)下载所有需要的外部jar包,提取码为64jd。或者通过[Google Drive](https://drive.google.com/file/d/1tEsi4pBOBHd2gmwVgIOgt-ypJZQH9G3S)下载。 70 | 这时,您同样需要下载解压dbpedia16.rar,并解压到工程文件根目录下的data文件夹中。与数据路径相关的参数,您可以在qa.Globals.localPath中找到 71 | 72 | ### 注意事项 73 | 要运行gAnswer系统,需要较多的包依赖、文件依赖和外部接口依赖,关于这部分要求,请您参阅帮助文档的“2.4 安装指南”。 74 | 在生成SPARQL查询后,系统默认调用部署在远程服务器上的gStore查询引擎来查找答案。这意味着额外的网络传输开销和可能存在的排队等待开销。 75 | 因此我们强烈建议您在自己的服务器上部署gStore查询引擎并建立对应的知识库。您需要: 76 | 77 | - 下载[DBpedia2016 triples文件](https://pan.baidu.com/s/1l5Oui65sDn8QPYmA0rUvuA),提取码89yy。 78 | - 部署[gStore](http://gstore-pku.com)查询引擎,并使用下载的triples文件来构建数据库。值得提醒的是,DBpedia 2016 triples文件大小为9.9GB,构建数据库需要较大的内存(>10GB)和较长的时间(10小时左右)。 79 | 80 | ## 其他事项 81 | 82 | 我们非常欢迎您使用gAnswer,并向我们提出您的宝贵意见或者bug报告。 83 | 84 | 如果您的意见或者报告被采纳,我们会将您的贡献记录在我们的帮助文档中。 85 | 86 | 我们针对QA任务和gAnswer系统发表了多篇论文,您可以在帮助文档的“3.2 出版物”一章找到相关信息。 87 | 88 | 89 | ## 在gAnswer上使用你自己的数据 90 | 如果您希望将您自己的三元组数据集移植到gAnswer上,那么您需要利用这些三元组为gAnswer重新生成fragments。 我们提供了一个[详细的教程](genrate_fragments/How_to_generate_fragments.md)来帮助您完成这项工作。 91 | -------------------------------------------------------------------------------- /src/qa/Globals.java: -------------------------------------------------------------------------------- 1 | package qa; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | 7 | import lcn.EntityFragmentFields; 8 | import fgmt.RelationFragment; 9 | import fgmt.TypeFragment; 10 | import paradict.ParaphraseDictionary; 11 | import qa.mapping.DBpediaLookup; 12 | import nlp.tool.NERecognizer; 13 | import nlp.tool.CoreNLP; 14 | import nlp.tool.MaltParser; 15 | import nlp.tool.StanfordParser; 16 | import nlp.tool.StopWordsList; 17 | 18 | public class Globals { 19 | // nlp tools 20 | public static CoreNLP coreNLP; 21 | public static StanfordParser stanfordParser; 22 | public static StopWordsList stopWordsList; 23 | public static MaltParser maltParser; 24 | public static NERecognizer nerRecognizer; 25 | // relation paraphrase dictionary 26 | public static ParaphraseDictionary pd; 27 | // entity linking system 28 | public static DBpediaLookup dblk; 29 | public static int MaxAnswerNum = 100; 30 | public static String Dataset = "dbpedia 2016"; 31 | public static String Version = "0.1.2"; 32 | public static String GDBsystem = "gStore v0.7.2"; 33 | 34 | /* 35 | * evaluationMethod: 36 | * 1. baseline(SQG), does not allow CIRCLE and WRONG edge. The structure may be different by changing the TARGET. 37 | * 2. super SQG, allow CIRCLE and WRONG edge. 
The structure is decided by DS tree, and can be changed in query evaluation(TOP-K match) stage. 38 | * */ 39 | public static int evaluationMethod = 2; 40 | 41 | public static String localPath = "./././"; 42 | public static String QueryEngineIP = "dbpedia16.gstore-pku.com"; // Notice, PORT number is in the evaluation function. 43 | public static int QueryEnginePort = 80; 44 | 45 | public static void init () 46 | { 47 | System.out.println("====== gAnswer2.0 over DBpedia ======"); 48 | 49 | long t1, t2, t3, t4, t5, t6, t7, t8, t9; 50 | 51 | t1 = System.currentTimeMillis(); 52 | coreNLP = new CoreNLP(); 53 | 54 | t2 = System.currentTimeMillis(); 55 | stanfordParser = new StanfordParser(); 56 | 57 | t3 = System.currentTimeMillis(); 58 | maltParser = new MaltParser(); 59 | 60 | t4 = System.currentTimeMillis(); 61 | nerRecognizer = new NERecognizer(); 62 | 63 | t5 = System.currentTimeMillis(); 64 | stopWordsList = new StopWordsList(); 65 | 66 | t6 = System.currentTimeMillis(); 67 | pd = new ParaphraseDictionary(); 68 | 69 | t7 = System.currentTimeMillis(); 70 | try 71 | { 72 | EntityFragmentFields.load(); 73 | RelationFragment.load(); 74 | TypeFragment.load(); 75 | } 76 | catch (Exception e1) { 77 | System.out.println("EntityIDs and RelationFragment and TypeFragment loading error!"); 78 | e1.printStackTrace(); 79 | } 80 | 81 | t8 = System.currentTimeMillis(); 82 | dblk = new DBpediaLookup(); 83 | 84 | t9 = System.currentTimeMillis(); 85 | System.out.println("======Initialization======"); 86 | System.out.println("CoreNLP(Lemma): " + (t2-t1) + "ms."); 87 | System.out.println("StanfordParser: " + (t3-t2) + "ms."); 88 | System.out.println("MaltParser: " + (t4-t3) + "ms."); 89 | System.out.println("NERecognizer: " + (t5-t4) + "ms."); 90 | System.out.println("StopWordsList: " + (t6-t5) + "ms."); 91 | System.out.println("ParaphraseDict & posTagPattern: " + (t7-t6) + "ms."); 92 | System.out.println("GraphFragments: " + (t8-t7) + "ms."); 93 | System.out.println("DBpediaLookup: " + (t9-t8) + "ms."); 94 | System.out.println("* Total *: " + (t9-t1) + "ms."); 95 | System.out.println("=========================="); 96 | } 97 | 98 | 99 | /** 100 | * Use as system("pause") in C 101 | */ 102 | public static void systemPause () { 103 | System.out.println("System pause ..."); 104 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 105 | try { 106 | br.readLine(); 107 | } catch (IOException e) { 108 | e.printStackTrace(); 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/lcn/BuildIndexForEntityFragments.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.InputStreamReader; 7 | import java.util.Date; 8 | 9 | import org.apache.lucene.analysis.Analyzer; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.document.Document; 12 | import org.apache.lucene.document.Field; 13 | import org.apache.lucene.index.IndexWriter; 14 | 15 | import qa.Globals; 16 | 17 | 18 | public class BuildIndexForEntityFragments{ 19 | public void indexforentity() throws Exception 20 | { 21 | if(EntityFragmentFields.entityId2Name == null) 22 | EntityFragmentFields.load(); 23 | 24 | long startTime = new Date().getTime(); 25 | 26 | //Try update KB index to DBpedia2015. by husen 2016-04-08 27 | //Try update KB index to DBpedia2016. 
by husen 2018-8-22 28 | File indexDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/lucene/entity_fragment_index"); 29 | File sourceDir_en = new File("D:/husen/gAnswer/data/DBpedia2016/fragments/entity_RDF_fragment/16entity_fragment.txt"); 30 | 31 | Analyzer luceneAnalyzer_en = new StandardAnalyzer(); 32 | IndexWriter indexWriter_en = new IndexWriter(indexDir_en, luceneAnalyzer_en,true); 33 | 34 | int mergeFactor = 100000; //default 10 35 | int maxBufferedDoc = 1000; //default 10 36 | int maxMergeDoc = Integer.MAX_VALUE; //INF 37 | 38 | //indexWriter.DEFAULT_MERGE_FACTOR = mergeFactor; 39 | indexWriter_en.setMergeFactor(mergeFactor); 40 | indexWriter_en.setMaxBufferedDocs(maxBufferedDoc); 41 | indexWriter_en.setMaxMergeDocs(maxMergeDoc); 42 | 43 | 44 | FileInputStream file = new FileInputStream(sourceDir_en); 45 | InputStreamReader in = new InputStreamReader(file,"UTF-8"); 46 | BufferedReader br = new BufferedReader(in); 47 | 48 | int count = 0; 49 | while(true) 50 | { 51 | String _line = br.readLine(); 52 | { 53 | if(_line == null) break; 54 | } 55 | count++; 56 | if(count % 100000 == 0) 57 | System.out.println(count); 58 | 59 | String line = _line; 60 | String temp[] = line.split("\t"); 61 | 62 | if(temp.length != 2) 63 | continue; 64 | else 65 | { 66 | int entity_id = Integer.parseInt(temp[0]); 67 | if(!EntityFragmentFields.entityId2Name.containsKey(entity_id)) 68 | continue; 69 | 70 | String entity_name = EntityFragmentFields.entityId2Name.get(entity_id); 71 | String entity_fragment = temp[1]; 72 | entity_name = entity_name.replace("____", " "); 73 | entity_name = entity_name.replace("__", " "); 74 | entity_name = entity_name.replace("_", " "); 75 | 76 | 77 | Document document = new Document(); 78 | 79 | Field EntityName = new Field("EntityName", entity_name, Field.Store.YES, 80 | Field.Index.TOKENIZED, 81 | Field.TermVector.WITH_POSITIONS_OFFSETS); 82 | Field EntityId = new Field("EntityId", String.valueOf(entity_id), 83 | Field.Store.YES, Field.Index.NO); 84 | Field EntityFragment = new Field("EntityFragment", entity_fragment, 85 | Field.Store.YES, Field.Index.NO); 86 | 87 | document.add(EntityName); 88 | document.add(EntityId); 89 | document.add(EntityFragment); 90 | indexWriter_en.addDocument(document); 91 | } 92 | } 93 | 94 | indexWriter_en.optimize(); 95 | indexWriter_en.close(); 96 | br.close(); 97 | 98 | // input the time of Build index 99 | long endTime = new Date().getTime(); 100 | System.out.println("entity_name index has build ->" + count + " " + "Time:" + (endTime - startTime)); 101 | } 102 | 103 | public static void main(String[] args) 104 | { 105 | BuildIndexForEntityFragments bef = new BuildIndexForEntityFragments(); 106 | 107 | try 108 | { 109 | Globals.localPath="D:/husen/gAnswer/"; 110 | bef.indexforentity(); 111 | } 112 | catch (Exception e) 113 | { 114 | e.printStackTrace(); 115 | } 116 | } 117 | } 118 | 119 | 120 | -------------------------------------------------------------------------------- /src/nlp/ds/DependencyTreeNode.java: -------------------------------------------------------------------------------- 1 | package nlp.ds; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.Stack; 7 | 8 | public class DependencyTreeNode { 9 | public Word word = null; 10 | public String dep_father2child = null; 11 | 12 | public DependencyTreeNode father = null; 13 | public ArrayList childrenList = null; 14 | 15 | public int levelInTree = -1; 16 | 17 | /** 18 | * The constructor for knowing its father 
19 | * 20 | * @param w 21 | * @param dep_father2child 22 | * @param father 23 | */ 24 | public DependencyTreeNode(Word w, String dep_father2child, DependencyTreeNode father) 25 | { 26 | word = w; 27 | this.dep_father2child = dep_father2child; 28 | this.father = father; 29 | this.childrenList = new ArrayList(); 30 | 31 | if(father==null) levelInTree = 0; 32 | else levelInTree = father.levelInTree+1; 33 | } 34 | 35 | /** 36 | * The constructor for not knowing the father 37 | * 38 | * @param word 39 | */ 40 | public DependencyTreeNode(Word w) 41 | { 42 | this.word = w; 43 | this.childrenList = new ArrayList(); 44 | } 45 | 46 | public void sortChildrenList () { 47 | childrenList.trimToSize(); 48 | Collections.sort(childrenList, new DependencyTreeNodeComparator()); 49 | } 50 | 51 | @Override 52 | public String toString(){ 53 | return word.originalForm + "-" + word.posTag + "(" + dep_father2child + ")[" + word.position + "]"; 54 | } 55 | 56 | public static void sortArrayList(ArrayList list) { 57 | Collections.sort(list, new DependencyTreeNodeComparator()); 58 | } 59 | 60 | public DependencyTreeNode containDependencyWithChildren (String dep) { 61 | for (DependencyTreeNode son : childrenList) { 62 | if (son.dep_father2child.equals(dep)) return son; 63 | } 64 | return null; 65 | } 66 | 67 | /** 68 | * equal_or_startWith = true: equal 69 | * equal_or_startWith = false: startWith 70 | * 71 | * @param posChild 72 | * @param equal_or_startWith 73 | * @return 74 | */ 75 | public DependencyTreeNode containPosInChildren (String posChild, boolean equal_or_startWith) { 76 | for (DependencyTreeNode son : childrenList) { 77 | if (equal_or_startWith) { 78 | if (son.word.posTag.equals(posChild)) return son; 79 | } 80 | else { 81 | if (son.word.posTag.startsWith(posChild)) return son; 82 | } 83 | } 84 | return null; 85 | } 86 | 87 | public DependencyTreeNode containWordBaseFormInChildren (String wordBaseFormChild) { 88 | for (DependencyTreeNode son : childrenList) { 89 | if (son.word.baseForm.equals(wordBaseFormChild)) return son; 90 | } 91 | return null; 92 | } 93 | 94 | public DependencyTreeNode getNNTopTreeNode (DependencyTree T) { 95 | if(this.father != null && (this.dep_father2child.equals("nn") || (this.word.posTag.startsWith("NN") && this.dep_father2child.equals("dep")))) { 96 | return this.father.getNNTopTreeNode(T); 97 | } 98 | else return this; 99 | } 100 | 101 | public Word linkNN(DependencyTree T) { 102 | // (Now useless) backtracking the NN connections. 
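// Sketch of the intended effect, with an assumed phrase: for "Barack/NN Obama/NN"
// every node reachable over "nn" arcs is collected, sorted by sentence position,
// and chained through word.nnPrev / word.nnNext; note that the current
// Word.getNnHead() ignores this chain and simply returns the word itself.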
103 | ArrayList nn = new ArrayList(); 104 | 105 | nn.add(this); 106 | 107 | if(this.father != null && (this.dep_father2child.equals("nn") 108 | || (this.word.posTag.startsWith("NN") && this.dep_father2child.equals("dep") && this.father.word.posTag.startsWith("NN")))) { 109 | nn.add(this.father); 110 | for(DependencyTreeNode son : this.father.childrenList) { 111 | if (son != this && son.dep_father2child.equals("nn")) { 112 | nn.add(son); 113 | } 114 | } 115 | } 116 | 117 | Stack stack = new Stack(); 118 | stack.push(this); 119 | while (!stack.empty()) { 120 | DependencyTreeNode curNode = stack.pop(); 121 | for(DependencyTreeNode son : curNode.childrenList) { 122 | if (son.dep_father2child.equals("nn") 123 | || (son.word.posTag.startsWith("NN") && son.dep_father2child.equals("dep") && son.father.word.posTag.startsWith("NN"))) { 124 | nn.add(son); 125 | stack.push(son); 126 | } 127 | } 128 | } 129 | 130 | DependencyTreeNode.sortArrayList(nn); 131 | 132 | int size = nn.size() - 1; 133 | for (int i = 0; i < size; i ++) { 134 | nn.get(i).word.nnNext = nn.get(i+1).word; 135 | nn.get(i+1).word.nnPrev = nn.get(i).word; 136 | } 137 | 138 | return this.word.getNnHead(); 139 | } 140 | 141 | }; 142 | 143 | 144 | class DependencyTreeNodeComparator implements Comparator { 145 | 146 | public int compare(DependencyTreeNode n1, DependencyTreeNode n2) { 147 | return n1.word.position - n2.word.position; 148 | } 149 | 150 | } 151 | -------------------------------------------------------------------------------- /src/addition/AggregationRecognition.java: -------------------------------------------------------------------------------- 1 | package addition; 2 | 3 | import nlp.ds.DependencyTree; 4 | import nlp.ds.DependencyTreeNode; 5 | import nlp.ds.Word; 6 | import qa.Globals; 7 | import rdf.Sparql; 8 | import rdf.Triple; 9 | import log.QueryLogger; 10 | 11 | public class AggregationRecognition { 12 | 13 | // Numbers 14 | static String x[]={"zero","one","two","three","four","five","six","seven","eight","nine"}; 15 | static String y[]={"ten","eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen","eighteen","nineteen"}; 16 | static String z[]={"twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"}; 17 | static int b; 18 | 19 | public static Integer translateNumbers(String str) // 1~100 20 | { 21 | int flag; 22 | try { 23 | b=Integer.valueOf(str); 24 | flag=1; 25 | } 26 | catch (Exception e){ 27 | flag=2; 28 | } 29 | int i,j; 30 | switch(flag) 31 | { 32 | case 1: 33 | return b; 34 | case 2: // Words need to be translated into numbers 35 | for(i=0;i<8;i++) // 20~99 36 | { 37 | for(j=0;j<10;j++) 38 | { 39 | String str1=z[i],str2=x[j]; 40 | if(str.equals((str1))){ 41 | return i*10+20; // 1x 42 | } 43 | 44 | else if(str.equals((str1+" "+str2))){ 45 | return i*10+j+20; 46 | } 47 | } 48 | } 49 | 50 | for(i=0;i<10;i++){ 51 | if(str.equals(x[i])){ 52 | return i; 53 | } 54 | else if(str.equals(y[i])){ 55 | return 10+i; 56 | } 57 | } 58 | 59 | System.out.println("Warning: Can not Translate Number: " + str); 60 | } 61 | return 1; 62 | } 63 | 64 | 65 | public void recognize(QueryLogger qlog) 66 | { 67 | DependencyTree ds = qlog.s.dependencyTreeStanford; 68 | if(qlog.isMaltParserUsed) 69 | ds = qlog.s.dependencyTreeMalt; 70 | 71 | Word[] words = qlog.s.words; 72 | 73 | // how often | how many 74 | if(qlog.s.plainText.indexOf("How many")!=-1||qlog.s.plainText.indexOf("How often")!=-1||qlog.s.plainText.indexOf("how many")!=-1||qlog.s.plainText.indexOf("how often")!=-1) 75 | { 76 | for(Sparql sp: 
qlog.rankedSparqls) 77 | { 78 | sp.countTarget = true; 79 | // How many pages does War and Peace have? --> res:War_and_Peace dbo:numberOfPages ?n . 80 | // ?uri dbo:populationTotal ?inhabitants . 81 | for(Triple triple: sp.tripleList) 82 | { 83 | String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase(); 84 | if(p.contains("number") || p.contains("total") || p.contains("calories") || p.contains("satellites")) 85 | { 86 | sp.countTarget = false; 87 | } 88 | } 89 | } 90 | } 91 | 92 | // more than [num] [node] 93 | for(DependencyTreeNode dtn: ds.nodesList) 94 | { 95 | if(dtn.word.baseForm.equals("more")) 96 | { 97 | if(dtn.father!=null && dtn.father.word.baseForm.equals("than")) 98 | { 99 | DependencyTreeNode tmp = dtn.father; 100 | if(tmp.father!=null && tmp.father.word.posTag.equals("CD") && tmp.father.father!=null && tmp.father.father.word.posTag.startsWith("N")) 101 | { 102 | DependencyTreeNode target = tmp.father.father; 103 | 104 | // Which caves have more than 3 entrances | entranceCount | filter 105 | for(Sparql sp: qlog.rankedSparqls) 106 | { 107 | if(target.father !=null && target.father.word.baseForm.equals("have")) 108 | { 109 | sp.moreThanStr = "GROUP BY ?" + qlog.target.originalForm + "\nHAVING (COUNT(?"+target.word.originalForm + ") > "+tmp.father.word.baseForm+")"; 110 | } 111 | else 112 | { 113 | int num = translateNumbers(tmp.father.word.baseForm); 114 | sp.moreThanStr = "FILTER (?"+target.word.originalForm+"> " + num + ")"; 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | 122 | // most 123 | for(Word word: words) 124 | { 125 | if(word.baseForm.equals("most")) 126 | { 127 | Word modifiedWord = word.modifiedWord; 128 | if(modifiedWord != null) 129 | { 130 | for(Sparql sp: qlog.rankedSparqls) 131 | { 132 | // Which Indian company has the most employees? --> ... dbo:numberOfEmployees ?n . || ?employees dbo:company ... 133 | sp.mostStr = "ORDER BY DESC(COUNT(?"+modifiedWord.originalForm+"))\nOFFSET 0 LIMIT 1"; 134 | for(Triple triple: sp.tripleList) 135 | { 136 | String p = Globals.pd.getPredicateById(triple.predicateID).toLowerCase(); 137 | if(p.contains("number") || p.contains("total")) 138 | { 139 | sp.mostStr = "ORDER BY DESC(?"+modifiedWord.originalForm+")\nOFFSET 0 LIMIT 1"; 140 | } 141 | } 142 | } 143 | } 144 | } 145 | } 146 | } 147 | 148 | public static void main(String[] args) { 149 | System.out.println(translateNumbers("Twelve")); 150 | System.out.println(translateNumbers("thirty two")); 151 | } 152 | 153 | } 154 | -------------------------------------------------------------------------------- /src/rdf/SemanticQueryGraph.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | 7 | import nlp.ds.Word; 8 | 9 | public class SemanticQueryGraph implements Comparable 10 | { 11 | public ArrayList semanticUnitList = null; 12 | public HashMap semanticRelations = new HashMap<>(); 13 | public double score = 0; 14 | 15 | public SemanticQueryGraph(ArrayList suList) 16 | { 17 | semanticUnitList = suList; //TODO: need copy? 
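// Note: suList is kept by reference, so callers that mutate the list afterwards
// also mutate this graph; the copy constructor below is what performs the
// unit-by-unit copy when an independent graph is needed.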
18 | // Calculate Score by a reward function (TODO: using SVM-Rank) 19 | } 20 | 21 | public SemanticQueryGraph(SemanticQueryGraph head) 22 | { 23 | semanticUnitList = new ArrayList<>(); 24 | for(SemanticUnit su: head.semanticUnitList) 25 | semanticUnitList.add(su.copy()); 26 | score = head.score; 27 | } 28 | 29 | public void connect(SemanticUnit u, SemanticUnit v) 30 | { 31 | if(u.equals(v)) 32 | return; 33 | 34 | SemanticUnit su1 = null, su2 = null; 35 | for(SemanticUnit su: this.semanticUnitList) 36 | if(su.equals(u)) 37 | su1 = su; 38 | else if(su.equals(v)) 39 | su2 = su; 40 | if(su1 != null && su2 != null) 41 | if(!su1.neighborUnitList.contains(su2) && !su2.neighborUnitList.contains(su1)) 42 | { 43 | su1.neighborUnitList.add(su2); 44 | su2.neighborUnitList.add(su1); 45 | } 46 | } 47 | 48 | public void merge(SemanticUnit u, SemanticUnit v) 49 | { 50 | SemanticUnit su1 = null, su2 = null; 51 | for(SemanticUnit su: this.semanticUnitList) 52 | if(su.equals(u)) 53 | su1 = su; 54 | else if(su.equals(v)) 55 | su2 = su; 56 | if(su1 != null && su2 != null) 57 | { 58 | for(SemanticUnit su: this.semanticUnitList) 59 | if(su != su2 && su.neighborUnitList.contains(su1) && !su.neighborUnitList.contains(su2)) //TODO: Notice, now REJECT multi-edges; The hash function of SR should be modified to allow multi-edges. 60 | su.neighborUnitList.add(su2); 61 | 62 | this.semanticUnitList.remove(su1); 63 | su2.neighborUnitList.remove(su1); 64 | } 65 | } 66 | 67 | @Override 68 | public int hashCode() { 69 | int code = 0; 70 | for(SemanticUnit su: this.semanticUnitList) 71 | code ^= su.hashCode(); 72 | return code; 73 | } 74 | 75 | @Override 76 | public boolean equals(Object o) 77 | { 78 | if (o instanceof SemanticQueryGraph) 79 | { 80 | int matchCnt = 0; 81 | for(SemanticUnit su1: ((SemanticQueryGraph) o).semanticUnitList) 82 | for(SemanticUnit su2: this.semanticUnitList) 83 | { 84 | if(su1.equals(su2)) 85 | { 86 | if(su1.neighborUnitList.containsAll(su2.neighborUnitList) && su2.neighborUnitList.containsAll(su1.neighborUnitList)) 87 | matchCnt++; 88 | } 89 | } 90 | if(matchCnt == ((SemanticQueryGraph) o).semanticUnitList.size() && matchCnt == this.semanticUnitList.size()) 91 | return true; 92 | } 93 | return false; 94 | } 95 | 96 | @Override 97 | public int compareTo(SemanticQueryGraph o) 98 | { 99 | double diff = this.score - o.score; 100 | if (diff > 0) return -1; 101 | else if (diff < 0) return 1; 102 | else return 0; 103 | } 104 | 105 | public boolean isFinalState() 106 | { 107 | if(semanticUnitList == null || semanticUnitList.isEmpty()) 108 | return false; 109 | 110 | // Basic assumption: a final Semantic Query Graph should be Connected. 
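// i.e. run a DFS from an arbitrary start unit and require that every unit is
// visited; any unreached unit means the graph is still disconnected.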
111 | HashSet visited = new HashSet<>(); 112 | SemanticUnit start = semanticUnitList.get(0); 113 | visited.add(start); 114 | dfs(start, visited); 115 | 116 | if(visited.size() == semanticUnitList.size()) 117 | return true; 118 | return false; 119 | } 120 | 121 | private void dfs(SemanticUnit headNode, HashSet visited) 122 | { 123 | for(SemanticUnit curNode: headNode.neighborUnitList) 124 | if(!visited.contains(curNode)) 125 | { 126 | visited.add(curNode); 127 | dfs(curNode, visited); 128 | } 129 | 130 | for(SemanticUnit curNode: semanticUnitList) 131 | { 132 | if(curNode.neighborUnitList.contains(headNode) || headNode.neighborUnitList.contains(curNode)) 133 | { 134 | if(!visited.contains(curNode)) 135 | { 136 | visited.add(curNode); 137 | dfs(curNode, visited); 138 | } 139 | } 140 | } 141 | } 142 | 143 | public void calculateScore(HashMap potentialSemanticRelations) 144 | { 145 | // 1. entity/type score 146 | double entSco = 0; 147 | for(SemanticUnit su: this.semanticUnitList) 148 | { 149 | Word w = su.centerWord; 150 | if(w.mayEnt && w.emList.size()>0) 151 | entSco += w.emList.get(0).score * 100; 152 | if(w.mayType && w.tmList.size()>0) 153 | entSco += w.tmList.get(0).score; 154 | } 155 | // 2. relation score 156 | double relSco = 0; 157 | int relCnt = 0; 158 | for(SemanticUnit su1: this.semanticUnitList) 159 | for(SemanticUnit su2: su1.neighborUnitList) 160 | { 161 | //Deduplicate 162 | if(su1.centerWord.position > su2.centerWord.position) 163 | continue; 164 | 165 | relCnt++; 166 | int key = su1.centerWord.getNnHead().hashCode() ^ su2.centerWord.getNnHead().hashCode(); 167 | SemanticRelation sr = potentialSemanticRelations.get(key); 168 | if(sr == null) 169 | System.err.println("No semantic relation for: " + su1 + " & " + su2); 170 | else 171 | { 172 | relSco += sr.predicateMappings.get(0).score; 173 | semanticRelations.put(key, sr); 174 | } 175 | } 176 | relSco/=relCnt; //average 177 | this.score = entSco + relSco; 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/rdf/SemanticRelation.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | 5 | import rdf.SimpleRelation; 6 | 7 | import nlp.ds.Word; 8 | 9 | public class SemanticRelation { 10 | public Word arg1Word = null; 11 | public Word arg2Word = null; 12 | public String relationParaphrase = null; // longest match 13 | public double LongestMatchingScore = 0; // longest match score 14 | 15 | //judge difference when copy semantic relation from special pattern 16 | public int arg1SuffixId = 0; 17 | public int arg2SuffixId = 0; 18 | 19 | public Word arg1Word_beforeCRR = null; 20 | public Word arg2Word_beforeCRR = null; 21 | 22 | public ArrayList predicateMappings = null; 23 | 24 | public boolean isArg1Constant = false; 25 | public boolean isArg2Constant = false; 26 | 27 | public char extractingMethod = ' '; // S: StanfordParser; M: MaltParser; N: N-gram; R: rules 28 | 29 | public SemanticRelation dependOnSemanticRelation = null; 30 | public Word preferredSubj = null; 31 | 32 | public boolean isSteadyEdge = true; 33 | 34 | public SemanticRelation(SemanticRelation r2) { 35 | arg1Word = r2.arg1Word; 36 | arg2Word = r2.arg2Word; 37 | relationParaphrase = r2.relationParaphrase; 38 | LongestMatchingScore = r2.LongestMatchingScore; 39 | 40 | arg1SuffixId = r2.arg1SuffixId; 41 | arg2SuffixId = r2.arg2SuffixId; 42 | 43 | arg1Word_beforeCRR = r2.arg1Word_beforeCRR; 44 | arg2Word_beforeCRR = 
r2.arg2Word_beforeCRR; 45 | 46 | arg1Word.emList = r2.arg1Word.emList; 47 | arg2Word.emList = r2.arg2Word.emList; 48 | predicateMappings = r2.predicateMappings; 49 | 50 | // arg1Types = r2.arg1Types; 51 | // arg2Types = r2.arg2Types; 52 | 53 | isArg1Constant = r2.isArg1Constant; 54 | isArg2Constant = r2.isArg2Constant; 55 | 56 | extractingMethod = r2.extractingMethod; 57 | 58 | dependOnSemanticRelation = r2.dependOnSemanticRelation; 59 | preferredSubj = r2.preferredSubj; 60 | } 61 | 62 | public void swapArg1Arg2() 63 | { 64 | Word tmpWord = arg1Word; 65 | arg1Word = arg2Word; 66 | arg2Word = tmpWord; 67 | int tmpSuffixId = arg1SuffixId; 68 | arg1SuffixId = arg2SuffixId; 69 | arg2SuffixId = tmpSuffixId; 70 | tmpWord = arg1Word_beforeCRR; 71 | arg1Word_beforeCRR = arg2Word_beforeCRR; 72 | arg2Word_beforeCRR = tmpWord; 73 | boolean tmpBool = isArg1Constant; 74 | isArg1Constant = isArg2Constant; 75 | isArg2Constant = tmpBool; 76 | } 77 | 78 | public SemanticRelation (SimpleRelation simr) { 79 | if (simr.preferredSubj == null) { 80 | if (simr.arg1Word.compareTo(simr.arg2Word) < 0) { 81 | this.arg1Word = simr.arg1Word; 82 | this.arg2Word = simr.arg2Word; 83 | this.arg1Word_beforeCRR = simr.arg1Word_beforeCRR; 84 | this.arg2Word_beforeCRR = simr.arg2Word_beforeCRR; 85 | } 86 | else { 87 | this.arg1Word = simr.arg2Word; 88 | this.arg2Word = simr.arg1Word; 89 | this.arg1Word_beforeCRR = simr.arg2Word_beforeCRR; 90 | this.arg2Word_beforeCRR = simr.arg1Word_beforeCRR; 91 | } 92 | this.extractingMethod = simr.extractingMethod; 93 | } 94 | else { 95 | if (simr.arg1Word == simr.preferredSubj) { 96 | this.arg1Word = simr.arg1Word; 97 | this.arg2Word = simr.arg2Word; 98 | this.arg1Word_beforeCRR = simr.arg1Word_beforeCRR; 99 | this.arg2Word_beforeCRR = simr.arg2Word_beforeCRR; 100 | this.preferredSubj = simr.preferredSubj; 101 | } 102 | else { 103 | this.arg1Word = simr.arg2Word; 104 | this.arg2Word = simr.arg1Word; 105 | this.arg1Word_beforeCRR = simr.arg2Word_beforeCRR; 106 | this.arg2Word_beforeCRR = simr.arg1Word_beforeCRR; 107 | this.preferredSubj = simr.preferredSubj; 108 | } 109 | this.extractingMethod = simr.extractingMethod; 110 | } 111 | } 112 | 113 | @Override 114 | public int hashCode() { 115 | return arg1Word.hashCode() ^ arg2Word.hashCode() + arg1SuffixId + arg2SuffixId; 116 | } 117 | 118 | @Override 119 | public boolean equals(Object o) { 120 | if (o instanceof SemanticRelation) { 121 | SemanticRelation sr2 = (SemanticRelation) o; 122 | if (this.arg1Word.equals(sr2.arg1Word) 123 | && this.arg2Word.equals(sr2.arg2Word) 124 | && this.arg1SuffixId == sr2.arg1SuffixId 125 | && this.arg2SuffixId == sr2.arg2SuffixId 126 | && this.relationParaphrase.equals(sr2.relationParaphrase) 127 | && this.LongestMatchingScore == sr2.LongestMatchingScore) { 128 | return true; 129 | } 130 | } 131 | return false; 132 | } 133 | 134 | @Override 135 | public String toString() { 136 | return arg1Word.originalForm + "," + arg2Word.originalForm + "," + relationParaphrase + "," + LongestMatchingScore + "["+extractingMethod+"]"; 137 | // return arg1Word.getFullEntityName() + "," + arg2Word.getFullEntityName() + "," + relationParaphrase + "," + LongestMatchingScore + "["+extractingMethod+"]"; 138 | } 139 | 140 | public void normalizeScore() 141 | { 142 | double maxScore; 143 | 144 | if (arg1Word.emList!=null && !arg1Word.emList.isEmpty()) 145 | { 146 | maxScore=0.0; 147 | for (EntityMapping em : arg1Word.emList) 148 | maxScore = Math.max(maxScore, em.score); 149 | for (EntityMapping em : arg1Word.emList) 150 | em.score = 
em.score/maxScore; 151 | } 152 | 153 | if (arg2Word.emList!=null && !arg2Word.emList.isEmpty()) 154 | { 155 | maxScore=0.0; 156 | for (EntityMapping em : arg2Word.emList) 157 | maxScore = Math.max(maxScore, em.score); 158 | for (EntityMapping em : arg2Word.emList) 159 | em.score = em.score/maxScore; 160 | } 161 | 162 | if (predicateMappings!=null && !predicateMappings.isEmpty()) 163 | { 164 | maxScore=0.0; 165 | for (PredicateMapping pm : predicateMappings) 166 | maxScore = Math.max(maxScore, pm.score); 167 | for (PredicateMapping pm : predicateMappings) 168 | pm.score = pm.score/maxScore; 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/qa/mapping/DBpediaLookup.java: -------------------------------------------------------------------------------- 1 | package qa.mapping; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | 9 | import lcn.EntityFragmentFields; 10 | import log.QueryLogger; 11 | 12 | import org.apache.commons.httpclient.HttpClient; 13 | import org.apache.commons.httpclient.HttpException; 14 | import org.apache.commons.httpclient.methods.GetMethod; 15 | 16 | import fgmt.EntityFragment; 17 | import rdf.EntityMapping; 18 | 19 | public class DBpediaLookup { 20 | //There are two websites of the DBpediaLookup online service. 21 | //public static final String baseURL = "http://en.wikipedia.org/w/api.php?action=opensearch&format=xml&limit=10&search="; 22 | public static final String baseURL = "http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?MaxHits=5&QueryString="; 23 | 24 | public HttpClient ctripHttpClient = null; 25 | 26 | //public static final String begin = ""; 27 | //public static final String begin = "\n "; 31 | public static final String end = ""; 32 | public static final int end_length = end.length(); 33 | 34 | public static HashMapentMentionDict = null; // TODO: base on redirect data & wikipedia click data to build mention2ent's dictionary, now just manually 35 | 36 | public DBpediaLookup() 37 | { 38 | ctripHttpClient = new HttpClient(); 39 | ctripHttpClient.setTimeout(3000); 40 | 41 | entMentionDict = new HashMap(); 42 | entMentionDict.put("Prince_Charles", "Charles,_Prince_of_Wales"); 43 | } 44 | 45 | public ArrayList getEntityMappings(String searchString, QueryLogger qlog) 46 | { 47 | ArrayList slist = new ArrayList(); 48 | if(entMentionDict.containsKey(searchString)) 49 | slist.add(entMentionDict.get(searchString)); 50 | else 51 | slist = lookForEntityNames(searchString, qlog); 52 | 53 | if (slist.size() == 0 && searchString.contains(". ")) 54 | slist.addAll(lookForEntityNames(searchString.replaceAll(". 
", "."), qlog)); 55 | 56 | ArrayList emlist = new ArrayList(); 57 | 58 | // Now string use "_" as delimiter (original) 59 | String[] sa = searchString.split("_"); 60 | int UpperCnt = 0; 61 | for(String str: sa) 62 | { 63 | if( (str.charAt(0)>='A'&&str.charAt(0)<='Z') || (str.charAt(0)>='0'&&str.charAt(0)<='9') ) 64 | UpperCnt ++; 65 | } 66 | 67 | System.out.print("DBpediaLookup find: " + slist + ", "); 68 | 69 | int count = 40; 70 | for (String s : slist) 71 | { 72 | //consider ABBR only when all UPPER; drop when too long edit distance 73 | if(UpperCnt < sa.length && EntityFragment.calEditDistance(s, searchString.replace("_", ""))>searchString.length()/2) 74 | continue; 75 | 76 | int eid = -1; 77 | s = s.replace(" ", "_"); 78 | if(EntityFragmentFields.entityName2Id.containsKey(s)) 79 | { 80 | eid = EntityFragmentFields.entityName2Id.get(s); 81 | emlist.add(new EntityMapping(eid, s, count)); 82 | count -=2 ; 83 | } 84 | else 85 | { 86 | System.out.print("Drop "+s+" because it not in Entity Dictionary. "); 87 | } 88 | } 89 | System.out.println("DBpediaLookup select: " + emlist); 90 | 91 | return emlist; 92 | } 93 | 94 | public ArrayList lookForEntityNames (String searchString, QueryLogger qlog) { 95 | // URL transition: " " -> %20 96 | GetMethod getMethod = new GetMethod((baseURL+searchString).replaceAll(" ", "%20")); 97 | ArrayList ret = new ArrayList(); 98 | int statusCode; 99 | 100 | try { 101 | statusCode = ctripHttpClient.executeMethod(getMethod); 102 | } catch (HttpException e) { 103 | e.printStackTrace(); 104 | return ret; 105 | } catch (IOException e) { 106 | e.printStackTrace(); 107 | return ret; 108 | } 109 | 110 | if (statusCode!=200) return null; 111 | 112 | String response = getMethod.getResponseBodyAsString(); 113 | if (qlog != null && qlog.MODE_debug) { 114 | System.out.println("searchString=" + searchString); 115 | System.out.println("statusCode=" + statusCode); 116 | System.out.println("response=" + getMethod.getResponseBodyAsString()); 117 | } 118 | getMethod.releaseConnection(); 119 | 120 | //System.out.println(response); 121 | 122 | if (response == null || response.isEmpty()) 123 | return ret; 124 | int idx1 = response.indexOf(begin); 125 | while (idx1 != -1) { 126 | int idx2 = response.indexOf(end, idx1+begin_length); 127 | String ss = response.substring(idx1+begin_length, idx2); 128 | ret.add(ss); 129 | //System.out.println(ss); 130 | idx1 = response.indexOf(begin, idx2 + end_length); 131 | } 132 | 133 | return ret; 134 | } 135 | 136 | public static void main(String argv[]){ 137 | 138 | DBpediaLookup dbplook = new DBpediaLookup(); 139 | 140 | BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 141 | try { 142 | while (true) { 143 | System.out.println("Test DBpediaLookup."); 144 | System.out.print("Please input the search string: "); 145 | String searchString = br.readLine(); 146 | try { 147 | long t1 = System.currentTimeMillis(); 148 | ArrayList res = dbplook.lookForEntityNames(searchString, null); 149 | long t2 = System.currentTimeMillis(); 150 | System.out.println(res); 151 | System.out.println("time=" + (t2-t1) + "ms"); 152 | } catch (Exception e) { 153 | e.printStackTrace(); 154 | } 155 | } 156 | } catch (IOException e) { 157 | e.printStackTrace(); 158 | } 159 | 160 | 161 | return; 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/fgmt/TypeFragment.java: -------------------------------------------------------------------------------- 1 | package fgmt; 2 | 3 | import 
java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileInputStream; 6 | import java.io.IOException; 7 | import java.io.InputStreamReader; 8 | import java.util.ArrayList; 9 | import java.util.HashMap; 10 | import java.util.HashSet; 11 | 12 | import qa.Globals; 13 | 14 | 15 | public class TypeFragment extends Fragment { 16 | 17 | public static HashMap<Integer, TypeFragment> typeFragments = null; 18 | public static HashMap<String, ArrayList<Integer>> typeShortName2IdList = null; 19 | public static HashMap<Integer, String> typeId2ShortName = null; 20 | public static final int NO_RELATION = -24232; 21 | 22 | public static HashSet<String> yagoTypeList = null; 23 | 24 | public HashSet<Integer> inEdges = new HashSet<Integer>(); 25 | public HashSet<Integer> outEdges = new HashSet<Integer>(); 26 | public HashSet<Integer> entSet = new HashSet<Integer>(); 27 | 28 | /* 29 | * Eliminate some bad YAGO Types which conflict with: 30 | * 1. ENT: amazon, earth, the_hunger_game, sparkling_wine 31 | * 2. TYPE: type 32 | * 3. RELATION: flow, owner, series, shot, part, care 33 | * 4. others: peace, vice 34 | */ 35 | public static ArrayList<String> stopYagoTypeList = null; 36 | static void loadStopYagoTypeList() 37 | { 38 | stopYagoTypeList = new ArrayList<String>(); 39 | stopYagoTypeList.add("Amazon"); 40 | stopYagoTypeList.add("Earth"); 41 | stopYagoTypeList.add("TheHungerGames"); 42 | stopYagoTypeList.add("SparklingWine"); 43 | stopYagoTypeList.add("Type"); 44 | stopYagoTypeList.add("Flow"); 45 | stopYagoTypeList.add("Owner"); 46 | stopYagoTypeList.add("Series"); 47 | stopYagoTypeList.add("Shot"); 48 | stopYagoTypeList.add("Part"); 49 | stopYagoTypeList.add("Care"); 50 | stopYagoTypeList.add("Peace"); 51 | stopYagoTypeList.add("Vice"); 52 | stopYagoTypeList.add("Dodo"); 53 | stopYagoTypeList.add("CzechFilms"); 54 | stopYagoTypeList.add("ChineseFilms"); 55 | } 56 | 57 | public TypeFragment(String fgmt, int fid) 58 | { 59 | fragmentId = fid; 60 | fragmentType = typeEnum.TYPE_FRAGMENT; 61 | 62 | fgmt = fgmt.replace('|', '#'); 63 | String[] ss = fgmt.split("#"); 64 | String[] nums; 65 | 66 | if (ss[0].length() > 0) { 67 | nums = ss[0].split(","); 68 | for(int i = 0; i < nums.length; i ++) { 69 | if (nums[i].length() > 0) { 70 | inEdges.add(Integer.parseInt(nums[i])); 71 | } 72 | } 73 | } 74 | else { 75 | inEdges.add(NO_RELATION); 76 | } 77 | 78 | if (ss.length > 1 && ss[1].length() > 0) { 79 | nums = ss[1].split(","); 80 | for(int i = 0; i < nums.length; i ++) { 81 | if (nums[i].length() > 0) { 82 | outEdges.add(Integer.parseInt(nums[i])); 83 | } 84 | } 85 | } 86 | else { 87 | outEdges.add(NO_RELATION); 88 | } 89 | 90 | if(ss.length > 2 && ss[2].length() > 0) 91 | { 92 | nums = ss[2].split(","); 93 | for(int i = 0; i < nums.length; i ++) { 94 | if (nums[i].length() > 0) { 95 | entSet.add(Integer.parseInt(nums[i])); 96 | } 97 | } 98 | } 99 | } 100 | 101 | public static void load() throws Exception 102 | { 103 | String filename = Globals.localPath+"data/DBpedia2016/fragments/class_RDF_fragment/16type_fragment.txt"; 104 | 105 | File file = new File(filename); 106 | InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8"); 107 | BufferedReader br = new BufferedReader(in); 108 | 109 | typeFragments = new HashMap<Integer, TypeFragment>(); 110 | 111 | System.out.println("Loading type IDs and Fragments ..."); 112 | String line; 113 | while((line = br.readLine()) != null) { 114 | String[] lines = line.split("\t"); 115 | TypeFragment tfgmt = null; 116 | if(lines[0].length() > 0 && !lines[0].equals("literal")) { 117 | int tid = Integer.parseInt(lines[0]); 118 | try{tfgmt = new TypeFragment(lines[1], tid);} 119 | catch(Exception e){} 120 | 121 | 122 | 
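// NOTE: if the constructor above threw, tfgmt is still null here but is stored anyway, so readers of typeFragments must be prepared for null values.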
typeFragments.put(tid, tfgmt); 123 | } 124 | } 125 | 126 | br.close(); 127 | 128 | // some data could be fixed up here 129 | // load Type Id 130 | loadId(); 131 | System.out.println("Load "+typeId2ShortName.size()+" basic types and "+yagoTypeList.size()+" yago types."); 132 | } 133 | 134 | public static void loadId() throws IOException 135 | { 136 | String filename = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16basic_types_id.txt"; 137 | String yagoFileName = Globals.localPath+"data/DBpedia2016/fragments/id_mappings/16yago_types_list.txt"; 138 | 139 | File file = new File(filename); 140 | InputStreamReader in = new InputStreamReader(new FileInputStream(file),"utf-8"); 141 | BufferedReader br = new BufferedReader(in); 142 | 143 | typeShortName2IdList = new HashMap<String, ArrayList<Integer>>(); 144 | typeId2ShortName = new HashMap<Integer, String>(); 145 | 146 | String line; 147 | while((line = br.readLine()) != null) { 148 | String[] lines = line.split("\t"); 149 | String typeShortName = lines[0]; 150 | // preserve typeShortName's capitalization 151 | if (!typeShortName2IdList.containsKey(typeShortName)) { 152 | typeShortName2IdList.put(typeShortName, new ArrayList<Integer>()); 153 | } 154 | typeShortName2IdList.get(typeShortName).add(Integer.parseInt(lines[1])); 155 | typeId2ShortName.put(Integer.parseInt(lines[1]), typeShortName); 156 | } 157 | 158 | // literalType 159 | typeShortName2IdList.put("literal_HRZ", new ArrayList<Integer>()); 160 | typeShortName2IdList.get("literal_HRZ").add(RelationFragment.literalTypeId); 161 | typeId2ShortName.put(RelationFragment.literalTypeId, "literal_HRZ"); 162 | 163 | br.close(); 164 | 165 | //load YAGO types 166 | in = new InputStreamReader(new FileInputStream(yagoFileName),"utf-8"); 167 | br = new BufferedReader(in); 168 | yagoTypeList = new HashSet<String>(); 169 | while((line = br.readLine())!=null) 170 | { 171 | String[] lines = line.split("\t"); 172 | String typeName = lines[0]; 173 | yagoTypeList.add(typeName); 174 | } 175 | 176 | loadStopYagoTypeList(); 177 | yagoTypeList.removeAll(stopYagoTypeList); 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/lcn/SearchInTypeShortName.java: -------------------------------------------------------------------------------- 1 | package lcn; 2 | 3 | import java.util.ArrayList; 4 | 5 | import org.apache.lucene.analysis.Analyzer; 6 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 7 | import org.apache.lucene.queryParser.ParseException; 8 | import org.apache.lucene.queryParser.QueryParser; 9 | import org.apache.lucene.search.Hits; 10 | import org.apache.lucene.search.IndexSearcher; 11 | import org.apache.lucene.search.Query; 12 | 13 | import fgmt.TypeFragment; 14 | import qa.Globals; 15 | import rdf.TypeMapping; 16 | 17 | public class SearchInTypeShortName { 18 | // get id and score -- husen 19 | public ArrayList<TypeMapping> searchTypeScore(String s, double thres1, double thres2, int k) throws Exception 20 | { 21 | Hits hits = null; 22 | String queryString = s; 23 | Query query = null; 24 | 25 | IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index"); 26 | 27 | ArrayList<TypeMapping> tmList = new ArrayList<TypeMapping>(); 28 | 29 | Analyzer analyzer = new StandardAnalyzer(); 30 | try { 31 | QueryParser qp = new QueryParser("SplittedTypeShortName", analyzer); 32 | query = qp.parse(queryString); 33 | } catch (ParseException e) { 34 | e.printStackTrace(); 35 | } 36 | 37 | if (searcher != null) { 38 | hits = searcher.search(query); 39 | 40 | //System.out.println("find " + hits.length() + " 
matched type."); 41 | if (hits.length() > 0) { 42 | for (int i=0; i= thres1) 46 | { 47 | //System.out.println("Score>=thres1("+thres1+") ---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 48 | String type = hits.doc(i).get("TypeShortName"); 49 | System.out.println("Matched type: " + type + " : " + hits.score(i)); 50 | 51 | ArrayList ret_in = TypeFragment.typeShortName2IdList.get(type); 52 | if(ret_in!=null) 53 | { 54 | for(Integer tid: ret_in) 55 | { 56 | TypeMapping typeMapping = new TypeMapping(tid, hits.doc(i).get("TypeShortName"), hits.score(i)); 57 | tmList.add(typeMapping); 58 | } 59 | } 60 | } 61 | else { 62 | break; 63 | } 64 | } 65 | else { 66 | if(hits.score(i) >= thres2) 67 | { 68 | System.out.println("<<<<---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 69 | 70 | ArrayList ret_in = TypeFragment.typeShortName2IdList.get(s); 71 | if(ret_in!=null) 72 | { 73 | for(Integer tid: ret_in) 74 | { 75 | TypeMapping typeMapping = new TypeMapping(tid, hits.doc(i).get("TypeShortName"), hits.score(i)); 76 | tmList.add(typeMapping); 77 | } 78 | } 79 | } 80 | else { 81 | break; 82 | } 83 | } 84 | } 85 | } 86 | } 87 | return tmList; 88 | } 89 | 90 | public ArrayList searchType(String s, double thres1, double thres2, int k) throws Exception 91 | { 92 | Hits hits = null; 93 | String queryString = null; 94 | Query query = null; 95 | 96 | IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index"); 97 | 98 | ArrayList typeNames = new ArrayList(); 99 | 100 | //String[] array = s.split(" "); 101 | //queryString = array[array.length-1]; 102 | queryString = s; 103 | 104 | Analyzer analyzer = new StandardAnalyzer(); 105 | try { 106 | QueryParser qp = new QueryParser("SplittedTypeShortName", analyzer); 107 | query = qp.parse(queryString); 108 | } catch (ParseException e) { 109 | e.printStackTrace(); 110 | } 111 | 112 | if (searcher != null) { 113 | hits = searcher.search(query); 114 | 115 | System.out.println("find " + hits.length() + " answars!"); 116 | if (hits.length() > 0) { 117 | for (int i=0; i= thres1){ 121 | System.out.println("Score>=thres1("+thres1+") ---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 122 | typeNames.add(hits.doc(i).get("TypeShortName")); 123 | //if (satisfiedStrictly(hits.doc(i).get("SplittedTypeShortName"), queryString)) typeNames.add(hits.doc(i).get("TypeShortName")); 124 | } 125 | else { 126 | //break; 127 | } 128 | } 129 | else { 130 | if(hits.score(i) >= thres2){ 131 | System.out.println("<<<<---" + hits.doc(i).get("TypeShortName") + " : " + hits.score(i)); 132 | typeNames.add(hits.doc(i).get("TypeShortName")); 133 | //if (satisfiedStrictly(hits.doc(i).get("SplittedTypeShortName"), queryString)) typeNames.add(hits.doc(i).get("TypeShortName")); 134 | } 135 | else { 136 | break; 137 | } 138 | } 139 | } 140 | } 141 | } 142 | return typeNames; 143 | } 144 | 145 | private boolean satisfiedStrictly (String splittedTypeShortName, String queryString) 146 | { 147 | String[] tnames = splittedTypeShortName.toLowerCase().split(" "); 148 | String[] qnames = queryString.toLowerCase().split(" "); 149 | for (int i = 0; i < tnames.length; i ++) { 150 | if (tnames[i].length() == 0) continue; 151 | boolean matched = false; 152 | for (int j = 0; j < qnames.length; j ++) { 153 | if (tnames[i].equals(qnames[j])) { 154 | matched = true; 155 | break; 156 | } 157 | } 158 | if (!matched && !Globals.stopWordsList.isStopWord(tnames[i])) { 159 | return false; 160 | } 161 | } 162 | String qlast = 
qnames[qnames.length-1]; 163 | boolean flag = false; 164 | for (int i = 0; i < tnames.length; i ++) { 165 | if (tnames[i].length() == 0) continue; 166 | if (tnames[i].equals(qlast)) { 167 | flag = true; 168 | break; 169 | } 170 | } 171 | 172 | if (flag) return true; 173 | else return false; 174 | } 175 | 176 | } 177 | -------------------------------------------------------------------------------- /src/qa/extract/CorefResolution.java: -------------------------------------------------------------------------------- 1 | package qa.extract; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | 6 | import qa.Globals; 7 | 8 | import log.QueryLogger; 9 | 10 | import nlp.ds.DependencyTree; 11 | import nlp.ds.DependencyTreeNode; 12 | import nlp.ds.Word; 13 | import rdf.SimpleRelation; 14 | 15 | public class CorefResolution { 16 | /** 17 | * 1. A very simple reference resolution. 18 | * 2. Coref resolution should be done after relation extraction and before item mapping. 19 | */ 20 | public void process(ArrayList<SimpleRelation> simpleRelations, QueryLogger qlog) { 21 | if (qlog.s.words.length <= 4) return; // if the sentence is too short, skip the coref step. 22 | System.out.println("=====Co-reference resolution======="); 23 | ArrayList<SimpleRelation> deleteList = new ArrayList<SimpleRelation>(); 24 | 25 | for(SimpleRelation sr : simpleRelations) { 26 | Word w1=null, w2=null; 27 | 28 | if (sr.extractingMethod == 'S') { 29 | w1 = getRefWord(sr.arg1Word.getNnHead(), qlog.s.dependencyTreeStanford, qlog); 30 | w2 = getRefWord(sr.arg2Word.getNnHead(), qlog.s.dependencyTreeStanford, qlog); 31 | } 32 | else if (sr.extractingMethod == 'M') { 33 | w1 = getRefWord(sr.arg1Word.getNnHead(), qlog.s.dependencyTreeMalt, qlog); 34 | w2 = getRefWord(sr.arg2Word.getNnHead(), qlog.s.dependencyTreeMalt, qlog); 35 | } 36 | else { 37 | continue; 38 | } 39 | 40 | if (w1 != null) { 41 | sr.arg1Word_beforeCRR = sr.arg1Word; 42 | sr.arg1Word = w1; 43 | } 44 | if (w2 != null) { 45 | sr.arg2Word_beforeCRR = sr.arg2Word; 46 | sr.arg2Word = w2; 47 | } 48 | 49 | if (sr.arg1Word == sr.arg2Word) 50 | deleteList.add(sr); 51 | } 52 | 53 | simpleRelations.removeAll(deleteList); 54 | 55 | printCRR(qlog); 56 | System.out.println("==================================="); 57 | } 58 | 59 | // return the reference word of w 60 | public Word getRefWord (Word w, DependencyTree dt, QueryLogger qlog) { 61 | w = w.getNnHead(); 62 | 63 | if (w.crr != null) { 64 | return w.crr; 65 | } 66 | 67 | /* 68 | * method: (suitable for stanford parser (old version)) 69 | * (1) WDT --det--> [] eg: Which city is located in China? 70 | * (2) WDT -------> V/J --rcmod--> [] eg: Who is married to someone that was born in Rome? 71 | * (3) W -------> be <------- [] eg: Who is the author of WikiLeaks? // discarded: "when is the sth" conflicts with this rule 72 | * (4) WDT -------> V --ccomp--> [] eg: The actor that married the child of a politician. 73 | * (5) DT(that, which) --dep--> V eg: The actors that married an athlete. // DS parser error. 74 | * (6) W(position=1) ------> NN eg: What are the language used in China? // DS parser error, should eliminate "WRB": When was Carlo Giuliani shot? 75 | * (7) where <--advmod-- V <--advcl-- V --prep/pobj--> [] eg: Who graduate from the school where Keqiang Li graduates? 
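 * Each rule maps the leading wh-word (or "that"/"which") to the head noun it refers to; getRefWord caches the result in Word.crr.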
76 | */ 77 | 78 | DependencyTreeNode dtn = dt.getNodeByIndex(w.position); 79 | 80 | // no need for root 81 | if (dtn.father == null) return null; 82 | 83 | try { 84 | if(dtn.word.posTag.equals("WDT") && dtn.dep_father2child.equals("det")) { // (1) 85 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 86 | w.crr = dtn.father.word.getNnHead(); 87 | } 88 | else if(dtn.word.posTag.startsWith("W") && !dtn.word.posTag.equals("WRB") && dtn.word.position == 1 && dtn.father.word.posTag.equals("NN")) { // (6) 89 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 90 | w.crr = dtn.father.word.getNnHead(); 91 | } 92 | else if(dtn.word.posTag.equals("DT") 93 | && dtn.dep_father2child.equals("dep") 94 | && (dtn.word.baseForm.equals("that")||dtn.word.baseForm.equals("which"))) { // (5) 95 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.word.getNnHead()); 96 | w.crr = dtn.father.word.getNnHead(); 97 | } 98 | // else if(dtn.word.posTag.startsWith("W") 99 | // && dtn.father.word.baseForm.equals("be")) { // (3) //&& dtn.dep_father2child.equals("attr") 100 | // DependencyTreeNode target = dtn.father.containDependencyWithChildren("nsubj"); 101 | // if (target != null) { 102 | // if(qlog.MODE_debug) System.out.println(w + "-->" + target.word.getNnHead()); 103 | // w.crr = target.word.getNnHead(); 104 | // } 105 | // } 106 | else if(dtn.word.posTag.equals("WDT") 107 | && (dtn.father.word.posTag.startsWith("V") || dtn.father.word.posTag.startsWith("J")) 108 | && dtn.father.dep_father2child.equals("rcmod")) { // (2) 109 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.father.word.getNnHead()); 110 | w.crr = dtn.father.father.word.getNnHead(); 111 | } 112 | else if(dtn.word.posTag.equals("WDT") 113 | && dtn.father.word.posTag.startsWith("V") 114 | && dtn.father.dep_father2child.equals("ccomp")) { // (4) 115 | if(qlog.MODE_debug) System.out.println(w + "-->" + dtn.father.father.word.getNnHead()); 116 | w.crr = dtn.father.father.word.getNnHead(); 117 | } 118 | else if (dtn.word.baseForm.equals("where") 119 | && dtn.dep_father2child.equals("advmod") 120 | && dtn.father.dep_father2child.equals("advcl")) { // (7) 121 | DependencyTreeNode target = dtn.father.father.containDependencyWithChildren("prep"); 122 | if (target != null) { 123 | target = target.containDependencyWithChildren("pobj"); 124 | } 125 | else { 126 | for (DependencyTreeNode n : dtn.father.father.childrenList) { 127 | if (Globals.pd.relns_object.contains(n.dep_father2child)) { 128 | target = n; 129 | } 130 | } 131 | } 132 | if (target != null) { 133 | if(qlog.MODE_debug) System.out.println(w + "-->" + target.word.getNnHead()); 134 | w.crr = target.word.getNnHead(); 135 | } 136 | } 137 | } catch (Exception e) {} 138 | 139 | return w.crr; 140 | } 141 | 142 | public void printCRR (QueryLogger qlog) { 143 | HashSet<Word> printed = new HashSet<Word>(); 144 | for (Word w : qlog.s.words) { 145 | w = w.getNnHead(); 146 | if (printed.contains(w)) 147 | continue; 148 | if (w.crr != null) 149 | System.out.println("\""+w.getFullEntityName() + "\" is resolved to \"" + w.crr.getFullEntityName() + "\""); 150 | printed.add(w); 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/rdf/Sparql.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | 8 | 
import qa.Globals; 9 | 10 | public class Sparql implements Comparable<Sparql> 11 | { 12 | public ArrayList<Triple> tripleList = new ArrayList<Triple>(); 13 | public boolean countTarget = false; 14 | public String mostStr = null; 15 | public String moreThanStr = null; 16 | public double score = 0; 17 | 18 | public String questionFocus = null; // The answer variable 19 | public HashSet<String> variables = new HashSet<String>(); 20 | 21 | public enum QueryType {Select,Ask} 22 | public QueryType queryType = QueryType.Select; 23 | 24 | public HashMap semanticRelations = null; 25 | 26 | public void addTriple(Triple t) 27 | { 28 | if(!tripleList.contains(t)) 29 | { 30 | tripleList.add(t); 31 | score += t.score; 32 | } 33 | } 34 | 35 | public void delTriple(Triple t) 36 | { 37 | if(tripleList.contains(t)) 38 | { 39 | tripleList.remove(t); 40 | score -= t.score; 41 | } 42 | } 43 | 44 | @Override 45 | public String toString() 46 | { 47 | String ret = ""; 48 | for (Triple t : tripleList) { 49 | ret += t.toString(); 50 | ret += '\n'; 51 | } 52 | return ret; 53 | } 54 | 55 | public void deduplicate() 56 | { 57 | HashSet<String> set = new HashSet<String>(); 58 | ArrayList<Triple> list = new ArrayList<Triple>(); 59 | for(Triple t: tripleList) 60 | { 61 | String st = t.toStringWithoutScore(); 62 | if(set.contains(st)) 63 | list.add(t); 64 | set.add(st); 65 | } 66 | for(Triple t: list) 67 | this.delTriple(t); 68 | } 69 | 70 | // Is it a Basic Graph Pattern without filter and aggregation? 71 | public boolean isBGP() 72 | { 73 | if(moreThanStr != null || mostStr != null || countTarget) 74 | return false; 75 | return true; 76 | } 77 | 78 | // For display only (cannot be executed) 79 | public String toStringForGStore() 80 | { 81 | String ret = ""; 82 | for (Triple t : tripleList) 83 | { 84 | // !Omit obvious LITERAL 85 | if(t.object.equals("literal_HRZ")) 86 | continue; 87 | 88 | // !Omit some bad TYPEs 89 | if(t.predicateID==Globals.pd.typePredicateID && Globals.pd.bannedTypes.contains(t.object)) 90 | continue; 91 | 92 | ret += t.toStringForGStore(); 93 | ret += '\n'; 94 | } 95 | return ret; 96 | } 97 | 98 | /** 99 | * @description: 100 | * 1. Select all variables for BGP queries to display specific information. 101 | * 2. DO NOT select all variables when using aggregation like "HAVING" or "COUNT" ... 102 | * (It may involve too many results; e.g., for "which countries have more than 1000 caves?", the caves need not be displayed.) 103 | * @param: NULL. 104 | * @return: A SPARQL query that can be executed by GStore (NO prefix on entities/predicates). 105 | */ 106 | public String toStringForGStore2() 107 | { 108 | String ret = ""; 109 | variables.clear(); 110 | for(Triple t: tripleList) 111 | { 112 | if (!t.isSubjConstant()) variables.add(t.subject.replaceAll(" ", "_")); 113 | if (!t.isObjConstant()) variables.add(t.object.replaceAll(" ", "_")); 114 | } 115 | if(variables.size() == 0) 116 | queryType = QueryType.Ask; 117 | 118 | // part1: select / ask ... 119 | if (queryType==QueryType.Ask) 120 | ret += "ask"; 121 | else if(countTarget) 122 | ret += ("select COUNT(DISTINCT " + questionFocus + ")"); 123 | else 124 | { 125 | if(!isBGP()) // AGG: select question focus 126 | ret += ("select DISTINCT " + questionFocus); 127 | else // BGP: select all variables 128 | { 129 | ret += "select DISTINCT "; 130 | for (String v : variables) 131 | ret += v + " "; 132 | } 133 | } 134 | 135 | // part2: triples 136 | ret += " where { "; 137 | for(Triple t : tripleList) 138 | { 139 | if (!t.object.equals("literal_HRZ")) { // need not display literal 140 | ret += t.toStringForGStore(); 141 | ret += ". 
"; 142 | } 143 | } 144 | ret += "} "; 145 | 146 | // part3: order by / group by ... 147 | if(moreThanStr != null) 148 | ret += moreThanStr+" "; 149 | if(mostStr != null) 150 | ret += mostStr+" "; 151 | 152 | // part4: limit 153 | if(queryType != QueryType.Ask && (mostStr == null || !mostStr.contains("LIMIT"))) 154 | ret += "LIMIT " + Globals.MaxAnswerNum; 155 | 156 | return ret; 157 | } 158 | 159 | public int getVariableNumber() 160 | { 161 | int res = 0; 162 | for (Triple t: tripleList) 163 | { 164 | if (!t.isSubjConstant()) res++; 165 | if (!t.isObjConstant()) res++; 166 | } 167 | return res; 168 | } 169 | 170 | public void adjustTriplesOrder() 171 | { 172 | Collections.sort(this.tripleList); 173 | } 174 | 175 | public int compareTo(Sparql o) 176 | { 177 | double diff = this.score - o.score; 178 | if (diff > 0) 179 | return -1; 180 | else if (diff < 0) 181 | return 1; 182 | else 183 | return 0; 184 | } 185 | 186 | @Override 187 | public int hashCode() 188 | { 189 | int key = 0; 190 | for(Triple t: this.tripleList) 191 | key ^= t.hashCode(); 192 | return key; 193 | } 194 | 195 | @Override 196 | public boolean equals(Object spq) 197 | { 198 | Sparql tempSparql= (Sparql) spq; 199 | if(this.toStringForGStore2().equals(tempSparql.toStringForGStore2())) 200 | return true; 201 | else 202 | return false; 203 | } 204 | 205 | public Sparql(){} 206 | public Sparql(HashMap semanticRelations) 207 | { 208 | this.semanticRelations = semanticRelations; 209 | } 210 | 211 | public Sparql copy() 212 | { 213 | Sparql spq = new Sparql(this.semanticRelations); 214 | for (Triple t : this.tripleList) 215 | spq.addTriple(t); 216 | return spq; 217 | } 218 | 219 | public void removeLastTriple() 220 | { 221 | int idx = tripleList.size()-1; 222 | score -= tripleList.get(idx).score; 223 | tripleList.remove(idx); 224 | } 225 | 226 | public Sparql removeAllTypeInfo () 227 | { 228 | score = 0; 229 | ArrayList newTripleList = new ArrayList(); 230 | for (Triple t : tripleList) 231 | { 232 | if (t.predicateID != Globals.pd.typePredicateID) 233 | { 234 | newTripleList.add(t); 235 | score += t.score; 236 | } 237 | } 238 | tripleList = newTripleList; 239 | return this; 240 | } 241 | 242 | }; 243 | -------------------------------------------------------------------------------- /src/qa/parsing/QuestionParsing.java: -------------------------------------------------------------------------------- 1 | package qa.parsing; 2 | 3 | import log.QueryLogger; 4 | import nlp.ds.DependencyTree; 5 | import nlp.ds.DependencyTreeNode; 6 | import nlp.ds.Word; 7 | import nlp.ds.Sentence.SentenceType; 8 | import qa.Globals; 9 | import rdf.Sparql; 10 | import rdf.Triple; 11 | 12 | public class QuestionParsing { 13 | public void process(QueryLogger qlog) { 14 | getDependenciesAndNER(qlog); 15 | recognizeSentenceType(qlog); 16 | } 17 | 18 | public void getDependenciesAndNER (QueryLogger qlog) { 19 | long t1 = System.currentTimeMillis(); 20 | try { 21 | qlog.s.dependencyTreeStanford = new DependencyTree(qlog.s, Globals.stanfordParser); 22 | }catch(Exception e){ 23 | e.printStackTrace(); 24 | } 25 | 26 | long t2 = System.currentTimeMillis(); 27 | try{ 28 | qlog.s.dependencyTreeMalt = new DependencyTree(qlog.s, Globals.maltParser); 29 | }catch(Exception e){ 30 | //if errors occur, abandon malt tree 31 | qlog.s.dependencyTreeMalt = qlog.s.dependencyTreeStanford; 32 | System.err.println("MALT parser error! 
Use stanford parser instead."); 33 | } 34 | 35 | try { 36 | long t3 = System.currentTimeMillis(); 37 | Globals.nerRecognizer.recognize(qlog.s); 38 | long t4 = System.currentTimeMillis(); 39 | System.out.println("====StanfordDependencies("+(t2-t1)+"ms)===="); 40 | System.out.println(qlog.s.dependencyTreeStanford); 41 | System.out.println("====MaltDependencies("+(t3-t2)+"ms)===="); 42 | System.out.println(qlog.s.dependencyTreeMalt); 43 | System.out.println("====NameEntityRecognition("+(t4-t3)+"ms)===="); 44 | qlog.s.printNERResult(); 45 | 46 | qlog.timeTable.put("StanfordParser", (int)(t2-t1)); 47 | qlog.timeTable.put("MaltParser", (int)(t3-t2)); 48 | qlog.timeTable.put("NER", (int)(t4-t3)); 49 | } catch (Exception e) { 50 | e.printStackTrace(); 51 | } 52 | } 53 | 54 | public void recognizeSentenceType(QueryLogger qlog) 55 | { 56 | boolean IsImperativeSentence = recognizeImperativeSentence(qlog.s.dependencyTreeStanford)|| 57 | recognizeImperativeSentence(qlog.s.dependencyTreeMalt); 58 | if (IsImperativeSentence) 59 | { 60 | qlog.s.sentenceType = SentenceType.ImperativeSentence; 61 | //two dependencyTree's ignored words should equal 62 | for (DependencyTreeNode sNode : qlog.s.dependencyTreeStanford.nodesList) 63 | for (DependencyTreeNode mNode : qlog.s.dependencyTreeMalt.nodesList) 64 | if (sNode.equals(mNode) && (sNode.word.isIgnored||mNode.word.isIgnored)) 65 | sNode.word.isIgnored = mNode.word.isIgnored = true; 66 | return; 67 | } 68 | 69 | boolean IsSpecialQuestion = recognizeSpecialQuestion(qlog.s.dependencyTreeStanford)|| 70 | recognizeSpecialQuestion(qlog.s.dependencyTreeMalt); 71 | if (IsSpecialQuestion) 72 | { 73 | qlog.s.sentenceType = SentenceType.SpecialQuestion; 74 | return; 75 | } 76 | 77 | boolean IsGeneralQuestion = recognizeGeneralQuestion(qlog.s.dependencyTreeStanford)|| 78 | recognizeGeneralQuestion(qlog.s.dependencyTreeMalt); 79 | if (IsGeneralQuestion) 80 | { 81 | qlog.s.sentenceType = SentenceType.GeneralQuestion; 82 | return; 83 | } 84 | 85 | //default is special 86 | qlog.s.sentenceType = SentenceType.SpecialQuestion; 87 | 88 | } 89 | 90 | //if imperative, omitting those polite words 91 | private boolean recognizeImperativeSentence(DependencyTree tree) { 92 | if(tree.getRoot().word.posTag.startsWith("V") || tree.getRoot().word.posTag.startsWith("NN")) { 93 | DependencyTreeNode dobj = null; 94 | DependencyTreeNode iobj = null; 95 | for (DependencyTreeNode n : tree.getRoot().childrenList) { 96 | if (n.dep_father2child.equals("dobj")) { 97 | dobj = n; 98 | } 99 | else if (n.dep_father2child.equals("iobj")) { 100 | iobj = n; 101 | } 102 | } 103 | if (dobj != null && iobj != null) { 104 | tree.getRoot().word.isIgnored = true; 105 | iobj.word.isIgnored = true; 106 | 107 | // give me a list of .. 108 | if (dobj.word.baseForm.equals("list")) 109 | { 110 | dobj.word.isIgnored = true; 111 | } 112 | 113 | return true; 114 | } 115 | 116 | //start with "List": List all games by GMT. 117 | if (dobj != null && tree.getRoot().word.baseForm.equals("list")) 118 | { 119 | //System.out.println("isListSentence!"); 120 | tree.getRoot().word.isIgnored = true; 121 | 122 | return true; 123 | } 124 | } 125 | return false; 126 | } 127 | 128 | private boolean recognizeSpecialQuestion(DependencyTree tree) 129 | { 130 | DependencyTreeNode firstNode = null; 131 | for (DependencyTreeNode dtn : tree.nodesList) 132 | if (dtn.word.position == 1) 133 | { 134 | firstNode = dtn; 135 | break; 136 | } 137 | //eg. In which city... 
138 | if (firstNode!=null && 139 | (firstNode.word.posTag.equals("IN")||firstNode.word.posTag.equals("TO"))&& 140 | firstNode.dep_father2child.startsWith("prep")) 141 | { 142 | firstNode = null; 143 | for (DependencyTreeNode dtn : tree.nodesList) 144 | if (dtn.word.position == 2) 145 | { 146 | firstNode = dtn; 147 | break; 148 | } 149 | } 150 | 151 | if (firstNode != null) 152 | { 153 | if (firstNode.word.posTag.startsWith("W")) 154 | return true; 155 | } 156 | return false; 157 | } 158 | 159 | private boolean recognizeGeneralQuestion(DependencyTree tree) 160 | { 161 | DependencyTreeNode firstNode = null; 162 | for (DependencyTreeNode dtn : tree.nodesList) 163 | if (dtn.word.position == 1) 164 | { 165 | firstNode = dtn; 166 | break; 167 | } 168 | 169 | if (firstNode != null) 170 | { 171 | String dep = firstNode.dep_father2child; 172 | String pos = firstNode.word.posTag; 173 | String baseform = firstNode.word.baseForm; 174 | 175 | if ((baseform.equals("be")||baseform.equals("do")) && 176 | pos.startsWith("VB") && 177 | (dep.equals("root")||dep.equals("cop")||dep.startsWith("aux"))) 178 | return true; 179 | } 180 | return false; 181 | } 182 | 183 | public static String detectQuestionFocus(Sparql spq) { 184 | String ret = null; 185 | int posi = Integer.MAX_VALUE; 186 | for (Triple t : spq.tripleList) { 187 | 188 | if (!t.isSubjConstant()) { 189 | Word subj = t.getSubjectWord(); 190 | if (subj!=null && subj.position < posi) { 191 | posi = subj.position; 192 | ret = t.subject; 193 | } 194 | } 195 | if (!t.isObjConstant()) { 196 | Word obj = t.getObjectWord(); 197 | if (obj!=null && obj.position < posi) { 198 | posi = obj.position; 199 | ret = t.object; 200 | } 201 | } 202 | } 203 | if (ret != null) return ret.replace(' ', '_'); 204 | else return null; 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/application/GanswerHandler.java: -------------------------------------------------------------------------------- 1 | package application; 2 | import java.io.IOException; 3 | 4 | import javax.servlet.ServletException; 5 | import javax.servlet.http.HttpServletRequest; 6 | import javax.servlet.http.HttpServletResponse; 7 | 8 | import log.QueryLogger; 9 | 10 | import org.json.*; 11 | import org.eclipse.jetty.server.Request; 12 | import org.eclipse.jetty.server.handler.AbstractHandler; 13 | 14 | import rdf.Sparql; 15 | import qa.GAnswer; 16 | import qa.Globals; 17 | import qa.Matches; 18 | 19 | public class GanswerHandler extends AbstractHandler{ 20 | public static String errorHandle(String status,String message,String question,QueryLogger qlog){ 21 | JSONObject exobj = new JSONObject(); 22 | try { 23 | exobj.put("status", status); 24 | exobj.put("message", message); 25 | exobj.put("question", question); 26 | if(qlog!=null&&qlog.rankedSparqls!=null&&qlog.rankedSparqls.size()>0){ 27 | exobj.put("sparql", qlog.rankedSparqls.get(0).toStringForGStore2()); 28 | } 29 | } catch (Exception e1) { 30 | } 31 | return exobj.toString(); 32 | } 33 | 34 | public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response) 35 | throws IOException, ServletException { 36 | String question = ""; 37 | QueryLogger qlog = null; 38 | try{ 39 | response.setContentType("text/html;charset=utf-8"); 40 | response.setStatus(HttpServletResponse.SC_OK); 41 | //step1: parsing input json 42 | String data = request.getParameter("data"); 43 | data = data.replace("%22","\""); 44 | JSONObject jsonobj = new JSONObject(); 45 | int 
needAnswer = 0; 46 | int needSparql = 1; 47 | question = "Something wrong if you see this."; 48 | jsonobj = new JSONObject(data); 49 | question = jsonobj.getString("question"); 50 | if(jsonobj.isNull("maxAnswerNum")){ 51 | needAnswer = GanswerHttp.maxAnswerNum; 52 | } 53 | else{ 54 | needAnswer = jsonobj.getInt("maxAnswerNum"); 55 | } 56 | if(jsonobj.isNull("maxSparqlNum")){ 57 | needSparql = GanswerHttp.maxSparqlNum; 58 | }else{ 59 | needSparql = jsonobj.getInt("maxSparqlNum"); 60 | } 61 | Globals.MaxAnswerNum = needAnswer; 62 | 63 | //step2 run GAnswer Logic 64 | String input = question; 65 | GAnswer ga = new GAnswer(); 66 | qlog = ga.getSparqlList(input); 67 | if(qlog == null || qlog.rankedSparqls == null){ 68 | try { 69 | baseRequest.setHandled(true); 70 | response.getWriter().println(errorHandle("500","InvalidQuestionException: the question you input is invalid, please check",question,qlog)); 71 | } catch (Exception e1) { 72 | } 73 | return; 74 | } 75 | int idx; 76 | 77 | //step2 construct response 78 | JSONObject resobj = new JSONObject(); 79 | resobj.put("status", "200"); 80 | resobj.put("question",jsonobj.getString("question")); 81 | JSONObject tmpobj = new JSONObject(); 82 | if(needAnswer > 0){ 83 | if(qlog!=null && qlog.rankedSparqls.size()!=0){ 84 | Sparql curSpq = null; 85 | Matches m = null; 86 | for(idx = 1;idx<=Math.min(qlog.rankedSparqls.size(), 5);idx+=1){ 87 | curSpq = qlog.rankedSparqls.get(idx-1); 88 | if(curSpq.tripleList.size()>0&&curSpq.questionFocus!=null){ 89 | m = ga.getAnswerFromGStore2(curSpq); 90 | } 91 | if(m!=null&&m.answers!=null){ 92 | qlog.sparql = curSpq; 93 | qlog.match = m; 94 | break; 95 | } 96 | } 97 | if(m==null||m.answers==null){ 98 | curSpq = ga.getUntypedSparql(curSpq); 99 | if(curSpq!=null){ 100 | m = ga.getAnswerFromGStore2(curSpq); 101 | } 102 | if(m!=null&&m.answers!=null){ 103 | qlog.sparql = curSpq; 104 | qlog.match = m; 105 | } 106 | } 107 | if(qlog.match==null) 108 | qlog.match=new Matches(); 109 | if(qlog.sparql==null) 110 | qlog.sparql = qlog.rankedSparqls.get(0); 111 | qlog.reviseAnswers(); 112 | 113 | //adding variables to result json 114 | JSONArray vararr = new JSONArray(); 115 | for(String var : qlog.sparql.variables){ 116 | vararr.put(var); 117 | } 118 | resobj.put("vars", vararr); 119 | 120 | //adding answers to result json 121 | JSONArray ansobj = new JSONArray(); 122 | JSONObject bindingobj; 123 | System.out.println(qlog.match.answersNum); 124 | for(int i=0;i0){ 146 | JSONArray spqarr = new JSONArray(); 147 | spqarr.put(qlog.sparql.toStringForGStore2()); 148 | for(idx=0;idxMain_project_directory 29 | >>Ganswer.jar
30 | >>unzipped files from Ganswer.jar
31 | >>data 32 | >>>unzipped files from dbpedia16.rar
33 | - Run the jar file 34 | ```java 35 | java -jar Ganswer.jar 36 | ``` 37 | - Wait for the initialization procedure. When you see "Server Ready!", the initialization has succeeded and you can access the GAnswer service via HTTP requests. 38 | 39 | Information about the GAnswer HTTP API can be found in Chapter 2.1.1 of the help document. 40 | 41 | ### Use GAnswer via http request 42 | Here is an example of how to call the GAnswer service via an HTTP request. 43 | Having started GAnswerHttp, you can activate GAnswer by a URL as follows: 44 | http://[ip]:[port]/gSolve/?data={maxAnswerNum:1, maxSparqlNum:2, question:Who is the wife of Donald Trump?} 45 | 
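If you prefer to issue this request from code, the snippet below is a minimal sketch using only JDK classes (the class name `GSolveClient` and the `localhost:9999` endpoint are illustrative assumptions; 9999 is just the default port explained below):
```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

// Minimal sketch of a gSolve client; class name and endpoint are assumptions.
public class GSolveClient {
    public static void main(String[] args) throws Exception {
        String json = "{\"maxAnswerNum\":\"1\",\"maxSparqlNum\":\"2\","
                + "\"question\":\"Who is the wife of Donald Trump?\"}";
        // The server reads the json from the "data" URL parameter.
        String url = "http://localhost:9999/gSolve/?data="
                + URLEncoder.encode(json, "UTF-8");

        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"));
        StringBuilder body = new StringBuilder();
        for (String line; (line = in.readLine()) != null; )
            body.append(line);
        in.close();

        // The response json carries "sparql", "vars", "results" and "status".
        System.out.println(body);
    }
}
```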
46 | Here, [ip] and [port] are the IP address and port number of the GAnswerHttp service (the default port is 9999). Via the "data" parameter in the URL, you can send a JSON string to GAnswer. 47 | In this example, you are actually sending the following JSON data: 48 | ```json 49 | { 50 | "maxAnswerNum":"1", 51 | "maxSparqlNum":"2", 52 | "question":"Who is the wife of Donald Trump?" 53 | } 54 | ``` 55 | Here, maxAnswerNum and maxSparqlNum respectively limit the number of answers and SPARQL queries the system will return. Both of them are optional. 56 | If everything goes well, GAnswer will return a JSON string containing the system-generated SPARQL queries and the corresponding answers. 57 | ```json 58 | { 59 | "question":"Who is the wife of Donald Trump?", 60 | "vars":["?wife"], 61 | "sparql":["select DISTINCT ?wife where { \t\t?wife. } LIMIT 1","select DISTINCT ?wife where { ?wife\t\t. } LIMIT 1"], 62 | "results":{"bindings":[{"?wife":{"type":"uri","value":""}}]}, 63 | "status":"200" 64 | } 65 | ``` 66 | For more detail, please check Chapter 2.1.1 of the user guide. 67 | 68 | ### Run GAnswer in Eclipse 69 | If you would like to run GAnswer in Eclipse, you need to clone or download the source code and import the project into Eclipse. Afterwards, the jar files in the lib directory should be added to the Build Path. 70 | Due to their sizes, these jar files cannot be uploaded to GitHub. Therefore, you can download them [here](https://disk.pku.edu.cn:443/link/AD36D72C28B3A581379EE2748B1A79E7). The extract code is 64jd. You can also download the lib zip through [Google Drive](https://drive.google.com/file/d/1tEsi4pBOBHd2gmwVgIOgt-ypJZQH9G3S). 71 | Meanwhile, dbpedia16.rar is also needed. Please unzip it into a directory named data under the project main directory. The data path parameter can be found in qa.Globals.localPath. 72 | 73 | ### Notice 74 | To run GAnswer, you have to deal with multiple dependencies involving jar files, data files and external APIs. Related information is in Chapter 2.4 of the help document. 75 | Having generated SPARQL queries, the system by default accesses a remote gStore for answers, which may take extra time. Therefore, we strongly recommend deploying gStore on your own server for best performance. 76 | 77 | - Download the [DBpedia2016 triple file](https://pan.baidu.com/s/1l5Oui65sDn8QPYmA0rUvuA); the extract code is 89yy. 78 | - Deploy [gStore](http://gstore-pku.com) and use the DBpedia2016 triple file to build your own database. Note that the DBpedia 2016 triple file is about 9.9GB; the database construction needs more than 10GB of main memory and takes about 10 hours. 79 | 80 | ## Other Business 81 | 82 | You are welcome to use GAnswer and send us your advice or bug reports. 83 | 84 | If your advice or report is accepted, your contribution will be recorded in our help document. 85 | 86 | We have published several papers about GAnswer and the QA task, which you can find in Chapter 3.2 of the help document. 87 | 88 | ## How to make your own data available on gAnswer 89 | You may have your own set of triples and want to put them into gAnswer. Then you should generate a new set of fragments from your own triples. We have a [detailed tutorial](genrate_fragments/How_to_generate_fragments.md) to help you out. 
90 | 91 | -------------------------------------------------------------------------------- /genrate_fragments/How_to_generate_fragments.md: -------------------------------------------------------------------------------- 1 | ## How to generate fragments out of your own triples 2 | There are three kinds of fragments in gAnswer: entity fragments, predicate fragments and type fragments. They are information extracted from the triples that helps gAnswer improve its results. In this section we will show you how to generate your own fragments step by step with a simple example. 3 | 4 | ### Step 1: Clean the triple files 5 | Suppose we have a triple file containing only seven triples: 6 | ```java 7 | 8 | 9 | 10 | "Jeff" 11 | "Tom" 12 | 13 | 14 | 15 | ``` 16 | Generally speaking, each triple has three segments: subject, predicate and object. 17 | This is exactly the form of triples we need to generate fragments. However, sometimes the entities and predicates contain some extra information. Take the DBpedia dataset as an example. The following is the original form of a DBpedia triple: 18 | ```java 19 | . 20 | ``` 21 | As you can see, every entity and predicate is marked with a URI, but we don't need the prefix of the URIs. See step1_clean_triple.py; that is the code we use to clean DBpedia triples. 22 | Generally, all you need to do in this step is to make sure the entity and predicate names are clear enough to indicate their true meaning and contain no extra information. 23 | By the way, if you have more than one triple file, please combine them into one so that the following steps will be easier. 24 | 25 | ### Step 2: Remove duplicate triples 26 | One triple may occur more than once in the cleaned triple file, especially when you combine many triple files into one. 27 | gAnswer can handle duplicate triples, but they will degrade its performance. 28 | 29 | ### Step 3: Extract entity, predicate and type names for id allocation 30 | To save space, the fragment files are constructed not from the entity, predicate and type names themselves but from their ids. Therefore, we must extract every entity, predicate and type name out of the triple file and give each of them a unique id. In our example, the id files go like this: 31 | ```java 32 | //Entity ids 33 | 1 34 | 2 35 | 3 36 | 37 | //predicate ids 38 | 1 39 | 2 40 | 3 41 | 4 42 | 5 43 | 44 | //type ids 45 | 1 46 | 2 47 | ``` 48 | 49 | ### Step 4: Represent triples with ids 50 | For convenience, before we generate the fragments, we first replace all the name strings in the triple file with their corresponding ids. 51 | In our example, the new triple file looks like this: 52 | ```java 53 | 1 1 3 54 | 2 2 1 55 | 1 4 -1 56 | 2 4 -1 57 | 1 3 1 58 | 2 3 1 59 | 3 3 2 60 | ``` 61 | Notice that we use -1 to represent values that are neither entities nor types, such as numbers and literals. 62 | 63 | ### Step 5: Generate entity fragments 64 | Finally, we are going to generate entity fragments. Every entity has its own piece of fragment. A fragment records the edges related to the entity as well as its neighbor entities. First, let's clarify the idea of subject and object in a triple. A triple consists of three parts: subject, predicate and object. For example: 65 | ```java 66 | <studentA> <major> <computer_science> 67 | ``` 68 | Here *studentA* is the subject, *major* is the predicate and *computer_science* is the object. Basically, the first element is the subject, the second is the predicate and the third is the object. The object is sometimes neither an entity nor a type; values like numbers and strings can also be objects. 
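Before building fragments, it helps to see how one id-encoded line from Step 4 is read back. The following is a minimal sketch (a hypothetical helper, not one of the repository's generation scripts) that splits such a line into subject id, predicate id and object id, treating -1 as a literal object:
```java
// Parses one id-encoded triple line from Step 4, e.g. "1 4 -1".
public class TripleLine {
    public final int subjId, predId, objId;

    public TripleLine(String line) {
        String[] parts = line.trim().split("\\s+");
        subjId = Integer.parseInt(parts[0]); // subject entity id
        predId = Integer.parseInt(parts[1]); // predicate id
        objId  = Integer.parseInt(parts[2]); // entity/type id, or -1
    }

    // -1 marks objects that are neither entities nor types (numbers, strings).
    public boolean objectIsLiteral() {
        return objId == -1;
    }
}
```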
69 | 70 | We define 5 kinds of edges: 71 | 1. InEntEdge: The entity is the object of the edge and the subject is also an entity. 72 | 2. OutEntEdge: The entity is the subject of the edge and the object is also an entity. 73 | 3. InEdge: The entity is the object of the edge. 74 | 4. OutEdge: The entity is the subject of the edge. 75 | 5. typeEdge: The entity is the subject of the edge, whose predicate is *type* and whose object is a type. 76 | 77 | Therefore, the structure of a piece of entity fragment is as follows: 78 | ```java 79 | <entity id>	<InEntEdge list> | <OutEntEdge list> | <InEdge list> | <OutEdge list> | <Type list> 80 | ``` 81 | Between the entity id and the InEntEdge list, there should be a \t as divider. 82 | 83 | The InEntEdge list and OutEntEdge list should be: 84 | ```java 85 | <neighbor entity id>:<predicate id>;<predicate id>;...... , <neighbor entity id>:<predicate id>;...... 86 | ``` 87 | The InEdge, OutEdge and Type lists are similar but simpler: 88 | ```java 89 | <id>, <id>, ...... 90 | ``` 91 | 92 | Let's go back to our example. For entity *studentA*, the entity fragment should be: 93 | ```java 94 | 1 2:2;5 | 3:1 | 2 | 1,4 | 1 95 | ``` 96 | The id of *studentA* is 1, so the entity fragment starts with 1. Then we collect the InEntEdge, OutEntEdge, InEdge, OutEdge and Type lists one by one and append them to the entity fragment. 97 | 98 | ### Step 6: Generate type fragment 99 | Given a specific type, the type fragment contains three kinds of information: the predicate ids appearing in InEdges of entities of this type, the predicate ids appearing in OutEdges of entities of this type, and the ids of all entities of this type. The structure should be: 100 | ```java 101 | <type id>	<InEdge predicate list> | <OutEdge predicate list> | <entity list> 102 | ``` 103 | In our example, the type fragment of *Person* should be: 104 | ```java 105 | 1 2,5 | 1,4 | 1,2 106 | ``` 107 | 108 | ### Step 7: Generate predicate fragment 109 | Given a specific predicate, there will be more than one piece of predicate fragment. Every piece of predicate fragment comes from a single triple. We record the types that a predicate may accept as subject or object. Sometimes the object is not an entity, and we use *literal* to denote this situation. 110 | The structure of a piece of predicate fragment is: 111 | ```java 112 | [<subject type list>] <predicate id> [<object type list> or "literal"] 113 | ``` 114 | For predicate *friend_of*, the predicate fragment should be: 115 | ```java 116 | [1] 2 [1] 117 | ``` 118 | 119 | For predicate *name*, the predicate fragment should be: 120 | ```java 121 | [1] 4 literal 122 | ``` 123 | 124 | Please note that between the type lists, the predicate id and "literal", a \t should be the divider. 125 | 126 | ### Step 8: Rebuild the Lucene index for entity fragments and type short names 127 | This is the final step to make gAnswer run on your new data fragments. You can find the relevant code under src/lcn/BuildIndexForEntityFragments.java and src/lcn/BuildIndexForTypeShortName.java. All you need to do is to import the project into Eclipse, modify the file paths in the relevant code, and then run the main functions in src/lcn/BuildIndexForEntityFragments.java and src/lcn/BuildIndexForTypeShortName.java. 128 | -------------------------------------------------------------------------------- /src/rdf/Triple.java: -------------------------------------------------------------------------------- 1 | package rdf; 2 | 3 | import nlp.ds.Word; 4 | import qa.Globals; 5 | 6 | public class Triple implements Comparable<Triple>{ 7 | public String subject = null; // subject/object after disambiguation. 
8 | public String object = null; 9 | 10 | static public int TYPE_ROLE_ID = -5; 11 | static public int VAR_ROLE_ID = -2; 12 | static public int CAT_ROLE_ID = -8; // Category 13 | static public String VAR_NAME = "?xxx"; 14 | 15 | // subjId/objId: entity id | TYPE_ROLE_ID | VAR_ROLE_ID 16 | public int subjId = -1; 17 | public int objId = -1; 18 | public int predicateID = -1; 19 | public Word subjWord = null; // only used when semRltn == null 20 | public Word objWord = null; 21 | 22 | public SemanticRelation semRltn = null; 23 | public double score = 0; 24 | public boolean isSubjObjOrderSameWithSemRltn = true; 25 | public boolean isSubjObjOrderPrefered = false; 26 | 27 | public Word typeSubjectWord = null; // for "type" triples only 28 | 29 | public Triple (Triple t) { 30 | subject = t.subject; 31 | object = t.object; 32 | subjId = t.subjId; 33 | objId = t.objId; 34 | predicateID = t.predicateID; 35 | 36 | semRltn = t.semRltn; 37 | score = t.score; 38 | isSubjObjOrderSameWithSemRltn = t.isSubjObjOrderSameWithSemRltn; 39 | isSubjObjOrderPrefered = t.isSubjObjOrderPrefered; 40 | } 41 | 42 | // A final triple (subject/object order will not be changed); does not rely on a semantic relation (sr == null); from one word (type variable | embedded info) 43 | public Triple (int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco) { 44 | subjId = sId; 45 | objId = oId; 46 | subject = s; 47 | predicateID = p; 48 | object = o; 49 | semRltn = sr; 50 | score = sco; 51 | } 52 | 53 | // A triple translated from a semantic relation (subject/object order can be changed later) 54 | public Triple (int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco, boolean isSwap) { 55 | subjId = sId; 56 | objId = oId; 57 | subject = s; 58 | predicateID = p; 59 | object = o; 60 | semRltn = sr; 61 | score = sco; 62 | isSubjObjOrderSameWithSemRltn = isSwap; 63 | } 64 | 65 | // A final triple (subject/object order will not be changed); does not rely on a semantic relation (sr == null); from two words (implicit relations of modifier) 66 | public Triple(int sId, String s, int p, int oId, String o, SemanticRelation sr, double sco, Word subj, Word obj) { 67 | subjId = sId; 68 | objId = oId; 69 | subject = s; 70 | predicateID = p; 71 | object = o; 72 | semRltn = sr; 73 | score = sco; 74 | subjWord = subj; 75 | objWord = obj; 76 | } 77 | 78 | public Triple copy() { 79 | Triple t = new Triple(this); 80 | return t; 81 | } 82 | 83 | public Triple copySwap() { 84 | Triple t = new Triple(this); 85 | String temp; 86 | int tmpId; 87 | 88 | tmpId = t.subjId; 89 | t.subjId = t.objId; 90 | t.objId = tmpId; 91 | 92 | temp = t.subject; 93 | t.subject = t.object; 94 | t.object = temp; 95 | 96 | t.isSubjObjOrderSameWithSemRltn = !this.isSubjObjOrderSameWithSemRltn; 97 | t.isSubjObjOrderPrefered = !this.isSubjObjOrderPrefered; 98 | 99 | return t; 100 | } 101 | 102 | public void addScore(double s) { 103 | score += s; 104 | } 105 | 106 | public double getScore() { 107 | return score; 108 | } 109 | 110 | @Override 111 | public int hashCode() 112 | { 113 | return new Integer(subjId).hashCode() ^ new Integer(objId).hashCode() ^ new Integer(predicateID).hashCode(); 114 | } 115 | 116 | @Override 117 | public String toString() { 118 | return subjId+":<" + subject + "> <" + Globals.pd.getPredicateById(predicateID) + "> "+objId+":<" + object + ">" + " : " + score; 119 | } 120 | 121 | public String toStringForGStore() { 122 | StringBuilder sb = new StringBuilder(""); 123 | 124 | String _subject = subject; 125 | 
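// Variables keep their leading '?'; constants are wrapped in angle brackets, and blanks become '_' in the final replace.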
if(_subject.startsWith("?")) 126 | sb.append(_subject+"\t"); 127 | else 128 | sb.append("<" + _subject + ">\t"); 129 | 130 | sb.append("<" + Globals.pd.getPredicateById(predicateID) + ">\t"); 131 | 132 | String _object; 133 | if(predicateID == Globals.pd.typePredicateID && object.contains("|")) 134 | _object = object.substring(0, object.indexOf('|')); 135 | else 136 | _object = object; 137 | if(_object.startsWith("?")) 138 | sb.append(_object); 139 | else 140 | sb.append("<" + _object + ">"); 141 | 142 | return sb.toString().replace(' ', '_'); 143 | } 144 | 145 | public String toStringWithoutScore() { 146 | return "<" + subject + "> <" + Globals.pd.getPredicateById(predicateID) + "> <" + object + ">"; 147 | } 148 | 149 | public Word getSubjectWord () { 150 | if (predicateID == Globals.pd.typePredicateID) { 151 | return typeSubjectWord; 152 | } 153 | else if(semRltn == null) 154 | { 155 | return subjWord; 156 | } 157 | else { 158 | if (isSubjObjOrderSameWithSemRltn) return semRltn.arg1Word; 159 | else return semRltn.arg2Word; 160 | } 161 | 162 | } 163 | 164 | public Word getObjectWord () { 165 | if (predicateID == Globals.pd.typePredicateID) { 166 | return typeSubjectWord; 167 | } 168 | else if(semRltn == null) 169 | { 170 | return objWord; 171 | } 172 | else { 173 | if (isSubjObjOrderSameWithSemRltn) return semRltn.arg2Word; 174 | else return semRltn.arg1Word; 175 | } 176 | } 177 | 178 | public boolean isSubjConstant () { 179 | if (predicateID == Globals.pd.typePredicateID) { 180 | return !subject.startsWith("?"); 181 | } 182 | else { 183 | // Triple from semantic (obvious) relation 184 | if(semRltn != null) 185 | { 186 | if (isSubjObjOrderSameWithSemRltn) return semRltn.isArg1Constant; 187 | else return semRltn.isArg2Constant; 188 | } 189 | // Triple from implicit relation (no semantic relation), it is final triple 190 | else 191 | { 192 | if(subjId != Triple.VAR_ROLE_ID && subjId != Triple.TYPE_ROLE_ID) 193 | return true; 194 | else 195 | return false; 196 | } 197 | } 198 | } 199 | 200 | public boolean isObjConstant () { 201 | if (predicateID == Globals.pd.typePredicateID) { 202 | return !object.startsWith("?"); 203 | } 204 | else { 205 | if(semRltn != null) 206 | { 207 | if (isSubjObjOrderSameWithSemRltn) return semRltn.isArg2Constant; 208 | else return semRltn.isArg1Constant; 209 | } 210 | else 211 | { 212 | if(objId != Triple.VAR_ROLE_ID && objId != Triple.TYPE_ROLE_ID) 213 | return true; 214 | else 215 | return false; 216 | } 217 | } 218 | } 219 | 220 | public int compareTo(Triple o) 221 | { 222 | //Order: Type, Ent&Ent, Ent&Var, Var&Var 223 | if(this.predicateID == Globals.pd.typePredicateID) 224 | { 225 | if(o.predicateID == Globals.pd.typePredicateID) 226 | return 0; 227 | else 228 | return -1; 229 | } 230 | int cnt1 = 0, cnt2 = 0; 231 | if(!this.subject.startsWith("?")) 232 | cnt1++; 233 | if(!this.object.startsWith("?")) 234 | cnt1++; 235 | if(!o.subject.startsWith("?")) 236 | cnt2++; 237 | if(!o.object.startsWith("?")) 238 | cnt2++; 239 | 240 | if(cnt1 == cnt2) 241 | return 0; 242 | else if(cnt1 > cnt2) 243 | return -1; 244 | else 245 | return 1; 246 | } 247 | 248 | public void swapSubjObjOrder() { 249 | String temp = subject; 250 | int tmpId = subjId; 251 | subject = object; 252 | subjId = objId; 253 | object = temp; 254 | objId = tmpId; 255 | isSubjObjOrderSameWithSemRltn = !isSubjObjOrderSameWithSemRltn; 256 | } 257 | }; -------------------------------------------------------------------------------- /src/nlp/tool/CoreNLP.java: 
-------------------------------------------------------------------------------- 1 | package nlp.tool; 2 | 3 | import java.util.List; 4 | import java.util.Properties; 5 | 6 | import nlp.ds.Word; 7 | import edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation; 8 | import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation; 9 | import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; 10 | import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation; 11 | import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation; 12 | import edu.stanford.nlp.ling.CoreLabel; 13 | import edu.stanford.nlp.pipeline.Annotation; 14 | import edu.stanford.nlp.pipeline.StanfordCoreNLP; 15 | import edu.stanford.nlp.trees.Tree; 16 | import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; 17 | import edu.stanford.nlp.trees.semgraph.SemanticGraph; 18 | import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation; 19 | import edu.stanford.nlp.util.CoreMap; 20 | 21 | public class CoreNLP { 22 | 23 | // CoreNLP can also recognize TIME and NUMBER (see SUTime) 24 | private StanfordCoreNLP pipeline_lemma; 25 | 26 | public CoreNLP () { 27 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 28 | /*Properties props_all = new Properties(); 29 | props_all.put("annotators", "tokenize, ssplit, pos, lemma, parse"); // full list: "tokenize, ssplit, pos, lemma, ner, parse, dcoref" 30 | pipeline_all = new StanfordCoreNLP(props_all);*/ 31 | 32 | Properties props_lemma = new Properties(); 33 | props_lemma.put("annotators", "tokenize, ssplit, pos, lemma"); 34 | pipeline_lemma = new StanfordCoreNLP(props_lemma); 35 | 36 | } 37 | 38 | // For more efficient usage, refer to "http://www.jarvana.com/jarvana/view/edu/stanford/nlp/stanford-corenlp/1.2.0/stanford-corenlp-1.2.0-javadoc.jar!/edu/stanford/nlp/process/Morphology.html" 39 | public String getBaseFormOfPattern (String text) { 40 | String ret = new String(""); 41 | 42 | // create an empty Annotation just with the given text 43 | Annotation document = new Annotation(text); 44 | // run all Annotators on this text 45 | pipeline_lemma.annotate(document); 46 | 47 | 48 | // these are all the sentences in this document 49 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 50 | List sentences = document.get(SentencesAnnotation.class); 51 | 52 | int count = 0; 53 | for(CoreMap sentence: sentences) { 54 | // traversing the words in the current sentence 55 | // a CoreLabel is a CoreMap with additional token-specific methods 56 | for (CoreLabel token: sentence.get(TokensAnnotation.class)) { 57 | // this is the base form (lemma) of the token 58 | String lemma = token.getString(LemmaAnnotation.class); 59 | ret += lemma; 60 | ret += " "; 61 | } 62 | count ++; 63 | if (count % 100 == 0) { 64 | System.out.println(count); 65 | } 66 | } 67 | 68 | return ret.substring(0, ret.length()-1); 69 | } 70 | 71 | public SemanticGraph getBasicDependencies (String s) { 72 | // create an empty Annotation just with the given text 73 | Annotation document = new Annotation(s); 74 | 75 | // run all Annotators on this text 76 | pipeline_lemma.annotate(document); 77 | 78 | // these are all the sentences in this document 79 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 80 | List sentences = document.get(SentencesAnnotation.class); 81 | 82 | for(CoreMap sentence: sentences) { 83 | // this is the 
Stanford dependency graph of the current sentence 84 | SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class); 85 | return dependencies; 86 | } 87 | 88 | return null; 89 | } 90 | 91 | public Tree getParseTree (String text) { 92 | // create an empty Annotation just with the given text 93 | Annotation document = new Annotation(text); 94 | 95 | // run all Annotators on this text 96 | pipeline_lemma.annotate(document); 97 | 98 | // these are all the sentences in this document 99 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 100 | List sentences = document.get(SentencesAnnotation.class); 101 | 102 | for(CoreMap sentence: sentences) { 103 | // this is the parse tree of the current sentence 104 | return sentence.get(TreeAnnotation.class); 105 | } 106 | 107 | return null; 108 | } 109 | 110 | /** 111 | * How to use: 112 | * for (CoreLabel token : sentence.get(TokensAnnotation.class)) { 113 | * // this is the text of the token 114 | * String word = token.get(TextAnnotation.class); 115 | * // this is the POS tag of the token 116 | * String pos = token.get(PartOfSpeechAnnotation.class); 117 | * } 118 | * @param s 119 | * @return 120 | */ 121 | public CoreMap getPOS (String s) { 122 | // create an empty Annotation just with the given text 123 | Annotation document = new Annotation(s); 124 | 125 | // run all Annotators on this text 126 | pipeline_lemma.annotate(document); 127 | 128 | // these are all the sentences in this document 129 | // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types 130 | List sentences = document.get(SentencesAnnotation.class); 131 | 132 | for(CoreMap sentence: sentences) { 133 | // this is the sentence with POS Tags 134 | return sentence; 135 | } 136 | 137 | return null; 138 | } 139 | 140 | public Word[] getTaggedWords (String sentence) { 141 | CoreMap taggedSentence = getPOS(sentence); 142 | Word[] ret = new Word[taggedSentence.get(TokensAnnotation.class).size()]; 143 | int count = 0; 144 | for (CoreLabel token : taggedSentence.get(TokensAnnotation.class)) { 145 | // this is the text of the token 146 | String word = token.get(TextAnnotation.class); 147 | // this is the POS tag of the token 148 | String pos = token.get(PartOfSpeechAnnotation.class); 149 | //System.out.println(word+"["+pos+"]"); 150 | ret[count] = new Word(getBaseFormOfPattern(word.toLowerCase()), word, pos, count+1); 151 | count ++; 152 | } 153 | return ret; 154 | } 155 | 156 | /*public void demo () { 157 | // creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution 158 | Properties props = new Properties(); 159 | props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); 160 | StanfordCoreNLP pipeline = new StanfordCoreNLP(props); 161 | 162 | // read some text in the text variable 163 | String text = ... // Add your text here! 
164 | 
165 |         // create an empty Annotation just with the given text
166 |         Annotation document = new Annotation(text);
167 | 
168 |         // run all Annotators on this text
169 |         pipeline.annotate(document);
170 | 
171 |         // these are all the sentences in this document
172 |         // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
173 |         List<CoreMap> sentences = document.get(SentencesAnnotation.class);
174 | 
175 |         for(CoreMap sentence: sentences) {
176 |             // traversing the words in the current sentence
177 |             // a CoreLabel is a CoreMap with additional token-specific methods
178 |             for (CoreLabel token: sentence.get(TokensAnnotation.class)) {
179 |                 // this is the text of the token
180 |                 String word = token.get(TextAnnotation.class);
181 |                 // this is the POS tag of the token
182 |                 String pos = token.get(PartOfSpeechAnnotation.class);
183 |                 // this is the NER label of the token
184 |                 String ne = token.get(NamedEntityTagAnnotation.class);
185 |             }
186 | 
187 |             // this is the parse tree of the current sentence
188 |             Tree tree = sentence.get(TreeAnnotation.class);
189 | 
190 |             // this is the Stanford dependency graph of the current sentence
191 |             SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
192 |         }
193 | 
194 |         // This is the coreference link graph.
195 |         // Each chain stores a set of mentions that link to each other,
196 |         // along with a method for getting the most representative mention.
197 |         // Both sentence and token offsets start at 1!
198 |         Map<Integer, CorefChain> graph =
199 |             document.get(CorefChainAnnotation.class);
200 |     }*/
201 | }
--------------------------------------------------------------------------------
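A minimal usage sketch for the class above. It is not part of the original file; it assumes the Stanford CoreNLP models bundled with gAnswer are on the classpath, and it uses only the Word fields (baseForm, originalForm) that appear elsewhere in this repository:

    // Construct once and reuse: building the pipeline loads the tagger models.
    CoreNLP coreNLP = new CoreNLP();

    // Lemmatization, e.g. "Who are the daughters ..." -> "who be the daughter ..."
    System.out.println(coreNLP.getBaseFormOfPattern("Who are the daughters of Ingrid Bergman?"));

    // POS tagging; Word positions start at 1 (see getTaggedWords above).
    Word[] words = coreNLP.getTaggedWords("Who are the daughters of Ingrid Bergman?");
    for (Word w : words)
        System.out.println(w.originalForm + " / " + w.baseForm);
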
/src/addition/AddtionalFix.java:
--------------------------------------------------------------------------------
1 | package addition;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.HashMap;
5 | 
6 | import paradict.PredicateIDAndSupport;
7 | import log.QueryLogger;
8 | import nlp.ds.Word;
9 | import nlp.ds.Sentence.SentenceType;
10 | import qa.Globals;
11 | import rdf.SemanticUnit;
12 | import rdf.Sparql;
13 | import rdf.Sparql.QueryType;
14 | import rdf.Triple;
15 | 
16 | 
17 | public class AddtionalFix
18 | {
19 |     public HashMap<String, String> pattern2category = new HashMap<String, String>();
20 | 
21 |     public AddtionalFix()
22 |     {
23 |         // Some category mappings for DBpedia (key: pattern in base form, value: category name); try automatic linking methods later.
24 |         pattern2category.put("gangster_from_the_prohibition_era", "Prohibition-era_gangsters");
25 |         pattern2category.put("seven_wonder_of_the_ancient_world", "Seven_Wonders_of_the_Ancient_World");
26 |         pattern2category.put("three_ship_use_by_columbus", "Christopher_Columbus");
27 |         pattern2category.put("13_british_colony", "Thirteen_Colonies");
28 |     }
29 | 
30 |     public void process(QueryLogger qlog)
31 |     {
32 |         fixCategory(qlog);
33 |         oneTriple(qlog);
34 |         oneNode(qlog);
35 | 
36 |         // aggregation
37 |         AggregationRecognition ar = new AggregationRecognition();
38 |         ar.recognize(qlog);
39 | 
40 |         // query type
41 |         decideQueryType(qlog);
42 |     }
43 | 
44 |     public void decideQueryType(QueryLogger qlog)
45 |     {
46 |         if(qlog.s.sentenceType == SentenceType.GeneralQuestion)
47 |             for(Sparql spq: qlog.rankedSparqls)
48 |                 spq.queryType = QueryType.Ask;
49 |     }
50 | 
51 |     public void fixCategory(QueryLogger qlog)
52 |     {
53 |         if(qlog == null || qlog.semanticUnitList == null)
54 |             return;
55 | 
56 |         String var = null, category = null;
57 |         for(SemanticUnit su: qlog.semanticUnitList)
58 |         {
59 |             if(su.centerWord.mayCategory)
60 |             {
61 |                 var = "?"+su.centerWord.originalForm;
62 |                 category = su.centerWord.category;
63 |             }
64 |         }
65 | 
66 |         if(category != null && var != null)
67 |             for(Sparql spq: qlog.rankedSparqls)
68 |             {
69 |                 boolean occured = false;
70 |                 for(Triple tri: spq.tripleList)
71 |                 {
72 |                     if(tri.subject.equals(var))
73 |                     {
74 |                         occured = true;
75 |                         break;
76 |                     }
77 |                 }
78 |                 String oName = category;
79 |                 String pName = "subject";
80 |                 int pid = Globals.pd.predicate_2_id.get(pName);
81 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, var, pid, Triple.CAT_ROLE_ID, oName, null, 100);
82 |                 spq.addTriple(triple);
83 |             }
84 |     }
85 | 
86 |     /* Recognize one-node queries.
87 |      * Two cases: (1) special questions and imperative sentences; (2) general (yes/no) questions.
88 |      * 1-1: how many [], highest [] ... | a single variable; add a constraint (aggregation)
89 |      * 1-2: What is backgammon? | What is a bipolar syndrome? | search an entity (return itself or its type/description ...)
90 |      * 1-3: Give me all Seven Wonders of the Ancient World. | Notice, "Seven Wonders of the Ancient World" should be recognized as an ENT before. (in fact it is a CATEGORY in DBpedia)
91 |      * 2-1: Are there any [castles_in_the_United_States](yago:type)
92 |      * 2-2: Was Sigmund Freud married? | lacks a variable node.
93 |      * 2-3: Are penguins endangered? | no suitable relation match; needs a transition.
94 |      */
95 |     public void oneNode(QueryLogger qlog)
96 |     {
97 |         if(qlog == null || qlog.semanticUnitList == null || qlog.semanticUnitList.size()>1)
98 |             return;
99 | 
100 |         Word target = qlog.target;
101 |         Word[] words = qlog.s.words;
102 |         if(qlog.s.sentenceType != SentenceType.GeneralQuestion)
103 |         {
104 |             //1-1: how many [type] are there | List all [type]
105 |             if(target.mayType && target.tmList != null)
106 |             {
107 |                 String subName = "?"+target.originalForm;
108 |                 String typeName = target.tmList.get(0).typeName;
109 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, subName, Globals.pd.typePredicateID, Triple.TYPE_ROLE_ID, typeName, null, 100);
110 |                 Sparql sparql = new Sparql();
111 |                 sparql.addTriple(triple);
112 |                 qlog.rankedSparqls.add(sparql);
113 |             }
114 |             //1-2: What is [ent]?
115 |             else if(target.mayEnt && target.emList != null)
116 |             {
117 |                 if(words.length >= 3 && words[0].baseForm.equals("what") && words[1].baseForm.equals("be"))
118 |                 {
119 |                     int eid = target.emList.get(0).entityID;
120 |                     String subName = target.emList.get(0).entityName;
121 |                     Triple triple = new Triple(eid, subName, Globals.pd.typePredicateID, Triple.VAR_ROLE_ID, "?"+target.originalForm, null, target.emList.get(0).score);
122 |                     Sparql sparql = new Sparql();
123 |                     sparql.addTriple(triple);
124 |                     qlog.rankedSparqls.add(sparql);
125 |                 }
126 |             }
127 |             //1-3: Give me all Seven Wonders of the Ancient World.
128 |             else if(target.mayCategory && target.category != null)
129 |             {
130 |                 String oName = target.category;
131 |                 String pName = "subject";
132 |                 int pid = Globals.pd.predicate_2_id.get(pName);
133 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, "?"+target.originalForm, pid, Triple.CAT_ROLE_ID, oName, null, 100);
134 |                 Sparql sparql = new Sparql();
135 |                 sparql.addTriple(triple);
136 |                 qlog.rankedSparqls.add(sparql);
137 |             }
138 |         }
139 |         else
140 |         {
141 |             if(target.mayEnt && target.emList != null)
142 |             {
143 |                 //2-2: Was Sigmund Freud married?
144 |                 String relMention = "";
145 |                 for(Word word: words)
146 |                     if(word != target && !word.baseForm.equals(".") && !word.baseForm.equals("?"))
147 |                         relMention += word.baseForm+" ";
148 |                 if(relMention.length() > 1)
149 |                     relMention = relMention.substring(0, relMention.length()-1);
150 | 
151 |                 ArrayList<PredicateIDAndSupport> pmList = null;
152 |                 if(Globals.pd.nlPattern_2_predicateList.containsKey(relMention))
153 |                     pmList = Globals.pd.nlPattern_2_predicateList.get(relMention);
154 | 
155 |                 if(pmList != null && pmList.size() > 0)
156 |                 {
157 |                     int pid = pmList.get(0).predicateID;
158 |                     int eid = target.emList.get(0).entityID;
159 |                     String subName = target.emList.get(0).entityName;
160 |                     Triple triple = new Triple(eid, subName, pid, Triple.VAR_ROLE_ID, "?x", null, 100);
161 |                     Sparql sparql = new Sparql();
162 |                     sparql.addTriple(triple);
163 |                     qlog.rankedSparqls.add(sparql);
164 |                 }
165 | 
166 |                 //2-3: Are penguins endangered?
167 |                 else
168 |                 {
169 |                     if(target.position < words.length && pattern2category.containsKey(words[target.position].baseForm))
170 |                     {
171 |                         String oName = pattern2category.get(words[target.position].baseForm);
172 |                         String pName = "subject";
173 |                         int pid = Globals.pd.predicate_2_id.get(pName);
174 |                         int eid = target.emList.get(0).entityID;
175 |                         String subName = target.emList.get(0).entityName;
176 |                         Triple triple = new Triple(eid, subName, pid, Triple.CAT_ROLE_ID, oName, null, 100);
177 |                         Sparql sparql = new Sparql();
178 |                         sparql.addTriple(triple);
179 |                         qlog.rankedSparqls.add(sparql);
180 |                     }
181 |                 }
182 |             }
183 |             //2-1: Are there any [castles_in_the_United_States](yago:type)
184 |             else if(target.mayType && target.tmList != null)
185 |             {
186 |                 String typeName = target.tmList.get(0).typeName;
187 |                 String subName = "?" + target.originalForm;
188 |                 //System.out.println("typeName="+typeName+" subName="+subName);
189 |                 Triple triple = new Triple(Triple.VAR_ROLE_ID, subName, Globals.pd.typePredicateID, Triple.TYPE_ROLE_ID, typeName, null, 100);
190 |                 Sparql sparql = new Sparql();
191 |                 sparql.addTriple(triple);
192 |                 qlog.rankedSparqls.add(sparql);
193 |             }
194 |         }
195 |     }
196 | 
197 |     /*
198 |      * One triple recognized but no suitable relation.
199 |      */
200 |     public void oneTriple (QueryLogger qlog)
201 |     {
202 |         if(qlog == null || qlog.semanticUnitList == null)
203 |             return;
204 | 
205 |         if(qlog.s.sentenceType == SentenceType.SpecialQuestion)
206 |         {
207 |             Word[] words = qlog.s.words;
208 |             if(qlog.semanticUnitList.size() == 2)
209 |             {
210 |                 Word entWord = null, whWord = null;
211 |                 for(int i=0; i<qlog.semanticUnitList.size(); i++)
212 |                 {
213 |                     Word centerWord = qlog.semanticUnitList.get(i).centerWord;
214 |                     if(centerWord.mayEnt && centerWord.emList != null)
215 |                         entWord = centerWord;
216 |                     else
217 |                         whWord = centerWord;
218 |                 }
219 |                 if(entWord != null && whWord != null && words.length >= 3 && words[0].baseForm.equals("what") && words[1].baseForm.equals("be"))
220 |                 {
221 |                     int eid = entWord.emList.get(0).entityID;
222 |                     String subName = entWord.emList.get(0).entityName;
223 |                     Triple triple = new Triple(eid, subName, Globals.pd.typePredicateID, Triple.VAR_ROLE_ID, "?"+whWord.originalForm, null, entWord.emList.get(0).score);
224 |                     Sparql sparql = new Sparql();
225 |                     sparql.addTriple(triple);
226 |                     qlog.rankedSparqls.add(sparql);
227 |                 }
228 |             }
229 |         }
230 |     }
231 | }
232 | 
233 | 
--------------------------------------------------------------------------------
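To make case 1-3 above concrete: the one-triple SPARQL built for "Give me all Seven Wonders of the Ancient World." can be written out by hand. This is an illustrative sketch, not project code; it assumes Globals.init() has already loaded the predicate dictionary, and the variable name ?wonder is made up:

    // oneNode(), case 1-3: a CATEGORY target becomes <?var, subject, category>.
    int pid = Globals.pd.predicate_2_id.get("subject");
    Triple triple = new Triple(Triple.VAR_ROLE_ID, "?wonder", pid,
            Triple.CAT_ROLE_ID, "Seven_Wonders_of_the_Ancient_World", null, 100);

    Sparql sparql = new Sparql();
    sparql.addTriple(triple);   // a one-triple SPARQL answering the imperative sentence
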
/src/fgmt/EntityFragment.java:
--------------------------------------------------------------------------------
1 | package fgmt;
2 | 
3 | import java.io.IOException;
4 | import java.util.ArrayList;
5 | import java.util.Collections;
6 | import java.util.HashMap;
7 | import java.util.HashSet;
8 | 
9 | import rdf.EntityMapping;
10 | import lcn.EntityFragmentFields;
11 | import lcn.EntityNameAndScore;
12 | import lcn.SearchInEntityFragments;
13 | 
14 | public class EntityFragment extends Fragment {
15 | 
16 |     public int eId;
17 |     public HashSet<Integer> inEdges = new HashSet<Integer>();
18 |     public HashSet<Integer> outEdges = new HashSet<Integer>();
19 |     public HashSet<Integer> types = new HashSet<Integer>();
20 | 
21 |     // In/out entities and the connecting edges. E.g., for a triple <eId, p, oId>, outEntMap of eId contains <oId, [p, ...]>.
22 |     public HashMap<Integer, ArrayList<Integer>> inEntMap = new HashMap<Integer, ArrayList<Integer>>(); // note: the input file should contain no redundant triples.
23 |     public HashMap<Integer, ArrayList<Integer>> outEntMap = new HashMap<Integer, ArrayList<Integer>>();
24 | 
25 |     static double thres1 = 0.4;
26 |     static double thres2 = 0.8;
27 |     static int thres3 = 3;
28 |     static int k = 50;
29 | 
30 |     /**
31 |      * mention to entity using the Lucene index.
32 |      * 
33 |      * rule:
34 |      * select the top-k results for each phrase.
35 |      * (1) if the current lowest score < thres1, drop those with score < thres1.
36 |      * (2) if the current lowest score > thres2, add those with score > thres2.
37 |      * 
38 |      * exact match:
39 |      * (1) Lucene score = 1.
40 |      * (2) string match (lowercase): edit distance <= thres3.
41 |      * 
42 |      * score:
43 |      * use the Lucene score directly.
44 |      * 
45 |      * @param phrase
46 |      * @return
47 |      */
48 |     public static HashMap<Integer, Double> getCandEntityNames2(String phrase) {
49 | 
50 |         HashMap<Integer, Double> ret = new HashMap<Integer, Double>();
51 |         ArrayList<EntityNameAndScore> list1 = getCandEntityNames_subject(phrase, thres1, thres2, k);
52 | 
53 |         if(list1 == null)
54 |             return ret;
55 | 
56 |         int iter_size = 0;
57 |         if (list1.size() <= k) {
58 |             iter_size = list1.size();
59 |         }
60 |         else if (list1.size() > k) {
61 |             if (list1.get(k-1).score >= thres2) {
62 |                 iter_size = list1.size();
63 |             }
64 |             else {
65 |                 iter_size = k;
66 |             }
67 |         }
68 |         for(int i = 0; i < iter_size; i ++) {
69 |             if (i < k) {
70 |                 ret.put(list1.get(i).entityID, getScore(phrase, list1.get(i).entityName, list1.get(i).score));
71 |             }
72 |             else if (list1.get(i).score >= thres2) {
73 |                 ret.put(list1.get(i).entityID, getScore(phrase, list1.get(i).entityName, list1.get(i).score));
74 |             }
75 |             else {
76 |                 break;
77 |             }
78 |         }
79 | 
80 |         return ret;
81 |     }
82 | 
83 |     public static ArrayList<EntityMapping> getEntityMappingList (String n)
84 |     {
85 |         HashMap<Integer, Double> map = getCandEntityNames2(n);
86 |         ArrayList<EntityMapping> ret = new ArrayList<EntityMapping>();
87 |         for (int eid : map.keySet())
88 |         {
89 |             String s = EntityFragmentFields.entityId2Name.get(eid);
90 |             ret.add(new EntityMapping(eid, s, map.get(eid)));
91 |         }
92 |         Collections.sort(ret);
93 |         return ret;
94 |     }
95 | 
96 |     public static double getScore (String s1, String s2, double luceneScore) {
97 |         double ret = luceneScore*100.0/(Math.log(calEditDistance(s1, s2)*1.5+1)+1);
98 |         return ret;
99 |     }
100 | 
101 |     /**
102 |      * Edit distance (all lowercase)
103 |      * @param s1
104 |      * @param s2
105 |      * @return
106 |      */
107 |     public static int calEditDistance (String s1, String s2) {
108 |         s1 = s1.toLowerCase();
109 |         s2 = s2.toLowerCase();
110 | 
111 |         int d[][];
112 |         int n = s1.length();
113 |         int m = s2.length();
114 |         int i, j, temp;
115 |         char ch1, ch2;
116 | 
117 |         if(n == 0) {
118 |             return m;
119 |         }
120 |         if(m == 0) {
121 |             return n;
122 |         }
123 | 
124 |         d = new int[n+1][m+1];
125 |         for(i=0; i<=n; i++) {
126 |             d[i][0] = i;
127 |         }
128 |         for(j=0; j<=m; j++) {
129 |             d[0][j] = j;
130 |         }
131 | 
132 |         for(i=1; i<=n; i++) {
133 |             ch1 = s1.charAt(i-1);
134 |             for(j=1; j<=m; j++) {
135 |                 ch2 = s2.charAt(j-1);
136 |                 if(ch1 == ch2) {
137 |                     temp = 0;
138 |                 } else {
139 |                     temp = 1;
140 |                 }
141 |                 d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]+temp);
142 |             }
143 |         }
144 | 
145 |         return d[n][m];
146 |     }
147 | 
148 |     private static int min(int a, int b, int c) {
149 |         int ab = a<b ? a : b;
150 |         return ab<c ? ab : c;
151 |     }
152 | 
153 |     public static ArrayList<EntityNameAndScore> getCandEntityNames_subject(String phrase, double thres1, double thres2, int k) {
154 |         SearchInEntityFragments sf = new SearchInEntityFragments();
155 |         //System.out.println("EntityFragment.getCandEntityNames_subject() ...");
156 | 
157 |         ArrayList<EntityNameAndScore> ret_sf = null;
158 |         try {
159 |             ret_sf = sf.searchName(phrase, thres1, thres2, k);
160 |         } catch (IOException e) {
161 |             //e.printStackTrace();
162 |             System.err.println("Error reading the lcn (Lucene) index.");
163 |         }
164 | 
165 |         return ret_sf;
166 |     }
167 | 
168 |     public static EntityFragment getEntityFragmentByEntityId(Integer entityId)
169 |     {
170 |         if(!EntityFragmentFields.entityFragmentString.containsKey(entityId))
171 |             return null;
172 |         String fgmt = EntityFragmentFields.entityFragmentString.get(entityId);
173 |         EntityFragment ef = new EntityFragment(entityId, fgmt);
174 |         return ef;
175 |     }
176 | 
177 |     public static String getEntityFgmtStringByName(String entityName)
178 |     {
179 |         int id = EntityFragmentFields.entityName2Id.get(entityName);
180 |         String fgmt = EntityFragmentFields.entityFragmentString.get(id);
181 |         return fgmt;
182 |     }
183 | 
184 |     public EntityFragment(int eid, String fgmt)
185 |     {
186 |         eId = eid;
187 |         fragmentType = typeEnum.ENTITY_FRAGMENT;
188 | 
189 |         // eg, for entity 11: "3961112:2881;410;,4641020:2330;,|...|...|...|..." — five '|'-separated fields (inEntMap, outEntMap, inEdges, outEdges, types); each ent-map entry is "entId:edgeId;edgeId;,".
190 |         fgmt = fgmt.replace('|', '#');
191 |         String[] fields = fgmt.split("#");
192 | 
193 |         if(fields.length > 0 && fields[0].length() > 0)
194 |         {
195 |             String[] entEdgesArr = fields[0].split(",");
196 |             for(int i = 0; i < entEdgesArr.length; i ++)
197 |             {
198 |                 String[] nums = entEdgesArr[i].split(":");
199 |                 if(nums.length != 2)
200 |                     continue;
201 |                 int inEntId = Integer.valueOf(nums[0]);
202 |                 String[] inEdgeArr = nums[1].split(";");
203 |                 ArrayList<Integer> inEdgeList = new ArrayList<Integer>();
204 |                 for(String inEdge: inEdgeArr)
205 |                 {
206 |                     inEdgeList.add(Integer.valueOf(inEdge));
207 |                 }
208 |                 if(inEdgeList.size()>0)
209 |                     inEntMap.put(inEntId, inEdgeList);
210 |             }
211 |         }
212 | 
213 |         if(fields.length > 1 && fields[1].length() > 0)
214 |         {
215 |             String[] entEdgesArr = fields[1].split(",");
216 |             for(int i = 0; i < entEdgesArr.length; i ++)
217 |             {
218 |                 String[] nums = entEdgesArr[i].split(":");
219 |                 if(nums.length != 2)
220 |                     continue;
221 |                 int outEntId = Integer.valueOf(nums[0]);
222 |                 String[] outEdges = nums[1].split(";");
223 |                 ArrayList<Integer> outEdgeList = new ArrayList<Integer>();
224 |                 for(String outEdge: outEdges)
225 |                 {
226 |                     outEdgeList.add(Integer.valueOf(outEdge));
227 |                 }
228 |                 if(outEdgeList.size()>0)
229 |                     outEntMap.put(outEntId, outEdgeList);
230 |             }
231 |         }
232 | 
233 |         if(fields.length > 2 && fields[2].length() > 0) {
234 |             String[] nums = fields[2].split(",");
235 |             for(int i = 0; i < nums.length; i ++) {
236 |                 if (nums[i].length() > 0) {
237 |                     inEdges.add(Integer.parseInt(nums[i]));
238 |                 }
239 |             }
240 |         }
241 |         if(fields.length > 3 && fields[3].length() > 0) {
242 |             String[] nums = fields[3].split(",");
243 |             for(int i = 0; i < nums.length; i ++) {
244 |                 if (nums[i].length() > 0) {
245 |                     outEdges.add(Integer.parseInt(nums[i]));
246 |                 }
247 |             }
248 |         }
249 |         if(fields.length > 4 && fields[4].length() > 0) {
250 |             String[] nums = fields[4].split(",");
251 |             for(int i = 0; i < nums.length; i ++) {
252 |                 if (nums[i].length() > 0) {
253 |                     types.add(Integer.parseInt(nums[i]));
254 |                 }
255 |             }
256 |         }
257 |     }
258 | 
259 |     @Override
260 |     public String toString()
261 |     {
262 |         StringBuilder ret = new StringBuilder();
263 |         for(Integer inEnt: inEntMap.keySet())
264 |         {
265 |             ArrayList<Integer> inEdgeList = inEntMap.get(inEnt);
266 |             if(inEdgeList==null || inEdgeList.size()==0)
267 |                 continue;
268 |             ret.append(inEnt+":");
269 |             for(int inEdge: inEdgeList)
270 |                 ret.append(inEdge+";");
271 |             ret.append(",");
272 |         }
273 |         ret.append('|');
274 |         for(Integer outEnt: outEntMap.keySet())
275 |         {
276 |             ArrayList<Integer> outEdgeList = outEntMap.get(outEnt);
277 |             if(outEdgeList==null || outEdgeList.size()==0)
278 |                 continue;
279 |             ret.append(outEnt+":");
280 |             for(int outEdge: outEdgeList)
281 |                 ret.append(outEdge+";");
282 |             ret.append(",");
283 |         }
284 |         ret.append('|');
285 |         for(Integer p : inEdges) {
286 |             ret.append(p);
287 |             ret.append(',');
288 |         }
289 |         ret.append('|');
290 |         for(Integer p : outEdges) {
291 |             ret.append(p);
292 |             ret.append(',');
293 |         }
294 |         ret.append('|');
295 |         for(Integer t : types) {
296 |             ret.append(t);
297 |             ret.append(',');
298 |         }
299 |         return ret.toString();
300 |     }
301 | }
--------------------------------------------------------------------------------
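Both the fragment encoding and the linking score above can be checked in isolation. The constructor parses five '|'-separated fields (in-neighbors, out-neighbors, in-edges, out-edges, types), and getScore(s1, s2, lucene) = lucene*100/(ln(ed*1.5+1)+1), so an exact (edit-distance-0) match keeps the Lucene score scaled by 100. A sketch with made-up ids, not a real fragment from the index:

    // Entity 11: in-neighbor 3961112 via edges 2881 and 410; out-neighbor 4641020
    // via edge 2330; edge sets {2881,410} / {2330}; one type id 5.
    EntityFragment ef = new EntityFragment(11, "3961112:2881;410;,|4641020:2330;,|2881,410,|2330,|5,");
    System.out.println(ef);   // re-serializes the same five fields

    // Exact match: calEditDistance("berlin","Berlin") == 0 (it lowercases),
    // so the score is 1.0*100/(ln(1)+1) = 100.0.
    System.out.println(EntityFragment.getScore("berlin", "Berlin", 1.0));
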
/src/qa/GAnswer.java:
--------------------------------------------------------------------------------
1 | package qa;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.List;
6 | 
7 | import jgsc.GstoreConnector;
8 | import log.QueryLogger;
9 | import nlp.ds.Sentence;
10 | import nlp.ds.Sentence.SentenceType;
11 | import qa.parsing.QuestionParsing;
12 | import qa.parsing.BuildQueryGraph;
13 | import rdf.Sparql;
14 | import utils.FileUtil;
15 | import addition.AddtionalFix;
16 | import qa.Globals;
17 | 
18 | public class GAnswer {
19 | 
20 |     public static final int MAX_SPQ_NUM = 3;
21 | 
22 |     public static void init() {
23 |         System.out.println("gAnswer2 init ...");
24 | 
25 |         Globals.init();
26 | 
27 |         System.out.println("gAnswer2 init ... ok!");
28 |     }
29 | 
30 |     public QueryLogger getSparqlList(String input)
31 |     {
32 |         QueryLogger qlog = null;
33 |         try
34 |         {
35 |             if (input.length() <= 5)
36 |                 return null;
37 | 
38 |             System.out.println("[Input:] "+input);
39 | 
40 |             // step 0: Node (entity & type & literal) Recognition
41 |             long t0 = System.currentTimeMillis(), t, NRtime;
42 |             Query query = new Query(input);
43 |             qlog = new QueryLogger(query);
44 |             ArrayList<Sparql> rankedSparqls = new ArrayList<Sparql>();
45 |             NRtime = System.currentTimeMillis()-t0;
46 |             System.out.println("step0 [Node Recognition] : "+ NRtime +"ms");
47 | 
48 |             // Try to solve each NR plan, and combine the ranked SPARQLs.
49 |             // We only keep the log of the BEST NR plan, for convenience.
50 |             for(int i=query.sList.size()-1; i>=0; i--)
51 |             {
52 |                 Sentence possibleSentence = query.sList.get(i);
53 |                 qlog.reloadSentence(possibleSentence);
54 |                 // qlog.isMaltParserUsed = true;
55 | 
56 |                 // LOG
57 |                 System.out.println("transQ: "+qlog.s.plainText);
58 |                 qlog.NRlog = query.preLog;
59 |                 qlog.SQGlog = "Id: "+query.queryId+"\nQuery: "+query.NLQuestion+"\n";
60 |                 qlog.SQGlog += qlog.NRlog;
61 |                 qlog.timeTable.put("step0", (int)NRtime);
62 | 
63 |                 // step 1: question parsing (dependency tree, sentence type)
64 |                 t = System.currentTimeMillis();
65 |                 QuestionParsing step1 = new QuestionParsing();
66 |                 step1.process(qlog);
67 |                 qlog.timeTable.put("step1", (int)(System.currentTimeMillis()-t));
68 | 
69 |                 // step 2: build query graph (structure construction, relation extraction, top-k join)
70 |                 t = System.currentTimeMillis();
71 |                 BuildQueryGraph step2 = new BuildQueryGraph();
72 |                 step2.process(qlog);
73 |                 qlog.timeTable.put("step2", (int)(System.currentTimeMillis()-t));
74 | 
75 |                 // step 3: some fixes (such as "one-node" or "ask-one-triple") and aggregation
76 |                 t = System.currentTimeMillis();
77 |                 AddtionalFix step3 = new AddtionalFix();
78 |                 step3.process(qlog);
79 | 
80 |                 // Collect SPARQLs.
81 |                 rankedSparqls.addAll(qlog.rankedSparqls);
82 |                 qlog.timeTable.put("step3", (int)(System.currentTimeMillis()-t));
83 |             }
84 | 
85 |             // Deduplicate in SPARQL.
86 |             for(Sparql spq: rankedSparqls)
87 |                 spq.deduplicate();
88 | 
89 |             // Sort (descending order).
90 |             Collections.sort(rankedSparqls);
91 |             qlog.rankedSparqls = rankedSparqls;
92 |             System.out.println("number of rankedSparqls = " + qlog.rankedSparqls.size());
93 | 
94 |             // Detect question focus.
95 |             for (int i=0; i<qlog.rankedSparqls.size(); i++)
202 |         List<String> inputList = FileUtil.readFile("E:/Linyinnian/qald6_special.txt");
203 |         for(String input: inputList)
204 |         {
205 |             ArrayList<String> outputs = new ArrayList<String>();
206 |             ArrayList<String> spqs = new ArrayList<String>();
207 |             spqs.add("id:"+String.valueOf(i));
208 |             i++;
209 | 
210 |             long parsing_st_time = System.currentTimeMillis();
211 | 
212 |             QueryLogger qlog = ga.getSparqlList(input);
213 |             if(qlog == null || qlog.rankedSparqls == null)
214 |                 continue;
215 | 
216 |             long parsing_ed_time = System.currentTimeMillis();
217 |             System.out.println("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
218 |             System.out.println("TripleCheck time: "+ qlog.timeTable.get("TripleCheck") + "ms");
219 |             System.out.println("SparqlCheck time: "+ qlog.timeTable.get("SparqlCheck") + "ms");
220 |             System.out.println("Ranked Sparqls: " + qlog.rankedSparqls.size());
221 | 
222 |             outputs.add(qlog.SQGlog);
223 |             outputs.add(qlog.SQGlog + "Building HQG time: "+ (qlog.timeTable.get("step0")+qlog.timeTable.get("step1")+qlog.timeTable.get("step2")-qlog.timeTable.get("BQG_topkjoin")) + "ms");
224 |             outputs.add("TopKjoin time: "+ qlog.timeTable.get("BQG_topkjoin") + "ms");
225 |             outputs.add("Question Understanding time: "+ (int)(parsing_ed_time - parsing_st_time)+ "ms");
226 | 
227 |             long executing_st_time = System.currentTimeMillis();
228 |             Matches m = null;
229 |             System.out.println("[RESULT]");
230 |             ArrayList<String> lastSpqList = new ArrayList<String>();
231 |             int idx;
232 |             // Consider the top-5 SPARQLs.
233 |             for(idx=1; idx<=Math.min(qlog.rankedSparqls.size(), 5); idx++)
234 |             {
235 |                 Sparql curSpq = qlog.rankedSparqls.get(idx-1);
236 |                 String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, curSpq);
237 |                 lastSpqList.add(stdSPQwoPrefix);
238 | 
239 |                 System.out.println("[" + idx + "]" + "score=" + curSpq.score);
240 |                 System.out.println(stdSPQwoPrefix);
241 | 
242 |                 // Print the top-3 SPARQLs to file.
243 |                 if(idx <= MAX_SPQ_NUM)
244 |                     // spqs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
245 |                     outputs.add("[" + idx + "]" + "score=" + curSpq.score + "\n" + stdSPQwoPrefix);
246 | 
247 |                 // Execute by Virtuoso or gStore when answers are not found yet.
248 |                 if(m == null || m.answers == null)
249 |                 {
250 |                     if(curSpq.tripleList.size()>0 && curSpq.questionFocus!=null)
251 |                     {
252 |                         m = ga.getAnswerFromGStore2(curSpq);
253 |                     }
254 |                     if(m != null && m.answers != null)
255 |                     {
256 |                         // Found results using the current SPARQL, so we can record and print them.
257 |                         qlog.sparql = curSpq;
258 |                         qlog.match = m;
259 |                         qlog.reviseAnswers();
260 |                         System.out.println("Query Executing time: "+ (int)(System.currentTimeMillis() - executing_st_time)+ "ms");
261 |                     }
262 |                 }
263 |             }
264 | 
265 |             // Some TYPE constraints can be omitted; also try the untyped variant of the best SPARQL.
266 |             if(!qlog.rankedSparqls.isEmpty())
267 |             {
268 |                 Sparql untypedSparql = ga.getUntypedSparql(qlog.rankedSparqls.get(0));
269 |                 if(untypedSparql != null)
270 |                 {
271 |                     String stdSPQwoPrefix = ga.getStdSparqlWoPrefix(qlog, untypedSparql);
272 |                     if(!lastSpqList.contains(stdSPQwoPrefix))
273 |                         // spqs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
274 |                         outputs.add("[" + Math.min(MAX_SPQ_NUM+1, idx) + "]" + "score=" + 1000 + "\n" + stdSPQwoPrefix + "\n");
275 |                 }
276 |             }
277 | 
278 |             FileUtil.writeFile(outputs, "E:/Linyinnian/qald6_special_out.txt", true);
279 |         }
280 | 
281 |     }
282 | }
283 | 
--------------------------------------------------------------------------------
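Finally, an end-to-end driver sketch for the classes above (note that lines 96-201 of GAnswer.java — the question-focus loop body, the gStore/serialization helpers, and the opening of main() — are missing from this copy, so the sketch relies only on the members that are visible). It is not part of the repository; it assumes the dictionaries, Lucene indexes and NLP models expected by Globals.init() are configured, and it prints only scores because full SPARQL serialization goes through GAnswer's helper methods:

    import log.QueryLogger;
    import qa.GAnswer;

    public class Demo {
        public static void main(String[] args) {
            GAnswer.init();                      // loads dictionaries, indexes and parsers
            GAnswer ga = new GAnswer();

            QueryLogger qlog = ga.getSparqlList("Who is the wife of Barack Obama?");
            if (qlog == null || qlog.rankedSparqls == null)
                return;

            // rankedSparqls is already sorted in descending score order by getSparqlList().
            int n = Math.min(GAnswer.MAX_SPQ_NUM, qlog.rankedSparqls.size());
            for (int i = 0; i < n; i++)
                System.out.println("[" + (i + 1) + "] score=" + qlog.rankedSparqls.get(i).score);
        }
    }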